From 792d5b9e5ee40e58b922ae32e5a6ee9aa9586cbc Mon Sep 17 00:00:00 2001
From: Steve Reinhardt <stever@eecs.umich.edu>
Date: Fri, 18 May 2007 22:35:04 -0700
Subject: First set of changes for reorganized cache coherence support.
 Compiles but doesn't work... committing just so I can merge (stupid bk!).

src/mem/bridge.cc:
    Get rid of SNOOP_COMMIT.
src/mem/bus.cc:
src/mem/packet.hh:
    Get rid of SNOOP_COMMIT & two-pass snoop.
    First bits of EXPRESS_SNOOP support.
src/mem/cache/base_cache.cc:
src/mem/cache/base_cache.hh:
src/mem/cache/cache.hh:
src/mem/cache/cache_impl.hh:
src/mem/cache/miss/blocking_buffer.cc:
src/mem/cache/miss/miss_queue.cc:
src/mem/cache/prefetch/base_prefetcher.cc:
    Big reorg of ports and port-related functions & events.
src/mem/cache/cache.cc:
src/mem/cache/cache_builder.cc:
src/mem/cache/coherence/SConscript:
    Get rid of UniCoherence object.

--HG--
extra : convert_revision : 7672434fa3115c9b1c94686f497e57e90413b7c3
---
 src/mem/bridge.cc                         |   6 -
 src/mem/bus.cc                            |  43 ++--
 src/mem/cache/base_cache.cc               | 315 +++++++++--------------------
 src/mem/cache/base_cache.hh               | 321 +++++-------------------------
 src/mem/cache/cache.cc                    |   6 -
 src/mem/cache/cache.hh                    |  52 +++--
 src/mem/cache/cache_builder.cc            |  10 +-
 src/mem/cache/cache_impl.hh               | 278 ++++++++++++++++++++------
 src/mem/cache/coherence/SConscript        |   1 -
 src/mem/cache/coherence/uni_coherence.cc  | 135 -------------
 src/mem/cache/coherence/uni_coherence.hh  | 146 --------------
 src/mem/cache/miss/blocking_buffer.cc     |  14 +-
 src/mem/cache/miss/miss_queue.cc          |  16 +-
 src/mem/cache/prefetch/base_prefetcher.cc |   8 +-
 src/mem/packet.hh                         |   2 +-
 15 files changed, 435 insertions(+), 918 deletions(-)
 delete mode 100644 src/mem/cache/coherence/uni_coherence.cc
 delete mode 100644 src/mem/cache/coherence/uni_coherence.hh

diff --git a/src/mem/bridge.cc b/src/mem/bridge.cc
index f525ccb48..5460c88dd 100644
--- a/src/mem/bridge.cc
+++ b/src/mem/bridge.cc
@@ -112,10 +112,6 @@ Bridge::BridgePort::reqQueueFull()
 bool
 Bridge::BridgePort::recvTiming(PacketPtr pkt)
 {
-    if (!(pkt->flags & SNOOP_COMMIT))
-        return true;
-
-
     DPRINTF(BusBridge, "recvTiming: src %d dest %d addr 0x%x\n",
                 pkt->getSrc(), pkt->getDest(), pkt->getAddr());
 
@@ -255,8 +251,6 @@ Bridge::BridgePort::trySend()
 
     PacketPtr pkt = buf->pkt;
 
-    pkt->flags &= ~SNOOP_COMMIT; //CLear it if it was set
-
     if (pkt->cmd == MemCmd::WriteInvalidateReq && fixPartialWrite &&
             pkt->result != Packet::Nacked && pkt->getOffset(pbs) &&
             pkt->getSize() != pbs) {
diff --git a/src/mem/bus.cc b/src/mem/bus.cc
index 95d4e2873..895123f8b 100644
--- a/src/mem/bus.cc
+++ b/src/mem/bus.cc
@@ -182,8 +182,10 @@ Bus::recvTiming(PacketPtr pkt)
 
     // If the bus is busy, or other devices are in line ahead of the current
     // one, put this device on the retry list.
-    if (tickNextIdle > curTick ||
-            (retryList.size() && (!inRetry || pktPort != retryList.front()))) {
+    if (!(pkt->flags & EXPRESS_SNOOP) &&
+        (tickNextIdle > curTick ||
+         (retryList.size() && (!inRetry || pktPort != retryList.front()))))
+    {
         addToRetryList(pktPort);
         DPRINTF(Bus, "recvTiming: Bus is busy, returning false\n");
         return false;
@@ -195,31 +197,18 @@ Bus::recvTiming(PacketPtr pkt)
     // access has been handled twice.
     if (dest == Packet::Broadcast) {
         port = findPort(pkt->getAddr(), pkt->getSrc());
-        pkt->flags &= ~SNOOP_COMMIT;
-        if (timingSnoop(pkt, port ? port : interfaces[pkt->getSrc()])) {
-            bool success;
-
-            pkt->flags |= SNOOP_COMMIT;
-            success = timingSnoop(pkt, port ? port : interfaces[pkt->getSrc()]);
-            assert(success);
-
-            if (pkt->flags & SATISFIED) {
-                //Cache-Cache transfer occuring
-                if (inRetry) {
-                    retryList.front()->onRetryList(false);
-                    retryList.pop_front();
-                    inRetry = false;
-                }
-                occupyBus(pkt);
-                DPRINTF(Bus, "recvTiming: Packet sucessfully sent\n");
-                return true;
+        timingSnoop(pkt, port ? port : interfaces[pkt->getSrc()]);
+
+        if (pkt->flags & SATISFIED) {
+            //Cache-Cache transfer occurring
+            if (inRetry) {
+                retryList.front()->onRetryList(false);
+                retryList.pop_front();
+                inRetry = false;
             }
-        } else {
-            //Snoop didn't succeed
-            DPRINTF(Bus, "Adding1 a retry to RETRY list %d\n",
-                    pktPort->getId());
-            addToRetryList(pktPort);
-            return false;
+            occupyBus(pkt);
+            DPRINTF(Bus, "recvTiming: Packet sucessfully sent\n");
+            return true;
         }
     } else {
         assert(dest >= 0 && dest < maxId);
@@ -426,7 +415,6 @@ Bus::recvAtomic(PacketPtr pkt)
     DPRINTF(Bus, "recvAtomic: packet src %d dest %d addr 0x%x cmd %s\n",
             pkt->getSrc(), pkt->getDest(), pkt->getAddr(), pkt->cmdString());
     assert(pkt->getDest() == Packet::Broadcast);
-    pkt->flags |= SNOOP_COMMIT;
 
     // Assume one bus cycle in order to get through.  This may have
     // some clock skew issues yet again...
@@ -451,7 +439,6 @@ Bus::recvFunctional(PacketPtr pkt)
     DPRINTF(Bus, "recvFunctional: packet src %d dest %d addr 0x%x cmd %s\n",
             pkt->getSrc(), pkt->getDest(), pkt->getAddr(), pkt->cmdString());
     assert(pkt->getDest() == Packet::Broadcast);
-    pkt->flags |= SNOOP_COMMIT;
 
     Port* port = findPort(pkt->getAddr(), pkt->getSrc());
     functionalSnoop(pkt, port ? port : interfaces[pkt->getSrc()]);
diff --git a/src/mem/cache/base_cache.cc b/src/mem/cache/base_cache.cc
index 3ed4b84d1..b699271f7 100644
--- a/src/mem/cache/base_cache.cc
+++ b/src/mem/cache/base_cache.cc
@@ -40,29 +40,38 @@
 
 using namespace std;
 
-BaseCache::CachePort::CachePort(const std::string &_name, BaseCache *_cache,
-                                bool _isCpuSide)
-    : Port(_name, _cache), cache(_cache), isCpuSide(_isCpuSide)
+BaseCache::CachePort::CachePort(const std::string &_name, BaseCache *_cache)
+    : Port(_name, _cache), cache(_cache), otherPort(NULL), requestCauses(0)
 {
     blocked = false;
     waitingOnRetry = false;
-    //Start ports at null if more than one is created we should panic
-    //cpuSidePort = NULL;
-    //memSidePort = NULL;
 }
 
 
+BaseCache::BaseCache(const std::string &name, Params &params)
+    : MemObject(name),
+      blocked(0), blockedSnoop(0),
+      blkSize(params.blkSize),
+      missCount(params.maxMisses), drainEvent(NULL)
+{
+}
+
+
+
 void
 BaseCache::CachePort::recvStatusChange(Port::Status status)
 {
-    cache->recvStatusChange(status, isCpuSide);
+    if (status == Port::RangeChange) {
+        otherPort->sendStatusChange(Port::RangeChange);
+    }
 }
 
 void
 BaseCache::CachePort::getDeviceAddressRanges(AddrRangeList &resp,
                                        AddrRangeList &snoop)
 {
-    cache->getAddressRanges(resp, snoop, isCpuSide);
+    AddrRangeList dummy;
+    otherPort->getPeerAddressRanges(resp, dummy);
 }
 
 int
@@ -115,92 +124,99 @@ BaseCache::CachePort::checkAndSendFunctional(PacketPtr pkt)
         sendFunctional(pkt);
 }
 
+
 void
-BaseCache::CachePort::recvRetry()
+BaseCache::CachePort::respond(PacketPtr pkt, Tick time)
 {
-    PacketPtr pkt;
-    assert(waitingOnRetry);
-    if (!drainList.empty()) {
-        DPRINTF(CachePort, "%s attempting to send a retry for response (%i waiting)\n"
-                , name(), drainList.size());
-        //We have some responses to drain first
-        pkt = drainList.front();
-        drainList.pop_front();
-        if (sendTiming(pkt)) {
-            DPRINTF(CachePort, "%s sucessful in sending a retry for"
-                    "response (%i still waiting)\n", name(), drainList.size());
-            if (!drainList.empty() ||
-                !isCpuSide && cache->doMasterRequest() ||
-                isCpuSide && cache->doSlaveRequest()) {
-
-                DPRINTF(CachePort, "%s has more responses/requests\n", name());
-                new BaseCache::RequestEvent(this, curTick + 1);
-            }
-            waitingOnRetry = false;
-        }
-        else {
-            drainList.push_front(pkt);
+    assert(time >= curTick);
+    if (pkt->needsResponse()) {
+        if (transmitList.empty()) {
+            assert(!responseEvent->scheduled());
+            responseEvent->schedule(time);
+            transmitList.push_back(std::pair<Tick,PacketPtr>(time,pkt));
+            return;
         }
-        // Check if we're done draining once this list is empty
-        if (drainList.empty())
-            cache->checkDrain();
-    }
-    else if (!isCpuSide)
-    {
-        DPRINTF(CachePort, "%s attempting to send a retry for MSHR\n", name());
-        if (!cache->doMasterRequest()) {
-            //This can happen if I am the owner of a block and see an upgrade
-            //while the block was in my WB Buffers.  I just remove the
-            //wb and de-assert the masterRequest
-            waitingOnRetry = false;
+
+        // something is on the list and this belongs at the end
+        if (time >= transmitList.back().first) {
+            transmitList.push_back(std::pair<Tick,PacketPtr>(time,pkt));
             return;
         }
-        pkt = cache->getPacket();
-        MSHR* mshr = (MSHR*) pkt->senderState;
-        //Copy the packet, it may be modified/destroyed elsewhere
-        PacketPtr copyPkt = new Packet(*pkt);
-        copyPkt->dataStatic<uint8_t>(pkt->getPtr<uint8_t>());
-        mshr->pkt = copyPkt;
-
-        bool success = sendTiming(pkt);
-        DPRINTF(Cache, "Address %x was %s in sending the timing request\n",
-                pkt->getAddr(), success ? "succesful" : "unsuccesful");
-
-        waitingOnRetry = !success;
-        if (waitingOnRetry) {
-            DPRINTF(CachePort, "%s now waiting on a retry\n", name());
+        // Something is on the list and this belongs somewhere else
+        std::list<std::pair<Tick,PacketPtr> >::iterator i =
+            transmitList.begin();
+        std::list<std::pair<Tick,PacketPtr> >::iterator end =
+            transmitList.end();
+        bool done = false;
+
+        while (i != end && !done) {
+            if (time < i->first) {
+                if (i == transmitList.begin()) {
+                    //Inserting at beginning, reschedule
+                    responseEvent->reschedule(time);
+                }
+                transmitList.insert(i,std::pair<Tick,PacketPtr>(time,pkt));
+                done = true;
+            }
+            i++;
+        }
+    }
+    else {
+        assert(0);
+        // this code was on the cpuSidePort only... do we still need it?
+        if (pkt->cmd != MemCmd::UpgradeReq)
+        {
+            delete pkt->req;
+            delete pkt;
         }
+    }
+}
 
-        cache->sendResult(pkt, mshr, success);
+bool
+BaseCache::CachePort::drainResponse()
+{
+    DPRINTF(CachePort,
+            "%s attempting to send a retry for response (%i waiting)\n",
+            name(), drainList.size());
+    //We have some responses to drain first
+    PacketPtr pkt = drainList.front();
+    if (sendTiming(pkt)) {
+        drainList.pop_front();
+        DPRINTF(CachePort, "%s sucessful in sending a retry for"
+                "response (%i still waiting)\n", name(), drainList.size());
+        if (!drainList.empty() || isBusRequested()) {
 
-        if (success && cache->doMasterRequest())
-        {
-            DPRINTF(CachePort, "%s has more requests\n", name());
-            //Still more to issue, rerequest in 1 cycle
-            new BaseCache::RequestEvent(this, curTick + 1);
+            DPRINTF(CachePort, "%s has more responses/requests\n", name());
+            return false;
         }
+    } else {
+        waitingOnRetry = true;
+        DPRINTF(CachePort, "%s now waiting on a retry\n", name());
     }
-    else
-    {
-        assert(cache->doSlaveRequest());
-        //pkt = cache->getCoherencePacket();
-        //We save the packet, no reordering on CSHRS
-        pkt = cache->getCoherencePacket();
-        MSHR* cshr = (MSHR*)pkt->senderState;
-        bool success = sendTiming(pkt);
-        cache->sendCoherenceResult(pkt, cshr, success);
-        waitingOnRetry = !success;
-        if (success && cache->doSlaveRequest())
-        {
-            DPRINTF(CachePort, "%s has more requests\n", name());
-            //Still more to issue, rerequest in 1 cycle
-            new BaseCache::RequestEvent(this, curTick + 1);
+    return true;
+}
+
+
+bool
+BaseCache::CachePort::recvRetryCommon()
+{
+    assert(waitingOnRetry);
+    waitingOnRetry = false;
+    if (!drainList.empty()) {
+        if (!drainResponse()) {
+            // more responses to drain... re-request bus
+            scheduleRequestEvent(curTick + 1);
         }
+        // Check if we're done draining once this list is empty
+        if (drainList.empty()) {
+            cache->checkDrain();
+        }
+        return true;
     }
-    if (waitingOnRetry) DPRINTF(CachePort, "%s STILL Waiting on retry\n", name());
-    else DPRINTF(CachePort, "%s no longer waiting on retry\n", name());
-    return;
+    return false;
 }
+
+
 void
 BaseCache::CachePort::setBlocked()
 {
@@ -225,143 +241,6 @@ BaseCache::CachePort::clearBlocked()
     }
 }
 
-BaseCache::RequestEvent::RequestEvent(CachePort *_cachePort, Tick when)
-    : Event(&mainEventQueue, CPU_Tick_Pri), cachePort(_cachePort)
-{
-    this->setFlags(AutoDelete);
-    schedule(when);
-}
-
-void
-BaseCache::RequestEvent::process()
-{
-    if (cachePort->waitingOnRetry) return;
-    //We have some responses to drain first
-    if (!cachePort->drainList.empty()) {
-        DPRINTF(CachePort, "%s trying to drain a response\n", cachePort->name());
-        if (cachePort->sendTiming(cachePort->drainList.front())) {
-            DPRINTF(CachePort, "%s drains a response succesfully\n", cachePort->name());
-            cachePort->drainList.pop_front();
-            if (!cachePort->drainList.empty() ||
-                !cachePort->isCpuSide && cachePort->cache->doMasterRequest() ||
-                cachePort->isCpuSide && cachePort->cache->doSlaveRequest()) {
-
-                DPRINTF(CachePort, "%s still has outstanding bus reqs\n", cachePort->name());
-                this->schedule(curTick + 1);
-            }
-        }
-        else {
-            cachePort->waitingOnRetry = true;
-            DPRINTF(CachePort, "%s now waiting on a retry\n", cachePort->name());
-        }
-    }
-    else if (!cachePort->isCpuSide)
-    {            //MSHR
-        DPRINTF(CachePort, "%s trying to send a MSHR request\n", cachePort->name());
-        if (!cachePort->cache->doMasterRequest()) {
-            //This can happen if I am the owner of a block and see an upgrade
-            //while the block was in my WB Buffers.  I just remove the
-            //wb and de-assert the masterRequest
-            return;
-        }
-
-        PacketPtr pkt = cachePort->cache->getPacket();
-        MSHR* mshr = (MSHR*) pkt->senderState;
-        //Copy the packet, it may be modified/destroyed elsewhere
-        PacketPtr copyPkt = new Packet(*pkt);
-        copyPkt->dataStatic<uint8_t>(pkt->getPtr<uint8_t>());
-        mshr->pkt = copyPkt;
-
-        bool success = cachePort->sendTiming(pkt);
-        DPRINTF(Cache, "Address %x was %s in sending the timing request\n",
-                pkt->getAddr(), success ? "succesful" : "unsuccesful");
-
-        cachePort->waitingOnRetry = !success;
-        if (cachePort->waitingOnRetry) {
-            DPRINTF(CachePort, "%s now waiting on a retry\n", cachePort->name());
-        }
-
-        cachePort->cache->sendResult(pkt, mshr, success);
-        if (success && cachePort->cache->doMasterRequest())
-        {
-            DPRINTF(CachePort, "%s still more MSHR requests to send\n",
-                    cachePort->name());
-            //Still more to issue, rerequest in 1 cycle
-            this->schedule(curTick+1);
-        }
-    }
-    else
-    {
-        //CSHR
-        assert(cachePort->cache->doSlaveRequest());
-        PacketPtr pkt = cachePort->cache->getCoherencePacket();
-        MSHR* cshr = (MSHR*) pkt->senderState;
-        bool success = cachePort->sendTiming(pkt);
-        cachePort->cache->sendCoherenceResult(pkt, cshr, success);
-        cachePort->waitingOnRetry = !success;
-        if (cachePort->waitingOnRetry)
-            DPRINTF(CachePort, "%s now waiting on a retry\n", cachePort->name());
-        if (success && cachePort->cache->doSlaveRequest())
-        {
-            DPRINTF(CachePort, "%s still more CSHR requests to send\n",
-                    cachePort->name());
-            //Still more to issue, rerequest in 1 cycle
-            this->schedule(curTick+1);
-        }
-    }
-}
-
-const char *
-BaseCache::RequestEvent::description()
-{
-    return "Cache request event";
-}
-
-BaseCache::ResponseEvent::ResponseEvent(CachePort *_cachePort)
-    : Event(&mainEventQueue, CPU_Tick_Pri), cachePort(_cachePort)
-{
-}
-
-void
-BaseCache::ResponseEvent::process()
-{
-    assert(cachePort->transmitList.size());
-    assert(cachePort->transmitList.front().first <= curTick);
-    PacketPtr pkt = cachePort->transmitList.front().second;
-    cachePort->transmitList.pop_front();
-    if (!cachePort->transmitList.empty()) {
-        Tick time = cachePort->transmitList.front().first;
-        schedule(time <= curTick ? curTick+1 : time);
-    }
-
-    if (pkt->flags & NACKED_LINE)
-        pkt->result = Packet::Nacked;
-    else
-        pkt->result = Packet::Success;
-    pkt->makeTimingResponse();
-    DPRINTF(CachePort, "%s attempting to send a response\n", cachePort->name());
-    if (!cachePort->drainList.empty() || cachePort->waitingOnRetry) {
-        //Already have a list, just append
-        cachePort->drainList.push_back(pkt);
-        DPRINTF(CachePort, "%s appending response onto drain list\n", cachePort->name());
-    }
-    else if (!cachePort->sendTiming(pkt)) {
-        //It failed, save it to list of drain events
-        DPRINTF(CachePort, "%s now waiting for a retry\n", cachePort->name());
-        cachePort->drainList.push_back(pkt);
-        cachePort->waitingOnRetry = true;
-    }
-
-    // Check if we're done draining once this list is empty
-    if (cachePort->drainList.empty() && cachePort->transmitList.empty())
-        cachePort->cache->checkDrain();
-}
-
-const char *
-BaseCache::ResponseEvent::description()
-{
-    return "Cache response event";
-}
 
 void
 BaseCache::init()
diff --git a/src/mem/cache/base_cache.hh b/src/mem/cache/base_cache.hh
index e45e36fa0..2d63945d9 100644
--- a/src/mem/cache/base_cache.hh
+++ b/src/mem/cache/base_cache.hh
@@ -26,6 +26,8 @@
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  *
  * Authors: Erik Hallnor
+ *          Steve Reinhardt
+ *          Ron Dreslinski
  */
 
 /**
@@ -83,7 +85,10 @@ class BaseCache : public MemObject
         BaseCache *cache;
 
       protected:
-        CachePort(const std::string &_name, BaseCache *_cache, bool _isCpuSide);
+        Event *responseEvent;
+
+        CachePort(const std::string &_name, BaseCache *_cache);
+
         virtual void recvStatusChange(Status status);
 
         virtual void getDeviceAddressRanges(AddrRangeList &resp,
@@ -91,9 +96,11 @@ class BaseCache : public MemObject
 
         virtual int deviceBlockSize();
 
-        virtual void recvRetry();
+        bool recvRetryCommon();
 
       public:
+        void setOtherPort(CachePort *_otherPort) { otherPort = _otherPort; }
+
         void setBlocked();
 
         void clearBlocked();
@@ -104,65 +111,52 @@ class BaseCache : public MemObject
 
         bool canDrain() { return drainList.empty() && transmitList.empty(); }
 
+        bool drainResponse();
+
+        CachePort *otherPort;
+
         bool blocked;
 
         bool mustSendRetry;
 
-        bool isCpuSide;
-
         bool waitingOnRetry;
 
+        /**
+         * Bit vector for the outstanding requests for the master interface.
+         */
+        uint8_t requestCauses;
+
         std::list<PacketPtr> drainList;
 
         std::list<std::pair<Tick,PacketPtr> > transmitList;
-    };
 
-    struct RequestEvent : public Event
-    {
-        CachePort *cachePort;
+        bool isBusRequested() { return requestCauses != 0; }
 
-        RequestEvent(CachePort *_cachePort, Tick when);
-        void process();
-        const char *description();
-    };
+        // These need to be virtual since the Event objects depend on
+        // cache template parameters.
+        virtual void scheduleRequestEvent(Tick t) = 0;
 
-    struct ResponseEvent : public Event
-    {
-        CachePort *cachePort;
+        void requestBus(RequestCause cause, Tick time)
+        {
+            if (!isBusRequested() && !waitingOnRetry) {
+                scheduleRequestEvent(time);
+            }
+            requestCauses |= (1 << cause);
+        }
+
+        void deassertBusRequest(RequestCause cause)
+        {
+            requestCauses &= ~(1 << cause);
+        }
 
-        ResponseEvent(CachePort *_cachePort);
-        void process();
-        const char *description();
+        void respond(PacketPtr pkt, Tick time);
     };
 
   public: //Made public so coherence can get at it.
     CachePort *cpuSidePort;
     CachePort *memSidePort;
 
-    ResponseEvent *sendEvent;
-    ResponseEvent *memSendEvent;
-
   private:
-    void recvStatusChange(Port::Status status, bool isCpuSide)
-    {
-        if (status == Port::RangeChange){
-            if (!isCpuSide) {
-                cpuSidePort->sendStatusChange(Port::RangeChange);
-            }
-            else {
-                memSidePort->sendStatusChange(Port::RangeChange);
-            }
-        }
-    }
-
-    virtual PacketPtr getPacket() = 0;
-
-    virtual PacketPtr getCoherencePacket() = 0;
-
-    virtual void sendResult(PacketPtr &pkt, MSHR* mshr, bool success) = 0;
-
-    virtual void sendCoherenceResult(PacketPtr &pkt, MSHR* mshr, bool success) = 0;
-
     /**
      * Bit vector of the blocking reasons for the access path.
      * @sa #BlockedCause
@@ -175,16 +169,6 @@ class BaseCache : public MemObject
      */
     uint8_t blockedSnoop;
 
-    /**
-     * Bit vector for the outstanding requests for the master interface.
-     */
-    uint8_t masterRequests;
-
-    /**
-     * Bit vector for the outstanding requests for the slave interface.
-     */
-    uint8_t slaveRequests;
-
   protected:
 
     /** Stores time the cache blocked for statistics. */
@@ -309,20 +293,10 @@ class BaseCache : public MemObject
      * of this cache.
      * @param params The parameter object for this BaseCache.
      */
-    BaseCache(const std::string &name, Params &params)
-        : MemObject(name), blocked(0), blockedSnoop(0), masterRequests(0),
-          slaveRequests(0), blkSize(params.blkSize),
-          missCount(params.maxMisses), drainEvent(NULL)
-    {
-        //Start ports at null if more than one is created we should panic
-        cpuSidePort = NULL;
-        memSidePort = NULL;
-    }
+    BaseCache(const std::string &name, Params &params);
 
     ~BaseCache()
     {
-        delete sendEvent;
-        delete memSendEvent;
     }
 
     virtual void init();
@@ -422,12 +396,12 @@ class BaseCache : public MemObject
     }
 
     /**
-     * True if the master bus should be requested.
+     * True if the memory-side bus should be requested.
      * @return True if there are outstanding requests for the master bus.
      */
-    bool doMasterRequest()
+    bool isMemSideBusRequested()
     {
-        return masterRequests != 0;
+        return memSidePort->isBusRequested();
     }
 
     /**
@@ -435,59 +409,18 @@ class BaseCache : public MemObject
      * @param cause The reason for the request.
      * @param time The time to make the request.
      */
-    void setMasterRequest(RequestCause cause, Tick time)
+    void requestMemSideBus(RequestCause cause, Tick time)
     {
-        if (!doMasterRequest() && !memSidePort->waitingOnRetry)
-        {
-            new RequestEvent(memSidePort, time);
-        }
-        uint8_t flag = 1<<cause;
-        masterRequests |= flag;
+        memSidePort->requestBus(cause, time);
     }
 
     /**
      * Clear the master bus request for the given cause.
      * @param cause The request reason to clear.
      */
-    void clearMasterRequest(RequestCause cause)
+    void deassertMemSideBusRequest(RequestCause cause)
     {
-        uint8_t flag = 1<<cause;
-        masterRequests &= ~flag;
-        checkDrain();
-    }
-
-    /**
-     * Return true if the slave bus should be requested.
-     * @return True if there are outstanding requests for the slave bus.
-     */
-    bool doSlaveRequest()
-    {
-        return slaveRequests != 0;
-    }
-
-    /**
-     * Request the slave bus for the given reason and time.
-     * @param cause The reason for the request.
-     * @param time The time to make the request.
-     */
-    void setSlaveRequest(RequestCause cause, Tick time)
-    {
-        if (!doSlaveRequest() && !cpuSidePort->waitingOnRetry)
-        {
-            new RequestEvent(cpuSidePort, time);
-        }
-        uint8_t flag = 1<<cause;
-        slaveRequests |= flag;
-    }
-
-    /**
-     * Clear the slave bus request for the given reason.
-     * @param cause The request reason to clear.
-     */
-    void clearSlaveRequest(RequestCause cause)
-    {
-        uint8_t flag = 1<<cause;
-        slaveRequests &= ~flag;
+        memSidePort->deassertBusRequest(cause);
         checkDrain();
     }
 
@@ -498,111 +431,7 @@ class BaseCache : public MemObject
      */
     void respond(PacketPtr pkt, Tick time)
     {
-        assert(time >= curTick);
-        if (pkt->needsResponse()) {
-/*            CacheEvent *reqCpu = new CacheEvent(cpuSidePort, pkt);
-            reqCpu->schedule(time);
-*/
-            if (cpuSidePort->transmitList.empty()) {
-                assert(!sendEvent->scheduled());
-                sendEvent->schedule(time);
-                cpuSidePort->transmitList.push_back(std::pair<Tick,PacketPtr>
-                                                    (time,pkt));
-                return;
-            }
-
-            // something is on the list and this belongs at the end
-            if (time >= cpuSidePort->transmitList.back().first) {
-                cpuSidePort->transmitList.push_back(std::pair<Tick,PacketPtr>
-                                                    (time,pkt));
-                return;
-            }
-            // Something is on the list and this belongs somewhere else
-            std::list<std::pair<Tick,PacketPtr> >::iterator i =
-                cpuSidePort->transmitList.begin();
-            std::list<std::pair<Tick,PacketPtr> >::iterator end =
-                cpuSidePort->transmitList.end();
-            bool done = false;
-
-            while (i != end && !done) {
-                if (time < i->first) {
-                    if (i == cpuSidePort->transmitList.begin()) {
-                        //Inserting at begining, reschedule
-                        sendEvent->reschedule(time);
-                    }
-                    cpuSidePort->transmitList.insert(i,std::pair<Tick,PacketPtr>
-                                                     (time,pkt));
-                    done = true;
-                }
-                i++;
-            }
-        }
-        else {
-            if (pkt->cmd != MemCmd::UpgradeReq)
-            {
-                delete pkt->req;
-                delete pkt;
-            }
-        }
-    }
-
-    /**
-     * Send a reponse to the slave interface and calculate miss latency.
-     * @param pkt The request to respond to.
-     * @param time The time the response is ready.
-     */
-    void respondToMiss(PacketPtr pkt, Tick time)
-    {
-        assert(time >= curTick);
-        if (!pkt->req->isUncacheable()) {
-            missLatency[pkt->cmdToIndex()][0/*pkt->req->getThreadNum()*/] +=
-                time - pkt->time;
-        }
-        if (pkt->needsResponse()) {
-/*            CacheEvent *reqCpu = new CacheEvent(cpuSidePort, pkt);
-            reqCpu->schedule(time);
-*/
-            if (cpuSidePort->transmitList.empty()) {
-                assert(!sendEvent->scheduled());
-                sendEvent->schedule(time);
-                cpuSidePort->transmitList.push_back(std::pair<Tick,PacketPtr>
-                                                    (time,pkt));
-                return;
-            }
-
-            // something is on the list and this belongs at the end
-            if (time >= cpuSidePort->transmitList.back().first) {
-                cpuSidePort->transmitList.push_back(std::pair<Tick,PacketPtr>
-                                                    (time,pkt));
-                return;
-            }
-            // Something is on the list and this belongs somewhere else
-            std::list<std::pair<Tick,PacketPtr> >::iterator i =
-                cpuSidePort->transmitList.begin();
-            std::list<std::pair<Tick,PacketPtr> >::iterator end =
-                cpuSidePort->transmitList.end();
-            bool done = false;
-
-            while (i != end && !done) {
-                if (time < i->first) {
-                    if (i == cpuSidePort->transmitList.begin()) {
-                        //Inserting at begining, reschedule
-                        sendEvent->reschedule(time);
-                    }
-                    cpuSidePort->transmitList.insert(i,std::pair<Tick,PacketPtr>
-                                                     (time,pkt));
-                    done = true;
-                }
-                i++;
-            }
-        }
-        else {
-            if (pkt->cmd != MemCmd::UpgradeReq)
-            {
-                delete pkt->req;
-                delete pkt;
-            }
-        }
+        cpuSidePort->respond(pkt, time);
     }
 
     /**
@@ -611,65 +440,7 @@ class BaseCache : public MemObject
      */
     void respondToSnoop(PacketPtr pkt, Tick time)
     {
-        assert(time >= curTick);
-        assert (pkt->needsResponse());
-/*        CacheEvent *reqMem = new CacheEvent(memSidePort, pkt);
-        reqMem->schedule(time);
-*/
-        if (memSidePort->transmitList.empty()) {
-            assert(!memSendEvent->scheduled());
-            memSendEvent->schedule(time);
-            memSidePort->transmitList.push_back(std::pair<Tick,PacketPtr>
-                                                (time,pkt));
-            return;
-        }
-
-        // something is on the list and this belongs at the end
-        if (time >= memSidePort->transmitList.back().first) {
-            memSidePort->transmitList.push_back(std::pair<Tick,PacketPtr>
-                                                (time,pkt));
-            return;
-        }
-        // Something is on the list and this belongs somewhere else
-        std::list<std::pair<Tick,PacketPtr> >::iterator i =
-            memSidePort->transmitList.begin();
-        std::list<std::pair<Tick,PacketPtr> >::iterator end =
-            memSidePort->transmitList.end();
-        bool done = false;
-
-        while (i != end && !done) {
-            if (time < i->first) {
-                if (i == memSidePort->transmitList.begin()) {
-                    //Inserting at begining, reschedule
-                    memSendEvent->reschedule(time);
-                }
-                memSidePort->transmitList.insert(i,std::pair<Tick,PacketPtr>(time,pkt));
-                done = true;
-            }
-            i++;
-        }
-    }
-
-    /**
-     * Notification from master interface that a address range changed. Nothing
-     * to do for a cache.
-     */
-    void rangeChange() {}
-
-    void getAddressRanges(AddrRangeList &resp, AddrRangeList &snoop, bool isCpuSide)
-    {
-        if (isCpuSide)
-        {
-            AddrRangeList dummy;
-            memSidePort->getPeerAddressRanges(resp, dummy);
-        }
-        else
-        {
-            //This is where snoops get updated
-            AddrRangeList dummy;
-            cpuSidePort->getPeerAddressRanges(dummy, snoop);
-            return;
-        }
+        memSidePort->respond(pkt, time);
     }
 
     virtual unsigned int drain(Event *de);
@@ -686,7 +457,7 @@ class BaseCache : public MemObject
 
     bool canDrain()
     {
-        if (doMasterRequest() || doSlaveRequest()) {
+        if (isMemSideBusRequested()) {
             return false;
         } else if (memSidePort && !memSidePort->canDrain()) {
             return false;
diff --git a/src/mem/cache/cache.cc b/src/mem/cache/cache.cc
index cb4e7f62e..2b4e7b9c8 100644
--- a/src/mem/cache/cache.cc
+++ b/src/mem/cache/cache.cc
@@ -61,7 +61,6 @@
 #include "mem/cache/miss/miss_queue.hh"
 #include "mem/cache/miss/blocking_buffer.hh"
 
-#include "mem/cache/coherence/uni_coherence.hh"
 #include "mem/cache/coherence/simple_coherence.hh"
 
 #include "mem/cache/cache_impl.hh"
@@ -72,27 +71,22 @@
 
 #if defined(USE_CACHE_FALRU)
 template class Cache<FALRU, SimpleCoherence>;
-template class Cache<FALRU, UniCoherence>;
 #endif
 
 #if defined(USE_CACHE_IIC)
 template class Cache<IIC, SimpleCoherence>;
-template class Cache<IIC, UniCoherence>;
 #endif
 
 #if defined(USE_CACHE_LRU)
 template class Cache<LRU, SimpleCoherence>;
-template class Cache<LRU, UniCoherence>;
 #endif
 
 #if defined(USE_CACHE_SPLIT)
 template class Cache<Split, SimpleCoherence>;
-template class Cache<Split, UniCoherence>;
 #endif
 
 #if defined(USE_CACHE_SPLIT_LIFO)
 template class Cache<SplitLIFO, SimpleCoherence>;
-template class Cache<SplitLIFO, UniCoherence>;
 #endif
 
 #endif //DOXYGEN_SHOULD_SKIP_THIS
diff --git a/src/mem/cache/cache.hh b/src/mem/cache/cache.hh
index 3e45c85d2..75fb50f4e 100644
--- a/src/mem/cache/cache.hh
+++ b/src/mem/cache/cache.hh
@@ -28,6 +28,7 @@
  * Authors: Erik Hallnor
  *          Dave Greene
  *          Steve Reinhardt
+ *          Ron Dreslinski
  */
 
 /**
@@ -46,6 +47,8 @@
 #include "mem/cache/cache_blk.hh"
 #include "mem/cache/miss/miss_buffer.hh"
 
+#include "sim/eventq.hh"
+
 //Forward decleration
 class MSHR;
 class BasePrefetcher;
@@ -83,11 +86,26 @@ class Cache : public BaseCache
             return static_cast<Cache<TagStore,Coherence> *>(cache);
         }
 
+        void processRequestEvent();
+        void processResponseEvent();
+
         virtual bool recvTiming(PacketPtr pkt);
 
+        virtual void recvRetry();
+
         virtual Tick recvAtomic(PacketPtr pkt);
 
         virtual void recvFunctional(PacketPtr pkt);
+
+        typedef EventWrapper<CpuSidePort, &CpuSidePort::processResponseEvent>
+                ResponseEvent;
+
+        typedef EventWrapper<CpuSidePort, &CpuSidePort::processRequestEvent>
+                RequestEvent;
+
+        virtual void scheduleRequestEvent(Tick t) {
+            new RequestEvent(this, t);
+        }
     };
 
     class MemSidePort : public CachePort
@@ -103,11 +121,26 @@ class Cache : public BaseCache
             return static_cast<Cache<TagStore,Coherence> *>(cache);
         }
 
+        void processRequestEvent();
+        void processResponseEvent();
+
         virtual bool recvTiming(PacketPtr pkt);
 
+        virtual void recvRetry();
+
         virtual Tick recvAtomic(PacketPtr pkt);
 
         virtual void recvFunctional(PacketPtr pkt);
+
+        typedef EventWrapper<MemSidePort, &MemSidePort::processResponseEvent>
+                ResponseEvent;
+
+        typedef EventWrapper<MemSidePort, &MemSidePort::processRequestEvent>
+                RequestEvent;
+
+        virtual void scheduleRequestEvent(Tick t) {
+            new RequestEvent(this, t);
+        }
     };
 
     /** Tag and data Storage */
@@ -339,8 +372,6 @@ class Cache : public BaseCache
     virtual Port *getPort(const std::string &if_name, int idx = -1);
     virtual void deletePortRefs(Port *p);
 
-    virtual void recvStatusChange(Port::Status status, bool isCpuSide);
-
     void regStats();
 
     /**
@@ -354,21 +385,14 @@ class Cache : public BaseCache
      * Selects a request to send on the bus.
      * @return The memory request to service.
      */
-    virtual PacketPtr getPacket();
+    PacketPtr getPacket();
 
     /**
      * Was the request was sent successfully?
      * @param pkt The request.
      * @param success True if the request was sent successfully.
      */
-    virtual void sendResult(PacketPtr &pkt, MSHR* mshr, bool success);
-
-    /**
-     * Was the CSHR request was sent successfully?
-     * @param pkt The request.
-     * @param success True if the request was sent successfully.
-     */
-    virtual void sendCoherenceResult(PacketPtr &pkt, MSHR* cshr, bool success);
+    void sendResult(PacketPtr &pkt, MSHR* mshr, bool success);
 
     /**
      * Handles a response (cache line fill/write ack) from the bus.
@@ -376,12 +400,6 @@ class Cache : public BaseCache
      */
     void handleResponse(PacketPtr &pkt);
 
-    /**
-     * Selects a coherence message to forward to lower levels of the hierarchy.
-     * @return The coherence message to forward.
-     */
-    virtual PacketPtr getCoherencePacket();
-
     /**
      * Snoops bus transactions to maintain coherence.
      * @param pkt The current bus transaction.
diff --git a/src/mem/cache/cache_builder.cc b/src/mem/cache/cache_builder.cc
index e887f711e..bc1a8a775 100644
--- a/src/mem/cache/cache_builder.cc
+++ b/src/mem/cache/cache_builder.cc
@@ -75,7 +75,6 @@
 #include "mem/cache/miss/blocking_buffer.hh"
 
 // Coherence Templates
-#include "mem/cache/coherence/uni_coherence.hh"
 #include "mem/cache/coherence/simple_coherence.hh"
 
 //Prefetcher Headers
@@ -302,13 +301,8 @@ END_INIT_SIM_OBJECT_PARAMS(BaseCache)
     } while (0)
 
 #define BUILD_COHERENCE(b) do {						\
-        if (protocol == NULL) {						\
-            UniCoherence *coh = new UniCoherence();			\
-            BUILD_CACHES(UniCoherence);				\
-        } else {							\
-            SimpleCoherence *coh = new SimpleCoherence(protocol);	\
-            BUILD_CACHES(SimpleCoherence);				\
-        }								\
+        SimpleCoherence *coh = new SimpleCoherence(protocol);           \
+        BUILD_CACHES(SimpleCoherence);                                  \
     } while (0)
 
 #if defined(USE_TAGGED)
diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh
index 9b094c1e3..6b9eac865 100644
--- a/src/mem/cache/cache_impl.hh
+++ b/src/mem/cache/cache_impl.hh
@@ -28,6 +28,8 @@
  * Authors: Erik Hallnor
  *          Dave Greene
  *          Nathan Binkert
+ *          Steve Reinhardt
+ *          Ron Dreslinski
  */
 
 /**
@@ -57,18 +59,8 @@
 bool SIGNAL_NACK_HACK;
 
 template<class TagStore, class Coherence>
-void
-Cache<TagStore,Coherence>::
-recvStatusChange(Port::Status status, bool isCpuSide)
-{
-
-}
-
-
-template<class TagStore, class Coherence>
-Cache<TagStore,Coherence>::
-Cache(const std::string &_name,
-      Cache<TagStore,Coherence>::Params &params)
+Cache<TagStore,Coherence>::Cache(const std::string &_name,
+                                 Cache<TagStore,Coherence>::Params &params)
     : BaseCache(_name, params.baseParams),
       prefetchAccess(params.prefetchAccess),
       tags(params.tags), missQueue(params.missQueue),
@@ -84,6 +76,11 @@ Cache(const std::string &_name,
       adaptiveCompression(params.adaptiveCompression),
       writebackCompressed(params.writebackCompressed)
 {
+    cpuSidePort = new CpuSidePort(_name + "-cpu_side_port", this);
+    memSidePort = new MemSidePort(_name + "-mem_side_port", this);
+    cpuSidePort->setOtherPort(memSidePort);
+    memSidePort->setOtherPort(cpuSidePort);
+
     tags->setCache(this);
     missQueue->setCache(this);
     missQueue->setPrefetcher(prefetcher);
@@ -406,7 +403,11 @@ Cache<TagStore,Coherence>::handleFill(BlkType *blk, MSHR * mshr,
 //            mshr->pkt = pkt;
             break;
         }
-        respondToMiss(target, completion_time);
+        if (!target->req->isUncacheable()) {
+            missLatency[target->cmdToIndex()][0/*pkt->req->getThreadNum()*/] +=
+                completion_time - target->time;
+        }
+        respond(target, completion_time);
         mshr->popTarget();
     }
 
@@ -688,7 +689,7 @@ Cache<TagStore,Coherence>::getPacket()
         }
     }
 
-    assert(!doMasterRequest() || missQueue->havePending());
+    assert(!isMemSideBusRequested() || missQueue->havePending());
     assert(!pkt || pkt->time <= curTick);
     SIGNAL_NACK_HACK = false;
     return pkt;
@@ -727,7 +728,6 @@ Cache<TagStore,Coherence>::sendResult(PacketPtr &pkt, MSHR* mshr,
         pkt->flags &= ~NACKED_LINE;
         SIGNAL_NACK_HACK = false;
         pkt->flags &= ~SATISFIED;
-        pkt->flags &= ~SNOOP_COMMIT;
 
 //Rmove copy from mshr
         delete mshr->pkt;
@@ -783,22 +783,6 @@ Cache<TagStore,Coherence>::handleResponse(PacketPtr &pkt)
     }
 }
 
-template<class TagStore, class Coherence>
-PacketPtr
-Cache<TagStore,Coherence>::getCoherencePacket()
-{
-    return coherence->getPacket();
-}
-
-template<class TagStore, class Coherence>
-void
-Cache<TagStore,Coherence>::sendCoherenceResult(PacketPtr &pkt,
-                                                         MSHR *cshr,
-                                                         bool success)
-{
-    coherence->sendResult(pkt, cshr, success);
-}
-
 
 template<class TagStore, class Coherence>
 void
@@ -1146,27 +1130,15 @@ template<class TagStore, class Coherence>
 Port *
 Cache<TagStore,Coherence>::getPort(const std::string &if_name, int idx)
 {
-    if (if_name == "" || if_name == "cpu_side")
-    {
-        if (cpuSidePort == NULL) {
-            cpuSidePort = new CpuSidePort(name() + "-cpu_side_port", this);
-            sendEvent = new ResponseEvent(cpuSidePort);
-        }
+    if (if_name == "" || if_name == "cpu_side") {
         return cpuSidePort;
-    }
-    else if (if_name == "functional")
-    {
-        return new CpuSidePort(name() + "-cpu_side_funcport", this);
-    }
-    else if (if_name == "mem_side")
-    {
-        if (memSidePort != NULL)
-            panic("Already have a mem side for this cache\n");
-        memSidePort = new MemSidePort(name() + "-mem_side_port", this);
-        memSendEvent = new ResponseEvent(memSidePort);
+    } else if (if_name == "mem_side") {
         return memSidePort;
+    } else if (if_name == "functional") {
+        return new CpuSidePort(name() + "-cpu_side_funcport", this);
+    } else {
+        panic("Port name %s unrecognized\n", if_name);
     }
-    else panic("Port name %s unrecognized\n", if_name);
 }
 
 template<class TagStore, class Coherence>
@@ -1213,6 +1185,68 @@ Cache<TagStore,Coherence>::CpuSidePort::recvTiming(PacketPtr pkt)
     return true;
 }
 
+// Retry callback for the CPU-side port; defers entirely to recvRetryCommon().
+template<class TagStore, class Coherence>
+void
+Cache<TagStore,Coherence>::CpuSidePort::recvRetry()
+{
+    recvRetryCommon();
+}
+
+// Request event: unless waiting on a retry, drain queued CPU-side responses.
+template<class TagStore, class Coherence>
+void
+Cache<TagStore,Coherence>::CpuSidePort::processRequestEvent()
+{
+    if (waitingOnRetry)
+        return;
+    // Only queued responses (drainList) are handled on this port
+    if (!drainList.empty()) {
+        if (!drainResponse()) {
+            // presumably false means responses remain; retry at curTick + 1
+            scheduleRequestEvent(curTick + 1);
+        }
+    }
+}
+
+// Response event: takes the head of transmitList and tries to send it.
+template<class TagStore, class Coherence>
+void
+Cache<TagStore,Coherence>::CpuSidePort::processResponseEvent()
+{
+    assert(transmitList.size());
+    assert(transmitList.front().first <= curTick);
+    PacketPtr pkt = transmitList.front().second;
+    transmitList.pop_front();
+    if (!transmitList.empty()) {
+        Tick time = transmitList.front().first;
+        responseEvent->schedule(time <= curTick ? curTick+1 : time);
+    }
+    // Derive the result from NACKED_LINE, then turn pkt into a response.
+    if (pkt->flags & NACKED_LINE)
+        pkt->result = Packet::Nacked;
+    else
+        pkt->result = Packet::Success;
+    pkt->makeTimingResponse();
+    DPRINTF(CachePort, "%s attempting to send a response\n", name());
+    if (!drainList.empty() || waitingOnRetry) {
+        // Responses already queued or a retry outstanding: append in order
+        drainList.push_back(pkt);
+        DPRINTF(CachePort, "%s appending response onto drain list\n", name());
+    }
+    else if (!sendTiming(pkt)) {
+        // sendTiming() was refused: queue the packet and wait for a retry
+        DPRINTF(CachePort, "%s now waiting for a retry\n", name());
+        drainList.push_back(pkt);
+        waitingOnRetry = true;
+    }
+
+    // With both lists empty, let the cache check whether a drain is done
+    if (drainList.empty() && transmitList.empty())
+        myCache()->checkDrain();
+}
+
+
 template<class TagStore, class Coherence>
 Tick
 Cache<TagStore,Coherence>::CpuSidePort::recvAtomic(PacketPtr pkt)
@@ -1249,23 +1283,149 @@ Cache<TagStore,Coherence>::MemSidePort::recvTiming(PacketPtr pkt)
     if (pkt->result == Packet::Nacked)
         panic("Need to implement cache resending nacked packets!\n");
 
-    if (pkt->isRequest() && blocked)
-    {
+    if (pkt->isRequest() && blocked) {
         DPRINTF(Cache,"Scheduling a retry while blocked\n");
         mustSendRetry = true;
         return false;
     }
 
-    if (pkt->isResponse())
+    if (pkt->isResponse()) {
         myCache()->handleResponse(pkt);
-    else {
-        //Check if we should do the snoop
-        if (pkt->flags & SNOOP_COMMIT)
-            myCache()->snoop(pkt);
+    } else {
+        myCache()->snoop(pkt);
     }
     return true;
 }
 
+// Memory-side retry: unless recvRetryCommon() returns true, send the next
+// packet from getPacket() and report the outcome via sendResult().
+template<class TagStore, class Coherence>
+void
+Cache<TagStore,Coherence>::MemSidePort::recvRetry()
+{
+    if (recvRetryCommon())
+        return;
+
+    DPRINTF(CachePort, "%s attempting to send a retry for MSHR\n", name());
+    if (!cache->isMemSideBusRequested()) {
+        // This can happen if I am the owner of a block and see an upgrade
+        // while the block was in my WB buffers.  The wb is removed and the
+        // mem-side bus request deasserted, so there is nothing to send.
+        waitingOnRetry = false;
+        return;
+    }
+    PacketPtr pkt = myCache()->getPacket();
+    MSHR* mshr = (MSHR*) pkt->senderState;
+    // Keep a copy in the MSHR; pkt may be modified/destroyed elsewhere
+    PacketPtr copyPkt = new Packet(*pkt);
+    copyPkt->dataStatic<uint8_t>(pkt->getPtr<uint8_t>());
+    mshr->pkt = copyPkt;
+
+    bool success = sendTiming(pkt);
+    DPRINTF(Cache, "Address %x was %s in sending the timing request\n",
+            pkt->getAddr(), success ? "succesful" : "unsuccesful");
+
+    waitingOnRetry = !success;
+    if (waitingOnRetry) {
+        DPRINTF(CachePort, "%s now waiting on a retry\n", name());
+    }
+
+    myCache()->sendResult(pkt, mshr, success);
+
+    if (success && cache->isMemSideBusRequested()) {
+        DPRINTF(CachePort, "%s has more requests\n", name());
+        // Still more to issue; re-request via the port's scheduling hook
+        scheduleRequestEvent(curTick + 1);
+    }
+}
+
+// Request event: drain queued responses first, else send the next packet.
+template<class TagStore, class Coherence>
+void
+Cache<TagStore,Coherence>::MemSidePort::processRequestEvent()
+{
+    if (waitingOnRetry)
+        return;
+    // Queued responses (drainList) go out before any new request
+    if (!drainList.empty()) {
+        if (!drainResponse()) {
+            // presumably false means responses remain; retry at curTick + 1
+            scheduleRequestEvent(curTick + 1);
+        }
+        return;
+    }
+
+    DPRINTF(CachePort, "%s trying to send a MSHR request\n", name());
+    if (!isBusRequested()) {
+        // This can happen if I am the owner of a block and see an upgrade
+        // while the block was in my WB buffers.  The wb is removed and the
+        // mem-side bus request deasserted, so there is nothing to send.
+        return;
+    }
+
+    PacketPtr pkt = myCache()->getPacket();
+    MSHR* mshr = (MSHR*) pkt->senderState;
+    // Keep a copy in the MSHR; pkt may be modified/destroyed elsewhere
+    PacketPtr copyPkt = new Packet(*pkt);
+    copyPkt->dataStatic<uint8_t>(pkt->getPtr<uint8_t>());
+    mshr->pkt = copyPkt;
+
+    bool success = sendTiming(pkt);
+    DPRINTF(Cache, "Address %x was %s in sending the timing request\n",
+            pkt->getAddr(), success ? "succesful" : "unsuccesful");
+
+    waitingOnRetry = !success;
+    if (waitingOnRetry) {
+        DPRINTF(CachePort, "%s now waiting on a retry\n", name());
+    }
+
+    myCache()->sendResult(pkt, mshr, success);
+    if (success && isBusRequested())
+    {
+        DPRINTF(CachePort, "%s still more MSHR requests to send\n", name());
+        // Still more to issue; schedule another request event at curTick+1
+        scheduleRequestEvent(curTick+1);
+    }
+}
+
+// Response event: takes the head of transmitList and tries to send it.
+template<class TagStore, class Coherence>
+void
+Cache<TagStore,Coherence>::MemSidePort::processResponseEvent()
+{
+    assert(transmitList.size());
+    assert(transmitList.front().first <= curTick);
+    PacketPtr pkt = transmitList.front().second;
+    transmitList.pop_front();
+    if (!transmitList.empty()) {
+        Tick time = transmitList.front().first;
+        responseEvent->schedule(time <= curTick ? curTick+1 : time);
+    }
+    // Derive the result from NACKED_LINE, then turn pkt into a response.
+    if (pkt->flags & NACKED_LINE)
+        pkt->result = Packet::Nacked;
+    else
+        pkt->result = Packet::Success;
+    pkt->makeTimingResponse();
+    DPRINTF(CachePort, "%s attempting to send a response\n", name());
+    if (!drainList.empty() || waitingOnRetry) {
+        // Responses already queued or a retry outstanding: append in order
+        drainList.push_back(pkt);
+        DPRINTF(CachePort, "%s appending response onto drain list\n", name());
+    }
+    else if (!sendTiming(pkt)) {
+        // sendTiming() was refused: queue the packet and wait for a retry
+        DPRINTF(CachePort, "%s now waiting for a retry\n", name());
+        drainList.push_back(pkt);
+        waitingOnRetry = true;
+    }
+
+    // With both lists empty, let the cache check whether a drain is done
+    if (drainList.empty() && transmitList.empty())
+        myCache()->checkDrain();
+}
+
+
 template<class TagStore, class Coherence>
 Tick
 Cache<TagStore,Coherence>::MemSidePort::recvAtomic(PacketPtr pkt)
@@ -1292,15 +1452,17 @@ template<class TagStore, class Coherence>
 Cache<TagStore,Coherence>::
 CpuSidePort::CpuSidePort(const std::string &_name,
                          Cache<TagStore,Coherence> *_cache)
-    : BaseCache::CachePort(_name, _cache, true)
+    : BaseCache::CachePort(_name, _cache)
 {
+    responseEvent = new ResponseEvent(this);
 }
 
 template<class TagStore, class Coherence>
 Cache<TagStore,Coherence>::
 MemSidePort::MemSidePort(const std::string &_name,
                          Cache<TagStore,Coherence> *_cache)
-    : BaseCache::CachePort(_name, _cache, false)
+    : BaseCache::CachePort(_name, _cache)
 {
+    responseEvent = new ResponseEvent(this);
 }
 
diff --git a/src/mem/cache/coherence/SConscript b/src/mem/cache/coherence/SConscript
index 03a2d85d7..7b94f73e1 100644
--- a/src/mem/cache/coherence/SConscript
+++ b/src/mem/cache/coherence/SConscript
@@ -31,5 +31,4 @@
 Import('*')
 
 Source('coherence_protocol.cc')
-Source('uni_coherence.cc')
 
diff --git a/src/mem/cache/coherence/uni_coherence.cc b/src/mem/cache/coherence/uni_coherence.cc
deleted file mode 100644
index 6061c89c3..000000000
--- a/src/mem/cache/coherence/uni_coherence.cc
+++ /dev/null
@@ -1,135 +0,0 @@
-/*
- * Copyright (c) 2003-2005 The Regents of The University of Michigan
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met: redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer;
- * redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution;
- * neither the name of the copyright holders nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * Authors: Erik Hallnor
- */
-
-#include "mem/cache/coherence/uni_coherence.hh"
-#include "mem/cache/base_cache.hh"
-
-#include "base/trace.hh"
-
-using namespace std;
-
-UniCoherence::UniCoherence()
-    : cshrs(50)
-{
-}
-
-PacketPtr
-UniCoherence::getPacket()
-{
-    PacketPtr pkt = cshrs.getReq();
-    return pkt;
-}
-
-void
-UniCoherence::sendResult(PacketPtr &pkt, MSHR* cshr, bool success)
-{
-    if (success)
-    {
-        bool unblock = cshrs.isFull();
-//        cshrs.markInService(cshr);
-        delete pkt->req;
-        cshrs.deallocate(cshr);
-        if (!cshrs.havePending()) {
-            cache->clearSlaveRequest(Request_Coherence);
-        }
-        if (unblock) {
-            //since CSHRs are always used as buffers, should always get rid of one
-            assert(!cshrs.isFull());
-            cache->clearBlocked(Blocked_Coherence);
-        }
-    }
-}
-
-
-/**
- * @todo add support for returning slave requests, not doing them here.
- */
-bool
-UniCoherence::handleBusRequest(PacketPtr &pkt, CacheBlk *blk, MSHR *mshr,
-                               CacheBlk::State &new_state)
-{
-    new_state = 0;
-    if (pkt->isInvalidate()) {
-            DPRINTF(Cache, "snoop inval on blk %x (blk ptr %x)\n",
-                    pkt->getAddr(), blk);
-    }
-    else if (blk) {
-        new_state = blk->status;
-        if (pkt->isRead()) {
-            DPRINTF(Cache, "Uni-coherence snoops a read that hit in itself"
-                    ". Should satisfy the packet\n");
-            return true; //Satisfy Reads if we can
-        }
-    }
-    return false;
-}
-
-bool
-UniCoherence::propogateInvalidate(PacketPtr pkt, bool isTiming)
-{
-    if (pkt->isInvalidate()) {
-/*  Temp Fix for now, forward all invalidates up as functional accesses */
-        if (isTiming) {
-            // Forward to other caches
-            Request* req = new Request(pkt->req->getPaddr(), pkt->getSize(), 0);
-            PacketPtr tmp = new Packet(req, MemCmd::InvalidateReq, -1);
-            cshrs.allocate(tmp);
-            cache->setSlaveRequest(Request_Coherence, curTick);
-            if (cshrs.isFull())
-                cache->setBlockedForSnoop(Blocked_Coherence);
-        }
-        else {
-            PacketPtr tmp = new Packet(pkt->req, MemCmd::InvalidateReq, -1);
-            cache->cpuSidePort->sendAtomic(tmp);
-            delete tmp;
-        }
-/**/
-/*            PacketPtr tmp = new Packet(pkt->req, MemCmd::InvalidateReq, -1);
-            cache->cpuSidePort->sendFunctional(tmp);
-            delete tmp;
-*/
-    }
-    if (pkt->isRead()) {
-        /*For now we will see if someone above us has the data by
-          doing a functional access on reads.  Fix this later */
-            PacketPtr tmp = new Packet(pkt->req, MemCmd::ReadReq, -1);
-            tmp->allocate();
-            cache->cpuSidePort->sendFunctional(tmp);
-            bool hit = (tmp->result == Packet::Success);
-            if (hit) {
-                memcpy(pkt->getPtr<uint8_t>(), tmp->getPtr<uint8_t>(),
-                       pkt->getSize());
-                DPRINTF(Cache, "Uni-coherence snoops a read that hit in L1\n");
-            }
-            delete tmp;
-            return hit;
-    }
-    return false;
-}
diff --git a/src/mem/cache/coherence/uni_coherence.hh b/src/mem/cache/coherence/uni_coherence.hh
deleted file mode 100644
index 9efb4e192..000000000
--- a/src/mem/cache/coherence/uni_coherence.hh
+++ /dev/null
@@ -1,146 +0,0 @@
-/*
- * Copyright (c) 2003-2005 The Regents of The University of Michigan
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met: redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer;
- * redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution;
- * neither the name of the copyright holders nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * Authors: Erik Hallnor
- */
-
-#ifndef __UNI_COHERENCE_HH__
-#define __UNI_COHERENCE_HH__
-
-#include "base/trace.hh"
-#include "base/misc.hh"
-#include "mem/cache/cache_blk.hh"
-#include "mem/cache/miss/mshr_queue.hh"
-#include "mem/packet.hh"
-
-class BaseCache;
-
-class UniCoherence
-{
-  protected:
-    /** Buffers to hold forwarded invalidates. */
-    MSHRQueue cshrs;
-    /** Pointer to the parent cache. */
-    BaseCache *cache;
-
-  public:
-    /**
-     * Construct and initialize this coherence policy.
-     */
-    UniCoherence();
-
-    /**
-     * Set the pointer to the parent cache.
-     * @param _cache The parent cache.
-     */
-    void setCache(BaseCache *_cache)
-    {
-        cache = _cache;
-    }
-
-    /**
-     * Register statistics.
-     * @param name The name to prepend to stat descriptions.
-     */
-    void regStats(const std::string &name)
-    {
-    }
-
-    /**
-     * Return Read.
-     * @param cmd The request's command.
-     * @param state The current state of the cache block.
-     * @return The proper bus command, as determined by the protocol.
-     * @todo Make changes so writebacks don't get here.
-     */
-    MemCmd getBusCmd(MemCmd cmd, CacheBlk::State state)
-    {
-        if (cmd == MemCmd::HardPFReq && state)
-            warn("Trying to issue a prefetch to a block we already have\n");
-        if (cmd == MemCmd::Writeback)
-            return MemCmd::Writeback;
-        return MemCmd::ReadReq;
-    }
-
-    /**
-     * Just return readable and writeable.
-     * @param pkt The bus response.
-     * @param current The current block state.
-     * @return The new state.
-     */
-    CacheBlk::State getNewState(PacketPtr &pkt, CacheBlk::State current)
-    {
-        if (pkt->senderState) //Blocking Buffers don't get mshrs
-        {
-            if (((MSHR *)(pkt->senderState))->originalCmd == MemCmd::HardPFReq) {
-                DPRINTF(HWPrefetch, "Marking a hardware prefetch as such in the state\n");
-                return BlkHWPrefetched | BlkValid | BlkWritable;
-            }
-            else {
-                return BlkValid | BlkWritable;
-            }
-        }
-        //@todo What about prefetching with blocking buffers
-        else
-            return BlkValid | BlkWritable;
-    }
-
-    /**
-     * Return outstanding invalidate to forward.
-     * @return The next invalidate to forward to lower levels of cache.
-     */
-    PacketPtr getPacket();
-
-    /**
-     * Was the CSHR request was sent successfully?
-     * @param pkt The request.
-     * @param success True if the request was sent successfully.
-     */
-    void sendResult(PacketPtr &pkt, MSHR* cshr, bool success);
-
-    /**
-     * Handle snooped bus requests.
-     * @param pkt The snooped bus request.
-     * @param blk The cache block corresponding to the request, if any.
-     * @param mshr The MSHR corresponding to the request, if any.
-     * @param new_state The new coherence state of the block.
-     * @return True if the request should be satisfied locally.
-     */
-    bool handleBusRequest(PacketPtr &pkt, CacheBlk *blk, MSHR *mshr,
-                          CacheBlk::State &new_state);
-
-    /**
-     * Return true if this coherence policy can handle fast cache writes.
-     */
-    bool allowFastWrites() { return true; }
-
-    bool hasProtocol() { return false; }
-
-    bool propogateInvalidate(PacketPtr pkt, bool isTiming);
-};
-
-#endif //__UNI_COHERENCE_HH__
diff --git a/src/mem/cache/miss/blocking_buffer.cc b/src/mem/cache/miss/blocking_buffer.cc
index e8ff26880..281328c2e 100644
--- a/src/mem/cache/miss/blocking_buffer.cc
+++ b/src/mem/cache/miss/blocking_buffer.cc
@@ -64,7 +64,7 @@ BlockingBuffer::handleMiss(PacketPtr &pkt, int blk_size, Tick time)
         std::memcpy(wb.pkt->getPtr<uint8_t>(), pkt->getPtr<uint8_t>(), blk_size);
 
         cache->setBlocked(Blocked_NoWBBuffers);
-        cache->setMasterRequest(Request_WB, time);
+        cache->requestMemSideBus(Request_WB, time);
         return;
     }
 
@@ -77,7 +77,7 @@ BlockingBuffer::handleMiss(PacketPtr &pkt, int blk_size, Tick time)
         miss.pkt->flags |= CACHE_LINE_FILL;
     }
     cache->setBlocked(Blocked_NoMSHRs);
-    cache->setMasterRequest(Request_MSHR, time);
+    cache->requestMemSideBus(Request_MSHR, time);
 }
 
 PacketPtr
@@ -111,7 +111,7 @@ BlockingBuffer::markInService(PacketPtr &pkt, MSHR* mshr)
         // Forwarding a write/ writeback, don't need to change
         // the command
         assert(mshr == &wb);
-        cache->clearMasterRequest(Request_WB);
+        cache->deassertMemSideBusRequest(Request_WB);
         if (!pkt->needsResponse()) {
             assert(wb.getNumTargets() == 0);
             wb.deallocate();
@@ -121,7 +121,7 @@ BlockingBuffer::markInService(PacketPtr &pkt, MSHR* mshr)
         }
     } else {
         assert(mshr == &miss);
-        cache->clearMasterRequest(Request_MSHR);
+        cache->deassertMemSideBusRequest(Request_MSHR);
         if (!pkt->needsResponse()) {
             assert(miss.getNumTargets() == 0);
             miss.deallocate();
@@ -178,7 +178,7 @@ BlockingBuffer::squash(int threadNum)
         if (!miss.inService) {
             miss.deallocate();
             cache->clearBlocked(Blocked_NoMSHRs);
-            cache->clearMasterRequest(Request_MSHR);
+            cache->deassertMemSideBusRequest(Request_MSHR);
         }
     }
 }
@@ -203,7 +203,7 @@ BlockingBuffer::doWriteback(Addr addr,
     writebacks[0/*pkt->req->getThreadNum()*/]++;
 
     wb.allocateAsBuffer(pkt);
-    cache->setMasterRequest(Request_WB, curTick);
+    cache->requestMemSideBus(Request_WB, curTick);
     cache->setBlocked(Blocked_NoWBBuffers);
 }
 
@@ -221,7 +221,7 @@ BlockingBuffer::doWriteback(PacketPtr &pkt)
     std::memcpy(wb.pkt->getPtr<uint8_t>(), pkt->getPtr<uint8_t>(), pkt->getSize());
 
     cache->setBlocked(Blocked_NoWBBuffers);
-    cache->setMasterRequest(Request_WB, curTick);
+    cache->requestMemSideBus(Request_WB, curTick);
 }
 
 
diff --git a/src/mem/cache/miss/miss_queue.cc b/src/mem/cache/miss/miss_queue.cc
index 24ca9cfa2..67036ed02 100644
--- a/src/mem/cache/miss/miss_queue.cc
+++ b/src/mem/cache/miss/miss_queue.cc
@@ -348,7 +348,7 @@ MissQueue::allocateMiss(PacketPtr &pkt, int size, Tick time)
     }
     if (pkt->cmd != MemCmd::HardPFReq) {
         //If we need to request the bus (not on HW prefetch), do so
-        cache->setMasterRequest(Request_MSHR, time);
+        cache->requestMemSideBus(Request_MSHR, time);
     }
     return mshr;
 }
@@ -376,7 +376,7 @@ MissQueue::allocateWrite(PacketPtr &pkt, int size, Tick time)
         cache->setBlocked(Blocked_NoWBBuffers);
     }
 
-    cache->setMasterRequest(Request_WB, time);
+    cache->requestMemSideBus(Request_WB, time);
 
     return mshr;
 }
@@ -450,7 +450,7 @@ MissQueue::fetchBlock(Addr addr, int blk_size, Tick time,
     if (mq.isFull()) {
         cache->setBlocked(Blocked_NoMSHRs);
     }
-    cache->setMasterRequest(Request_MSHR, time);
+    cache->requestMemSideBus(Request_MSHR, time);
     return mshr;
 }
 
@@ -534,7 +534,7 @@ MissQueue::markInService(PacketPtr &pkt, MSHR* mshr)
         unblock = wb.isFull();
         wb.markInService(mshr);
         if (!wb.havePending()){
-            cache->clearMasterRequest(Request_WB);
+            cache->deassertMemSideBusRequest(Request_WB);
         }
         if (unblock) {
             // Do we really unblock?
@@ -545,7 +545,7 @@ MissQueue::markInService(PacketPtr &pkt, MSHR* mshr)
         unblock = mq.isFull();
         mq.markInService(mshr);
         if (!mq.havePending()){
-            cache->clearMasterRequest(Request_MSHR);
+            cache->deassertMemSideBusRequest(Request_MSHR);
         }
         if (mshr->originalCmd == MemCmd::HardPFReq) {
             DPRINTF(HWPrefetch, "%s:Marking a HW_PF in service\n",
@@ -553,7 +553,7 @@ MissQueue::markInService(PacketPtr &pkt, MSHR* mshr)
             //Also clear pending if need be
             if (!prefetcher->havePending())
             {
-                cache->clearMasterRequest(Request_PF);
+                cache->deassertMemSideBusRequest(Request_PF);
             }
         }
         if (unblock) {
@@ -602,7 +602,7 @@ MissQueue::handleResponse(PacketPtr &pkt, Tick time)
             mshr->pkt->req = mshr->getTarget()->req;
             mq.markPending(mshr, cmd);
             mshr->order = order++;
-            cache->setMasterRequest(Request_MSHR, time);
+            cache->requestMemSideBus(Request_MSHR, time);
         }
         else {
             unblock = mq.isFull();
@@ -683,7 +683,7 @@ MissQueue::squash(int threadNum)
     }
     mq.squash(threadNum);
     if (!mq.havePending()) {
-        cache->clearMasterRequest(Request_MSHR);
+        cache->deassertMemSideBusRequest(Request_MSHR);
     }
     if (unblock && !mq.isFull()) {
         cache->clearBlocked(cause);
diff --git a/src/mem/cache/prefetch/base_prefetcher.cc b/src/mem/cache/prefetch/base_prefetcher.cc
index 44daf75e1..966f7d005 100644
--- a/src/mem/cache/prefetch/base_prefetcher.cc
+++ b/src/mem/cache/prefetch/base_prefetcher.cc
@@ -141,7 +141,7 @@ BasePrefetcher::getPacket()
             keepTrying = cache->inCache(pkt->getAddr());
         }
         if (pf.empty()) {
-            cache->clearMasterRequest(Request_PF);
+            cache->deassertMemSideBusRequest(Request_PF);
             if (keepTrying) return NULL; //None left, all were in cache
         }
     } while (keepTrying);
@@ -165,7 +165,7 @@ BasePrefetcher::handleMiss(PacketPtr &pkt, Tick time)
             pfRemovedMSHR++;
             pf.erase(iter);
             if (pf.empty())
-                cache->clearMasterRequest(Request_PF);
+                cache->deassertMemSideBusRequest(Request_PF);
         }
 
         //Remove anything in queue with delay older than time
@@ -182,7 +182,7 @@ BasePrefetcher::handleMiss(PacketPtr &pkt, Tick time)
                 iter--;
             }
             if (pf.empty())
-                cache->clearMasterRequest(Request_PF);
+                cache->deassertMemSideBusRequest(Request_PF);
         }
 
 
@@ -244,7 +244,7 @@ BasePrefetcher::handleMiss(PacketPtr &pkt, Tick time)
             prefetch->flags |= CACHE_LINE_FILL;
 
             //Make sure to request the bus, with proper delay
-            cache->setMasterRequest(Request_PF, prefetch->time);
+            cache->requestMemSideBus(Request_PF, prefetch->time);
 
             //Increment through the list
             addr++;
diff --git a/src/mem/packet.hh b/src/mem/packet.hh
index dc23e9f6d..577f99116 100644
--- a/src/mem/packet.hh
+++ b/src/mem/packet.hh
@@ -61,8 +61,8 @@ typedef std::list<PacketPtr> PacketList;
 #define CACHE_LINE_FILL (1 << 3)
 #define COMPRESSED      (1 << 4)
 #define NO_ALLOCATE     (1 << 5)
-#define SNOOP_COMMIT    (1 << 6)
 
+#define EXPRESS_SNOOP   (1 << 7)
 
 class MemCmd
 {
-- 
cgit v1.2.3


From 9048c695a0ecde709a074259bad9ad1cda57a303 Mon Sep 17 00:00:00 2001
From: Steve Reinhardt <stever@eecs.umich.edu>
Date: Tue, 22 May 2007 06:29:48 -0700
Subject: Another pass of minor changes in preparation for new protocol.

src/mem/cache/cache_impl.hh:
src/mem/cache/coherence/simple_coherence.hh:
    Get rid of old invalidate propagation logic in preparation
    for new multilevel snoop protocol.
src/mem/cache/coherence/coherence_protocol.cc:
    L2 cache now has protocol, so protocol must handle ReadExReq
    coming in from the CPU side.
src/mem/cache/miss/mshr_queue.cc:
    Assertion is failing, so let's take it out for now.
src/mem/packet.cc:
src/mem/packet.hh:
    Add WritebackAck command.
    Reorganize enum to put responses next to corresponding requests.
    Get rid of unused WriteReqNoAck.

--HG--
extra : convert_revision : 24c519846d161978123f9aa029ae358a41546c73
---
 src/mem/cache/cache_impl.hh                   | 17 ++---------------
 src/mem/cache/coherence/coherence_protocol.cc |  3 +++
 src/mem/cache/coherence/simple_coherence.hh   |  6 ------
 src/mem/cache/miss/mshr_queue.cc              |  1 -
 src/mem/packet.cc                             | 11 ++++++-----
 src/mem/packet.hh                             |  4 ++--
 6 files changed, 13 insertions(+), 29 deletions(-)

diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh
index 6b9eac865..56352c110 100644
--- a/src/mem/cache/cache_impl.hh
+++ b/src/mem/cache/cache_impl.hh
@@ -794,14 +794,7 @@ Cache<TagStore,Coherence>::snoop(PacketPtr &pkt)
         return;
     }
 
-    //Send a timing (true) invalidate up if the protocol calls for it
-    if (coherence->propogateInvalidate(pkt, true)) {
-        //Temp hack, we had a functional read hit in the L1, mark as success
-        pkt->flags |= SATISFIED;
-        pkt->result = Packet::Success;
-        respondToSnoop(pkt, curTick + hitLatency);
-        return;
-    }
+    ///// PROPAGATE SNOOP UPWARD HERE
 
     Addr blk_addr = pkt->getAddr() & ~(Addr(blkSize-1));
     BlkType *blk = tags->findBlock(pkt->getAddr());
@@ -1097,13 +1090,7 @@ template<class TagStore, class Coherence>
 Tick
 Cache<TagStore,Coherence>::snoopProbe(PacketPtr &pkt)
 {
-    //Send a atomic (false) invalidate up if the protocol calls for it
-    if (coherence->propogateInvalidate(pkt, false)) {
-        //Temp hack, we had a functional read hit in the L1, mark as success
-        pkt->flags |= SATISFIED;
-        pkt->result = Packet::Success;
-        return hitLatency;
-    }
+    ///// PROPAGATE SNOOP UPWARD HERE
 
     Addr blk_addr = pkt->getAddr() & ~(Addr(blkSize-1));
     BlkType *blk = tags->findBlock(pkt->getAddr());
diff --git a/src/mem/cache/coherence/coherence_protocol.cc b/src/mem/cache/coherence/coherence_protocol.cc
index 33a8a4e63..bc8de0d26 100644
--- a/src/mem/cache/coherence/coherence_protocol.cc
+++ b/src/mem/cache/coherence/coherence_protocol.cc
@@ -295,11 +295,14 @@ CoherenceProtocol::CoherenceProtocol(const string &name,
     tt[Invalid][MC::ReadReq].onRequest(MC::ReadReq);
     // we only support write allocate right now
     tt[Invalid][MC::WriteReq].onRequest(MC::ReadExReq);
+    tt[Invalid][MC::ReadExReq].onRequest(MC::ReadExReq);
     tt[Invalid][MC::SwapReq].onRequest(MC::ReadExReq);
     tt[Shared][MC::WriteReq].onRequest(writeToSharedCmd);
+    tt[Shared][MC::ReadExReq].onRequest(MC::ReadExReq);
     tt[Shared][MC::SwapReq].onRequest(writeToSharedCmd);
     if (hasOwned) {
         tt[Owned][MC::WriteReq].onRequest(writeToSharedCmd);
+        tt[Owned][MC::ReadExReq].onRequest(MC::ReadExReq);
         tt[Owned][MC::SwapReq].onRequest(writeToSharedCmd);
     }
 
diff --git a/src/mem/cache/coherence/simple_coherence.hh b/src/mem/cache/coherence/simple_coherence.hh
index 1c89c703a..095260ca4 100644
--- a/src/mem/cache/coherence/simple_coherence.hh
+++ b/src/mem/cache/coherence/simple_coherence.hh
@@ -161,12 +161,6 @@ class SimpleCoherence
     bool allowFastWrites() { return false; }
 
     bool hasProtocol() { return true; }
-
-    bool propogateInvalidate(PacketPtr pkt, bool isTiming)
-    {
-        //For now we do nothing, asssumes simple coherence is top level of cache
-        return false;
-    }
 };
 
 #endif //__SIMPLE_COHERENCE_HH__
diff --git a/src/mem/cache/miss/mshr_queue.cc b/src/mem/cache/miss/mshr_queue.cc
index add11dfe7..e9aa89bf8 100644
--- a/src/mem/cache/miss/mshr_queue.cc
+++ b/src/mem/cache/miss/mshr_queue.cc
@@ -119,7 +119,6 @@ MSHRQueue::allocate(PacketPtr &pkt, int size)
     if (!pkt->needsResponse()) {
         mshr->allocateAsBuffer(pkt);
     } else {
-        assert(size !=0);
         mshr->allocate(pkt->cmd, aligned_addr, size, pkt);
         allocatedTargets += 1;
     }
diff --git a/src/mem/packet.cc b/src/mem/packet.cc
index 2463a19ba..8c69def37 100644
--- a/src/mem/packet.cc
+++ b/src/mem/packet.cc
@@ -56,17 +56,18 @@ MemCmd::commandInfo[] =
     { 0, InvalidCmd, "InvalidCmd" },
     /* ReadReq */
     { SET3(IsRead, IsRequest, NeedsResponse), ReadResp, "ReadReq" },
+    /* ReadResp */
+    { SET3(IsRead, IsResponse, HasData), InvalidCmd, "ReadResp" },
     /* WriteReq */
     { SET4(IsWrite, IsRequest, NeedsResponse, HasData),
             WriteResp, "WriteReq" },
-    /* WriteReqNoAck */
-    { SET3(IsWrite, IsRequest, HasData), InvalidCmd, "WriteReqNoAck" },
-    /* ReadResp */
-    { SET3(IsRead, IsResponse, HasData), InvalidCmd, "ReadResp" },
     /* WriteResp */
     { SET2(IsWrite, IsResponse), InvalidCmd, "WriteResp" },
     /* Writeback */
-    { SET3(IsWrite, IsRequest, HasData), InvalidCmd, "Writeback" },
+    { SET4(IsWrite, IsRequest, HasData, NeedsResponse),
+            WritebackAck, "Writeback" },
+    /* WritebackAck */
+    { SET2(IsWrite, IsResponse), InvalidCmd, "WritebackAck" },
     /* SoftPFReq */
     { SET4(IsRead, IsRequest, IsSWPrefetch, NeedsResponse),
             SoftPFResp, "SoftPFReq" },
diff --git a/src/mem/packet.hh b/src/mem/packet.hh
index 577f99116..413ffa26b 100644
--- a/src/mem/packet.hh
+++ b/src/mem/packet.hh
@@ -73,11 +73,11 @@ class MemCmd
     {
         InvalidCmd,
         ReadReq,
-        WriteReq,
-        WriteReqNoAck,
         ReadResp,
+        WriteReq,
         WriteResp,
         Writeback,
+        WritebackAck,
         SoftPFReq,
         HardPFReq,
         SoftPFResp,
-- 
cgit v1.2.3


From 41dde5f6fdf195b8d51d70119737c5e3f7391f78 Mon Sep 17 00:00:00 2001
From: Steve Reinhardt <stever@eecs.umich.edu>
Date: Tue, 22 May 2007 06:32:24 -0700
Subject: memtest.hh: Fix description string. Minor whitespace cleanup.

src/cpu/memtest/memtest.hh:
    Fix description string.
    Minor whitespace cleanup.

--HG--
extra : convert_revision : 0c7213d088da46de9713ca6beabc30523ccb1c8c
---
 src/cpu/memtest/memtest.hh | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/cpu/memtest/memtest.hh b/src/cpu/memtest/memtest.hh
index 264309fd7..84e16b98a 100644
--- a/src/cpu/memtest/memtest.hh
+++ b/src/cpu/memtest/memtest.hh
@@ -85,13 +85,13 @@ class MemTest : public MemObject
         TickEvent(MemTest *c)
             : Event(&mainEventQueue, CPU_Tick_Pri), cpu(c) {}
         void process() {cpu->tick();}
-        virtual const char *description() { return "tick event"; }
+        virtual const char *description() { return "MemTest tick"; }
     };
 
     TickEvent tickEvent;
+
     class CpuPort : public Port
     {
-
         MemTest *memtest;
 
       public:
-- 
cgit v1.2.3


From da46364b1878339841e9cda5a62ee104409b6535 Mon Sep 17 00:00:00 2001
From: Steve Reinhardt <stever@eecs.umich.edu>
Date: Tue, 22 May 2007 07:30:55 -0700
Subject: Fix getDeviceAddressRanges() to get snooping right.

--HG--
extra : convert_revision : 2aeab25ef955ab9db7b968786faff227239fbbe4
---
 src/mem/cache/base_cache.cc |  8 --------
 src/mem/cache/base_cache.hh |  3 ---
 src/mem/cache/cache.hh      |  6 ++++++
 src/mem/cache/cache_impl.hh | 24 ++++++++++++++++++++++++
 4 files changed, 30 insertions(+), 11 deletions(-)

diff --git a/src/mem/cache/base_cache.cc b/src/mem/cache/base_cache.cc
index d75d35ebb..a47c19e60 100644
--- a/src/mem/cache/base_cache.cc
+++ b/src/mem/cache/base_cache.cc
@@ -57,7 +57,6 @@ BaseCache::BaseCache(const std::string &name, Params &params)
 }
 
 
-
 void
 BaseCache::CachePort::recvStatusChange(Port::Status status)
 {
@@ -66,13 +65,6 @@ BaseCache::CachePort::recvStatusChange(Port::Status status)
     }
 }
 
-void
-BaseCache::CachePort::getDeviceAddressRanges(AddrRangeList &resp, bool &snoop)
-{
-    AddrRangeList dummy;
-    otherPort->getPeerAddressRanges(resp, dummy);
-}
-
 int
 BaseCache::CachePort::deviceBlockSize()
 {
diff --git a/src/mem/cache/base_cache.hh b/src/mem/cache/base_cache.hh
index f7107a86a..a27ac1788 100644
--- a/src/mem/cache/base_cache.hh
+++ b/src/mem/cache/base_cache.hh
@@ -91,9 +91,6 @@ class BaseCache : public MemObject
 
         virtual void recvStatusChange(Status status);
 
-        virtual void getDeviceAddressRanges(AddrRangeList &resp,
-                                            bool &snoop);
-
         virtual int deviceBlockSize();
 
         bool recvRetryCommon();
diff --git a/src/mem/cache/cache.hh b/src/mem/cache/cache.hh
index 75fb50f4e..e14b2efe8 100644
--- a/src/mem/cache/cache.hh
+++ b/src/mem/cache/cache.hh
@@ -89,6 +89,9 @@ class Cache : public BaseCache
         void processRequestEvent();
         void processResponseEvent();
 
+        virtual void getDeviceAddressRanges(AddrRangeList &resp,
+                                            bool &snoop);
+
         virtual bool recvTiming(PacketPtr pkt);
 
         virtual void recvRetry();
@@ -124,6 +127,9 @@ class Cache : public BaseCache
         void processRequestEvent();
         void processResponseEvent();
 
+        virtual void getDeviceAddressRanges(AddrRangeList &resp,
+                                            bool &snoop);
+
         virtual bool recvTiming(PacketPtr pkt);
 
         virtual void recvRetry();
diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh
index 56352c110..a7f96603e 100644
--- a/src/mem/cache/cache_impl.hh
+++ b/src/mem/cache/cache_impl.hh
@@ -1139,6 +1139,18 @@ Cache<TagStore,Coherence>::deletePortRefs(Port *p)
 }
 
 
+template<class TagStore, class Coherence>
+void
+Cache<TagStore,Coherence>::CpuSidePort::
+getDeviceAddressRanges(AddrRangeList &resp, bool &snoop)
+{
+    // CPU side port doesn't snoop; it's a target only.
+    bool dummy;
+    otherPort->getPeerAddressRanges(resp, dummy);
+    snoop = false;
+}
+
+
 template<class TagStore, class Coherence>
 bool
 Cache<TagStore,Coherence>::CpuSidePort::recvTiming(PacketPtr pkt)
@@ -1260,6 +1272,18 @@ Cache<TagStore,Coherence>::CpuSidePort::recvFunctional(PacketPtr pkt)
 }
 
 
+template<class TagStore, class Coherence>
+void
+Cache<TagStore,Coherence>::MemSidePort::
+getDeviceAddressRanges(AddrRangeList &resp, bool &snoop)
+{
+    // Memory-side port always snoops.
+    bool dummy;
+    otherPort->getPeerAddressRanges(resp, dummy);
+    snoop = true;
+}
+
+
 template<class TagStore, class Coherence>
 bool
 Cache<TagStore,Coherence>::MemSidePort::recvTiming(PacketPtr pkt)
-- 
cgit v1.2.3


From 35cf19d441ed15d054d00674ec67ab5bc769f6d7 Mon Sep 17 00:00:00 2001
From: Steve Reinhardt <stever@eecs.umich.edu>
Date: Sun, 17 Jun 2007 17:27:53 -0700
Subject: More major reorg of cache.  Seems to work for atomic mode now, timing
 mode still broken.

configs/example/memtest.py:
    Revamp options.
src/cpu/memtest/memtest.cc:
    No need for memory initialization.
    No need to make atomic response... memory system should do that now.
src/cpu/memtest/memtest.hh:
    MemTest really doesn't want to snoop.
src/mem/bridge.cc:
    checkFunctional() cleanup.
src/mem/bus.cc:
src/mem/bus.hh:
src/mem/cache/base_cache.cc:
src/mem/cache/base_cache.hh:
src/mem/cache/cache.cc:
src/mem/cache/cache.hh:
src/mem/cache/cache_blk.hh:
src/mem/cache/cache_builder.cc:
src/mem/cache/cache_impl.hh:
src/mem/cache/coherence/coherence_protocol.cc:
src/mem/cache/coherence/coherence_protocol.hh:
src/mem/cache/coherence/simple_coherence.hh:
src/mem/cache/miss/SConscript:
src/mem/cache/miss/mshr.cc:
src/mem/cache/miss/mshr.hh:
src/mem/cache/miss/mshr_queue.cc:
src/mem/cache/miss/mshr_queue.hh:
src/mem/cache/prefetch/base_prefetcher.cc:
src/mem/cache/tags/fa_lru.cc:
src/mem/cache/tags/fa_lru.hh:
src/mem/cache/tags/iic.cc:
src/mem/cache/tags/iic.hh:
src/mem/cache/tags/lru.cc:
src/mem/cache/tags/lru.hh:
src/mem/cache/tags/split.cc:
src/mem/cache/tags/split.hh:
src/mem/cache/tags/split_lifo.cc:
src/mem/cache/tags/split_lifo.hh:
src/mem/cache/tags/split_lru.cc:
src/mem/cache/tags/split_lru.hh:
src/mem/packet.cc:
src/mem/packet.hh:
src/mem/physical.cc:
src/mem/physical.hh:
src/mem/tport.cc:
    More major reorg.  Seems to work for atomic mode now,
    timing mode still broken.

--HG--
extra : convert_revision : 7e70dfc4a752393b911880ff028271433855ae87
---
 configs/example/memtest.py                    |   92 +-
 src/cpu/memtest/memtest.cc                    |   28 +-
 src/cpu/memtest/memtest.hh                    |    2 +-
 src/mem/bridge.cc                             |   11 +-
 src/mem/bus.cc                                |   87 +-
 src/mem/bus.hh                                |    3 -
 src/mem/cache/base_cache.cc                   |  413 ++++--
 src/mem/cache/base_cache.hh                   |  230 +--
 src/mem/cache/cache.cc                        |    3 -
 src/mem/cache/cache.hh                        |  271 ++--
 src/mem/cache/cache_blk.hh                    |   25 +-
 src/mem/cache/cache_builder.cc                |   27 +-
 src/mem/cache/cache_impl.hh                   | 1976 +++++++++++--------------
 src/mem/cache/coherence/coherence_protocol.cc |   40 +-
 src/mem/cache/coherence/coherence_protocol.hh |    4 +-
 src/mem/cache/coherence/simple_coherence.hh   |   15 +-
 src/mem/cache/miss/SConscript                 |    3 -
 src/mem/cache/miss/blocking_buffer.cc         |  245 ---
 src/mem/cache/miss/blocking_buffer.hh         |  209 ---
 src/mem/cache/miss/miss_buffer.cc             |   62 -
 src/mem/cache/miss/miss_buffer.hh             |  223 ---
 src/mem/cache/miss/miss_queue.cc              |  752 ----------
 src/mem/cache/miss/miss_queue.hh              |  327 ----
 src/mem/cache/miss/mshr.cc                    |   78 +-
 src/mem/cache/miss/mshr.hh                    |   77 +-
 src/mem/cache/miss/mshr_queue.cc              |   90 +-
 src/mem/cache/miss/mshr_queue.hh              |  137 +-
 src/mem/cache/prefetch/base_prefetcher.cc     |    1 -
 src/mem/cache/tags/fa_lru.cc                  |    5 +-
 src/mem/cache/tags/fa_lru.hh                  |    7 +-
 src/mem/cache/tags/iic.cc                     |   32 +-
 src/mem/cache/tags/iic.hh                     |   27 +-
 src/mem/cache/tags/lru.cc                     |    5 +-
 src/mem/cache/tags/lru.hh                     |   15 +-
 src/mem/cache/tags/split.cc                   |   25 +-
 src/mem/cache/tags/split.hh                   |    7 +-
 src/mem/cache/tags/split_lifo.cc              |    7 +-
 src/mem/cache/tags/split_lifo.hh              |   15 +-
 src/mem/cache/tags/split_lru.cc               |    5 +-
 src/mem/cache/tags/split_lru.hh               |   15 +-
 src/mem/packet.cc                             |   99 +-
 src/mem/packet.hh                             |  194 ++-
 src/mem/physical.cc                           |  208 ++-
 src/mem/physical.hh                           |   12 +-
 src/mem/tport.cc                              |   13 +-
 45 files changed, 2041 insertions(+), 4081 deletions(-)
 delete mode 100644 src/mem/cache/miss/blocking_buffer.cc
 delete mode 100644 src/mem/cache/miss/blocking_buffer.hh
 delete mode 100644 src/mem/cache/miss/miss_buffer.cc
 delete mode 100644 src/mem/cache/miss/miss_buffer.hh
 delete mode 100644 src/mem/cache/miss/miss_queue.cc
 delete mode 100644 src/mem/cache/miss/miss_queue.hh

diff --git a/configs/example/memtest.py b/configs/example/memtest.py
index 9fd943aaa..9027a9866 100644
--- a/configs/example/memtest.py
+++ b/configs/example/memtest.py
@@ -33,14 +33,32 @@ m5.AddToPath('../common')
 
 parser = optparse.OptionParser()
 
-parser.add_option("--caches", action="store_true")
-parser.add_option("-t", "--timing", action="store_true")
-parser.add_option("-m", "--maxtick", type="int")
-parser.add_option("-l", "--maxloads", default = "1000000000000", type="int")
-parser.add_option("-n", "--numtesters", default = "8", type="int")
-parser.add_option("-p", "--protocol",
-                  default="moesi",
-                  help="The coherence protocol to use for the L1'a (i.e. MOESI, MOSI)")
+parser.add_option("-c", "--cache-levels", type="int", default=2,
+                  metavar="LEVELS",
+                  help="Number of cache levels [default: %default]")
+parser.add_option("-a", "--atomic", action="store_true",
+                  help="Use atomic (non-timing) mode")
+parser.add_option("-b", "--blocking", action="store_true",
+                  help="Use blocking caches")
+parser.add_option("-l", "--maxloads", default="1G", metavar="N",
+                  help="Stop after N loads [default: %default]")
+parser.add_option("-m", "--maxtick", type="int", default=m5.MaxTick,
+                  metavar="T",
+                  help="Stop after T ticks")
+parser.add_option("-n", "--numtesters", type="int", default=8,
+                  metavar="N",
+                  help="Number of tester pseudo-CPUs [default: %default]")
+parser.add_option("-p", "--protocol", default="moesi",
+                  help="Coherence protocol [default: %default]")
+
+parser.add_option("-f", "--functional", type="int", default=0,
+                  metavar="PCT",
+                  help="Target percentage of functional accesses "
+                  "[default: %default]")
+parser.add_option("-u", "--uncacheable", type="int", default=0,
+                  metavar="PCT",
+                  help="Target percentage of uncacheable accesses "
+                  "[default: %default]")
 
 (options, args) = parser.parse_args()
 
@@ -48,14 +66,29 @@ if args:
      print "Error: script doesn't take any positional arguments"
      sys.exit(1)
 
+# Should generalize this someday... would be cool to have a loop that
+# just iterates, adding a level of caching each time.
+#if options.cache_levels != 2 and options.cache_levels != 0:
+#     print "Error: number of cache levels must be 0 or 2"
+#     sys.exit(1)
+
+if options.blocking:
+     num_l1_mshrs = 1
+     num_l2_mshrs = 1
+else:
+     num_l1_mshrs = 12
+     num_l2_mshrs = 92
+
+block_size = 64
+
 # --------------------
 # Base L1 Cache
 # ====================
 
 class L1(BaseCache):
     latency = '1ns'
-    block_size = 64
-    mshrs = 12
+    block_size = block_size
+    mshrs = num_l1_mshrs
     tgts_per_mshr = 8
     protocol = CoherenceProtocol(protocol=options.protocol)
 
@@ -64,29 +97,31 @@ class L1(BaseCache):
 # ----------------------
 
 class L2(BaseCache):
-    block_size = 64
+    block_size = block_size
     latency = '10ns'
-    mshrs = 92
+    mshrs = num_l2_mshrs
     tgts_per_mshr = 16
     write_buffers = 8
+    protocol = CoherenceProtocol(protocol=options.protocol)
 
-#MAX CORES IS 8 with the false sharing method
-if options.numtesters > 8:
-     print "Error: NUmber of testers limited to 8 because of false sharing"
-     sys,exit(1)
+if options.numtesters > block_size:
+     print "Error: Number of testers limited to %s because of false sharing" \
+           % (block_size)
+     sys.exit(1)
 
-cpus = [ MemTest(atomic=not options.timing, max_loads=options.maxloads,
-                 percent_functional=50, percent_uncacheable=10,
+cpus = [ MemTest(atomic=options.atomic, max_loads=options.maxloads,
+                 percent_functional=options.functional,
+                 percent_uncacheable=options.uncacheable,
                  progress_interval=1000)
          for i in xrange(options.numtesters) ]
 
 # system simulated
 system = System(cpu = cpus, funcmem = PhysicalMemory(),
-                physmem = PhysicalMemory(latency = "50ps"),
+                physmem = PhysicalMemory(latency = "100ns"),
                 membus = Bus(clock="500MHz", width=16))
 
 # l2cache & bus
-if options.caches:
+if options.cache_levels == 2:
     system.toL2Bus = Bus(clock="500MHz", width=16)
     system.l2c = L2(size='64kB', assoc=8)
     system.l2c.cpu_side = system.toL2Bus.port
@@ -96,10 +131,14 @@ if options.caches:
 
 # add L1 caches
 for cpu in cpus:
-    if options.caches:
+    if options.cache_levels == 2:
          cpu.l1c = L1(size = '32kB', assoc = 4)
          cpu.test = cpu.l1c.cpu_side
          cpu.l1c.mem_side = system.toL2Bus.port
+    elif options.cache_levels == 1:
+         cpu.l1c = L1(size = '32kB', assoc = 4)
+         cpu.test = cpu.l1c.cpu_side
+         cpu.l1c.mem_side = system.membus.port
     else:
          cpu.test = system.membus.port
     system.funcmem.port = cpu.functional
@@ -113,10 +152,10 @@ system.physmem.port = system.membus.port
 # -----------------------
 
 root = Root( system = system )
-if options.timing:
-    root.system.mem_mode = 'timing'
-else:
+if options.atomic:
     root.system.mem_mode = 'atomic'
+else:
+    root.system.mem_mode = 'timing'
 
 # Not much point in this being higher than the L1 latency
 m5.ticks.setGlobalFrequency('1ns')
@@ -125,9 +164,6 @@ m5.ticks.setGlobalFrequency('1ns')
 m5.instantiate(root)
 
 # simulate until program terminates
-if options.maxtick:
-    exit_event = m5.simulate(options.maxtick)
-else:
-    exit_event = m5.simulate(10000000000000)
+exit_event = m5.simulate(options.maxtick)
 
 print 'Exiting @ tick', m5.curTick(), 'because', exit_event.getCause()
diff --git a/src/cpu/memtest/memtest.cc b/src/cpu/memtest/memtest.cc
index 607cf1066..5d89f1b82 100644
--- a/src/cpu/memtest/memtest.cc
+++ b/src/cpu/memtest/memtest.cc
@@ -102,7 +102,6 @@ void
 MemTest::sendPkt(PacketPtr pkt) {
     if (atomic) {
         cachePort.sendAtomic(pkt);
-        pkt->makeAtomicResponse();
         completeRequest(pkt);
     }
     else if (!cachePort.sendTiming(pkt)) {
@@ -165,8 +164,6 @@ MemTest::MemTest(const string &name,
     tickEvent.schedule(0);
 
     id = TESTER_ALLOCATOR++;
-    if (TESTER_ALLOCATOR > 8)
-        panic("False sharing memtester only allows up to 8 testers");
 
     accessRetry = false;
 }
@@ -190,14 +187,8 @@ MemTest::init()
     blockAddrMask = blockSize - 1;
     traceBlockAddr = blockAddr(traceBlockAddr);
 
-    // set up intial memory contents here
-
-    cachePort.memsetBlob(baseAddr1, 1, size);
-    funcPort.memsetBlob(baseAddr1, 1, size);
-    cachePort.memsetBlob(baseAddr2, 2, size);
-    funcPort.memsetBlob(baseAddr2, 2, size);
-    cachePort.memsetBlob(uncacheAddr, 3, size);
-    funcPort.memsetBlob(uncacheAddr, 3, size);
+    // initial memory contents for both physical memory and functional
+    // memory should be 0; no need to initialize them.
 }
 
 static void
@@ -230,15 +221,10 @@ MemTest::completeRequest(PacketPtr pkt)
       case MemCmd::ReadResp:
 
         if (memcmp(pkt_data, data, pkt->getSize()) != 0) {
-            cerr << name() << ": on read of 0x" << hex << req->getPaddr()
-                 << " (0x" << hex << blockAddr(req->getPaddr()) << ")"
-                 << "@ cycle " << dec << curTick
-                 << ", cache returns 0x";
-            printData(cerr, pkt_data, pkt->getSize());
-            cerr << ", expected 0x";
-            printData(cerr, data, pkt->getSize());
-            cerr << endl;
-            fatal("");
+            panic("%s: read of %x (blk %x) @ cycle %d "
+                  "returns %x, expected %x\n", name(),
+                  req->getPaddr(), blockAddr(req->getPaddr()), curTick,
+                  *pkt_data, *data);
         }
 
         numReads++;
@@ -267,7 +253,7 @@ MemTest::completeRequest(PacketPtr pkt)
         break;
 */
       default:
-        panic("invalid command");
+        panic("invalid command %s (%d)", pkt->cmdString(), pkt->cmd.toInt());
     }
 
     if (blockAddr(req->getPaddr()) == traceBlockAddr) {
diff --git a/src/cpu/memtest/memtest.hh b/src/cpu/memtest/memtest.hh
index a6b08d61c..565fafb77 100644
--- a/src/cpu/memtest/memtest.hh
+++ b/src/cpu/memtest/memtest.hh
@@ -116,7 +116,7 @@ class MemTest : public MemObject
 
         virtual void getDeviceAddressRanges(AddrRangeList &resp,
                                             bool &snoop)
-        { resp.clear(); snoop = true; }
+        { resp.clear(); snoop = false; }
     };
 
     CpuPort cachePort;
diff --git a/src/mem/bridge.cc b/src/mem/bridge.cc
index eebf91a85..fb4574844 100644
--- a/src/mem/bridge.cc
+++ b/src/mem/bridge.cc
@@ -339,17 +339,14 @@ void
 Bridge::BridgePort::recvFunctional(PacketPtr pkt)
 {
     std::list<PacketBuffer*>::iterator i;
-    bool pktContinue = true;
 
     for (i = sendQueue.begin();  i != sendQueue.end(); ++i) {
-        if (pkt->intersect((*i)->pkt)) {
-            pktContinue &= fixPacket(pkt, (*i)->pkt);
-        }
+        if (pkt->checkFunctional((*i)->pkt))
+            return;
     }
 
-    if (pktContinue) {
-        otherPort->sendFunctional(pkt);
-    }
+    // fall through if pkt still not satisfied
+    otherPort->sendFunctional(pkt);
 }
 
 /** Function called by the port when the bus is receiving a status change.*/
diff --git a/src/mem/bus.cc b/src/mem/bus.cc
index ec33bd4c5..bde90c83f 100644
--- a/src/mem/bus.cc
+++ b/src/mem/bus.cc
@@ -33,7 +33,7 @@
  * Definition of a bus object.
  */
 
-
+#include <algorithm>
 #include <limits>
 
 #include "base/misc.hh"
@@ -182,8 +182,7 @@ Bus::recvTiming(PacketPtr pkt)
 
     // If the bus is busy, or other devices are in line ahead of the current
     // one, put this device on the retry list.
-    if (!(pkt->flags & EXPRESS_SNOOP) &&
-        tickNextIdle > curTick ||
+    if (tickNextIdle > curTick ||
         (retryList.size() && (!inRetry || pktPort != retryList.front())))
     {
         addToRetryList(pktPort);
@@ -199,7 +198,7 @@ Bus::recvTiming(PacketPtr pkt)
         port = findPort(pkt->getAddr(), pkt->getSrc());
         timingSnoop(pkt, port ? port : interfaces[pkt->getSrc()]);
 
-        if (pkt->flags & SATISFIED) {
+        if (pkt->memInhibitAsserted()) {
             //Cache-Cache transfer occuring
             if (inRetry) {
                 retryList.front()->onRetryList(false);
@@ -321,27 +320,6 @@ Bus::findPort(Addr addr, int id)
     return interfaces[dest_id];
 }
 
-Tick
-Bus::atomicSnoop(PacketPtr pkt, Port *responder)
-{
-    Tick response_time = 0;
-
-    for (SnoopIter s_iter = snoopPorts.begin();
-         s_iter != snoopPorts.end();
-         s_iter++) {
-        BusPort *p = *s_iter;
-        if (p != responder && p->getId() != pkt->getSrc()) {
-            Tick response = p->sendAtomic(pkt);
-            if (response) {
-                assert(!response_time);  //Multiple responders
-                response_time = response;
-            }
-        }
-    }
-
-    return response_time;
-}
-
 void
 Bus::functionalSnoop(PacketPtr pkt, Port *responder)
 {
@@ -390,19 +368,58 @@ Bus::recvAtomic(PacketPtr pkt)
             pkt->getSrc(), pkt->getDest(), pkt->getAddr(), pkt->cmdString());
     assert(pkt->getDest() == Packet::Broadcast);
 
-    // Assume one bus cycle in order to get through.  This may have
-    // some clock skew issues yet again...
-    pkt->finishTime = curTick + clock;
+    // Variables for recording original command and snoop response (if
+    // any)... if a snooper responds, we will need to restore
+    // original command so that additional snoops can take place
+    // properly
+    MemCmd orig_cmd = pkt->cmd;
+    Packet::Result response_result = Packet::Unknown;
+    MemCmd response_cmd = MemCmd::InvalidCmd;
 
-    Port *port = findPort(pkt->getAddr(), pkt->getSrc());
-    Tick snoopTime = atomicSnoop(pkt, port ? port : interfaces[pkt->getSrc()]);
+    Port *target_port = findPort(pkt->getAddr(), pkt->getSrc());
 
-    if (snoopTime)
-        return snoopTime;  //Snoop satisfies it
-    else if (port)
-        return port->sendAtomic(pkt);
-    else
-        return 0;
+    SnoopIter s_end = snoopPorts.end();
+    for (SnoopIter s_iter = snoopPorts.begin(); s_iter != s_end; s_iter++) {
+        BusPort *p = *s_iter;
+        // same port should not have both target addresses and snooping
+        assert(p != target_port);
+        if (p->getId() != pkt->getSrc()) {
+            p->sendAtomic(pkt);
+            if (pkt->result != Packet::Unknown) {
+                // response from snoop agent
+                assert(pkt->cmd != orig_cmd);
+                assert(pkt->memInhibitAsserted());
+                assert(pkt->isResponse());
+                // should only happen once
+                assert(response_result == Packet::Unknown);
+                assert(response_cmd == MemCmd::InvalidCmd);
+                // save response state
+                response_result = pkt->result;
+                response_cmd = pkt->cmd;
+                // restore original packet state for remaining snoopers
+                pkt->cmd = orig_cmd;
+                pkt->result = Packet::Unknown;
+            }
+        }
+    }
+
+    // findPort() can come back empty (recvTiming checks for that as
+    // well), so only forward to a target that actually exists
+    Tick response_time = target_port ? target_port->sendAtomic(pkt) : 0;
+
+    // if we got a response from a snooper, restore it here
+    if (response_result != Packet::Unknown) {
+        assert(response_cmd != MemCmd::InvalidCmd);
+        // no one else should have responded
+        assert(pkt->result == Packet::Unknown);
+        assert(pkt->cmd == orig_cmd);
+        pkt->cmd = response_cmd;
+        pkt->result = response_result;
+    }
+
+    // why do we have this packet field and the return value both???
+    pkt->finishTime = std::max(response_time, curTick + clock);
+    return pkt->finishTime;
 }
 
 /** Function called by the port when the bus is receiving a Functional
diff --git a/src/mem/bus.hh b/src/mem/bus.hh
index 5dd98c07e..33619bf45 100644
--- a/src/mem/bus.hh
+++ b/src/mem/bus.hh
@@ -182,9 +182,6 @@ class Bus : public MemObject
      */
     Port *findPort(Addr addr, int id);
 
-    /** Snoop all relevant ports atomicly. */
-    Tick atomicSnoop(PacketPtr pkt, Port* responder);
-
     /** Snoop all relevant ports functionally. */
     void functionalSnoop(PacketPtr pkt, Port *responder);
 
diff --git a/src/mem/cache/base_cache.cc b/src/mem/cache/base_cache.cc
index a47c19e60..c7006550b 100644
--- a/src/mem/cache/base_cache.cc
+++ b/src/mem/cache/base_cache.cc
@@ -41,18 +41,23 @@
 using namespace std;
 
 BaseCache::CachePort::CachePort(const std::string &_name, BaseCache *_cache)
-    : Port(_name, _cache), cache(_cache), otherPort(NULL)
+    : SimpleTimingPort(_name, _cache), cache(_cache), otherPort(NULL),
+      blocked(false), waitingOnRetry(false), mustSendRetry(false),
+      requestCauses(0)
 {
-    blocked = false;
-    waitingOnRetry = false;
 }
 
 
 BaseCache::BaseCache(const std::string &name, Params &params)
     : MemObject(name),
-      blocked(0), blockedSnoop(0),
+      mshrQueue(params.numMSHRs, 4),
+      writeBuffer(params.numWriteBuffers, params.numMSHRs+1000),
       blkSize(params.blkSize),
-      missCount(params.maxMisses), drainEvent(NULL)
+      numTarget(params.numTargets),
+      order(0), blocked(0),  // request order numbering starts at zero
+      noTargetMSHR(NULL),
+      missCount(params.maxMisses),
+      drainEvent(NULL)
 {
 }
 
@@ -71,139 +76,21 @@ BaseCache::CachePort::deviceBlockSize()
     return cache->getBlockSize();
 }
 
-bool
-BaseCache::CachePort::checkFunctional(PacketPtr pkt)
-{
-    //Check storage here first
-    list<PacketPtr>::iterator i = drainList.begin();
-    list<PacketPtr>::iterator iend = drainList.end();
-    bool notDone = true;
-    while (i != iend && notDone) {
-        PacketPtr target = *i;
-        // If the target contains data, and it overlaps the
-        // probed request, need to update data
-        if (target->intersect(pkt)) {
-            DPRINTF(Cache, "Functional %s access to blk_addr %x intersects a drain\n",
-                    pkt->cmdString(), pkt->getAddr() & ~(cache->getBlockSize() - 1));
-            notDone = fixPacket(pkt, target);
-        }
-        i++;
-    }
-    //Also check the response not yet ready to be on the list
-    std::list<std::pair<Tick,PacketPtr> >::iterator j = transmitList.begin();
-    std::list<std::pair<Tick,PacketPtr> >::iterator jend = transmitList.end();
-
-    while (j != jend && notDone) {
-        PacketPtr target = j->second;
-        // If the target contains data, and it overlaps the
-        // probed request, need to update data
-        if (target->intersect(pkt)) {
-            DPRINTF(Cache, "Functional %s access to blk_addr %x intersects a response\n",
-                    pkt->cmdString(), pkt->getAddr() & ~(cache->getBlockSize() - 1));
-            notDone = fixDelayedResponsePacket(pkt, target);
-        }
-        j++;
-    }
-    return notDone;
-}
 
 void
 BaseCache::CachePort::checkAndSendFunctional(PacketPtr pkt)
 {
-    bool notDone = checkFunctional(pkt);
-    if (notDone)
+    checkFunctional(pkt);
+    if (pkt->result != Packet::Success)
         sendFunctional(pkt);
 }
 
 
-void
-BaseCache::CachePort::respond(PacketPtr pkt, Tick time)
-{
-    assert(time >= curTick);
-    if (pkt->needsResponse()) {
-        if (transmitList.empty()) {
-            assert(!responseEvent->scheduled());
-            responseEvent->schedule(time);
-            transmitList.push_back(std::pair<Tick,PacketPtr>(time,pkt));
-            return;
-        }
-
-        // something is on the list and this belongs at the end
-        if (time >= transmitList.back().first) {
-            transmitList.push_back(std::pair<Tick,PacketPtr>(time,pkt));
-            return;
-        }
-        // Something is on the list and this belongs somewhere else
-        std::list<std::pair<Tick,PacketPtr> >::iterator i =
-            transmitList.begin();
-        std::list<std::pair<Tick,PacketPtr> >::iterator end =
-            transmitList.end();
-        bool done = false;
-
-        while (i != end && !done) {
-            if (time < i->first) {
-                if (i == transmitList.begin()) {
-                    //Inserting at begining, reschedule
-                    responseEvent->reschedule(time);
-                }
-                transmitList.insert(i,std::pair<Tick,PacketPtr>(time,pkt));
-                done = true;
-            }
-            i++;
-        }
-    }
-    else {
-        assert(0);
-        // this code was on the cpuSidePort only... do we still need it?
-        if (pkt->cmd != MemCmd::UpgradeReq)
-        {
-            delete pkt->req;
-            delete pkt;
-        }
-    }
-}
-
-bool
-BaseCache::CachePort::drainResponse()
-{
-    DPRINTF(CachePort,
-            "%s attempting to send a retry for response (%i waiting)\n",
-            name(), drainList.size());
-    //We have some responses to drain first
-    PacketPtr pkt = drainList.front();
-    if (sendTiming(pkt)) {
-        drainList.pop_front();
-        DPRINTF(CachePort, "%s sucessful in sending a retry for"
-                "response (%i still waiting)\n", name(), drainList.size());
-        if (!drainList.empty() || isBusRequested()) {
-
-            DPRINTF(CachePort, "%s has more responses/requests\n", name());
-            return false;
-        }
-    } else {
-        waitingOnRetry = true;
-        DPRINTF(CachePort, "%s now waiting on a retry\n", name());
-    }
-    return true;
-}
-
-
 bool
 BaseCache::CachePort::recvRetryCommon()
 {
     assert(waitingOnRetry);
     waitingOnRetry = false;
-    if (!drainList.empty()) {
-        if (!drainResponse()) {
-            // more responses to drain... re-request bus
-            scheduleRequestEvent(curTick + 1);
-        }
-        // Check if we're done draining once this list is empty
-        if (drainList.empty()) {
-            cache->checkDrain();
-        }
-        return true;
-    }
     return false;
 }
 
@@ -451,17 +338,289 @@ BaseCache::regStats()
         .desc("number of cache copies performed")
         ;
 
+    writebacks
+        .init(maxThreadsPerCPU)
+        .name(name() + ".writebacks")
+        .desc("number of writebacks")
+        .flags(total)
+        ;
+
+    // MSHR statistics
+    // MSHR hit statistics
+    for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) {
+        MemCmd cmd(access_idx);
+        const string &cstr = cmd.toString();
+
+        mshr_hits[access_idx]
+            .init(maxThreadsPerCPU)
+            .name(name() + "." + cstr + "_mshr_hits")
+            .desc("number of " + cstr + " MSHR hits")
+            .flags(total | nozero | nonan)
+            ;
+    }
+
+    demandMshrHits
+        .name(name() + ".demand_mshr_hits")
+        .desc("number of demand (read+write) MSHR hits")
+        .flags(total)
+        ;
+    demandMshrHits = mshr_hits[MemCmd::ReadReq] + mshr_hits[MemCmd::WriteReq];
+
+    overallMshrHits
+        .name(name() + ".overall_mshr_hits")
+        .desc("number of overall MSHR hits")
+        .flags(total)
+        ;
+    overallMshrHits = demandMshrHits + mshr_hits[MemCmd::SoftPFReq] +
+        mshr_hits[MemCmd::HardPFReq];
+
+    // MSHR miss statistics
+    for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) {
+        MemCmd cmd(access_idx);
+        const string &cstr = cmd.toString();
+
+        mshr_misses[access_idx]
+            .init(maxThreadsPerCPU)
+            .name(name() + "." + cstr + "_mshr_misses")
+            .desc("number of " + cstr + " MSHR misses")
+            .flags(total | nozero | nonan)
+            ;
+    }
+
+    demandMshrMisses
+        .name(name() + ".demand_mshr_misses")
+        .desc("number of demand (read+write) MSHR misses")
+        .flags(total)
+        ;
+    demandMshrMisses = mshr_misses[MemCmd::ReadReq] + mshr_misses[MemCmd::WriteReq];
+
+    overallMshrMisses
+        .name(name() + ".overall_mshr_misses")
+        .desc("number of overall MSHR misses")
+        .flags(total)
+        ;
+    overallMshrMisses = demandMshrMisses + mshr_misses[MemCmd::SoftPFReq] +
+        mshr_misses[MemCmd::HardPFReq];
+
+    // MSHR miss latency statistics
+    for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) {
+        MemCmd cmd(access_idx);
+        const string &cstr = cmd.toString();
+
+        mshr_miss_latency[access_idx]
+            .init(maxThreadsPerCPU)
+            .name(name() + "." + cstr + "_mshr_miss_latency")
+            .desc("number of " + cstr + " MSHR miss cycles")
+            .flags(total | nozero | nonan)
+            ;
+    }
+
+    demandMshrMissLatency
+        .name(name() + ".demand_mshr_miss_latency")
+        .desc("number of demand (read+write) MSHR miss cycles")
+        .flags(total)
+        ;
+    demandMshrMissLatency = mshr_miss_latency[MemCmd::ReadReq]
+        + mshr_miss_latency[MemCmd::WriteReq];
+
+    overallMshrMissLatency
+        .name(name() + ".overall_mshr_miss_latency")
+        .desc("number of overall MSHR miss cycles")
+        .flags(total)
+        ;
+    overallMshrMissLatency = demandMshrMissLatency +
+        mshr_miss_latency[MemCmd::SoftPFReq] + mshr_miss_latency[MemCmd::HardPFReq];
+
+    // MSHR uncacheable statistics
+    for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) {
+        MemCmd cmd(access_idx);
+        const string &cstr = cmd.toString();
+
+        mshr_uncacheable[access_idx]
+            .init(maxThreadsPerCPU)
+            .name(name() + "." + cstr + "_mshr_uncacheable")
+            .desc("number of " + cstr + " MSHR uncacheable")
+            .flags(total | nozero | nonan)
+            ;
+    }
+
+    overallMshrUncacheable
+        .name(name() + ".overall_mshr_uncacheable_misses")
+        .desc("number of overall MSHR uncacheable misses")
+        .flags(total)
+        ;
+    overallMshrUncacheable = mshr_uncacheable[MemCmd::ReadReq]
+        + mshr_uncacheable[MemCmd::WriteReq] + mshr_uncacheable[MemCmd::SoftPFReq]
+        + mshr_uncacheable[MemCmd::HardPFReq];
+
+    // MSHR uncacheable latency statistics
+    for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) {
+        MemCmd cmd(access_idx);
+        const string &cstr = cmd.toString();
+
+        mshr_uncacheable_lat[access_idx]
+            .init(maxThreadsPerCPU)
+            .name(name() + "." + cstr + "_mshr_uncacheable_latency")
+            .desc("number of " + cstr + " MSHR uncacheable cycles")
+            .flags(total | nozero | nonan)
+            ;
+    }
+
+    overallMshrUncacheableLatency
+        .name(name() + ".overall_mshr_uncacheable_latency")
+        .desc("number of overall MSHR uncacheable cycles")
+        .flags(total)
+        ;
+    overallMshrUncacheableLatency = mshr_uncacheable_lat[MemCmd::ReadReq]
+        + mshr_uncacheable_lat[MemCmd::WriteReq]
+        + mshr_uncacheable_lat[MemCmd::SoftPFReq]
+        + mshr_uncacheable_lat[MemCmd::HardPFReq];
+
+#if 0
+    // MSHR access formulas
+    for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) {
+        MemCmd cmd(access_idx);
+        const string &cstr = cmd.toString();
+
+        mshrAccesses[access_idx]
+            .name(name() + "." + cstr + "_mshr_accesses")
+            .desc("number of " + cstr + " mshr accesses(hits+misses)")
+            .flags(total | nozero | nonan)
+            ;
+        mshrAccesses[access_idx] =
+            mshr_hits[access_idx] + mshr_misses[access_idx]
+            + mshr_uncacheable[access_idx];
+    }
+
+    demandMshrAccesses
+        .name(name() + ".demand_mshr_accesses")
+        .desc("number of demand (read+write) mshr accesses")
+        .flags(total | nozero | nonan)
+        ;
+    demandMshrAccesses = demandMshrHits + demandMshrMisses;
+
+    overallMshrAccesses
+        .name(name() + ".overall_mshr_accesses")
+        .desc("number of overall (read+write) mshr accesses")
+        .flags(total | nozero | nonan)
+        ;
+    overallMshrAccesses = overallMshrHits + overallMshrMisses
+        + overallMshrUncacheable;
+#endif
+
+    // MSHR miss rate formulas
+    for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) {
+        MemCmd cmd(access_idx);
+        const string &cstr = cmd.toString();
+
+        mshrMissRate[access_idx]
+            .name(name() + "." + cstr + "_mshr_miss_rate")
+            .desc("mshr miss rate for " + cstr + " accesses")
+            .flags(total | nozero | nonan)
+            ;
+
+        mshrMissRate[access_idx] =
+            mshr_misses[access_idx] / accesses[access_idx];
+    }
+
+    demandMshrMissRate
+        .name(name() + ".demand_mshr_miss_rate")
+        .desc("mshr miss rate for demand accesses")
+        .flags(total)
+        ;
+    demandMshrMissRate = demandMshrMisses / demandAccesses;
+
+    overallMshrMissRate
+        .name(name() + ".overall_mshr_miss_rate")
+        .desc("mshr miss rate for overall accesses")
+        .flags(total)
+        ;
+    overallMshrMissRate = overallMshrMisses / overallAccesses;
+
+    // mshrMiss latency formulas
+    for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) {
+        MemCmd cmd(access_idx);
+        const string &cstr = cmd.toString();
+
+        avgMshrMissLatency[access_idx]
+            .name(name() + "." + cstr + "_avg_mshr_miss_latency")
+            .desc("average " + cstr + " mshr miss latency")
+            .flags(total | nozero | nonan)
+            ;
+
+        avgMshrMissLatency[access_idx] =
+            mshr_miss_latency[access_idx] / mshr_misses[access_idx];
+    }
+
+    demandAvgMshrMissLatency
+        .name(name() + ".demand_avg_mshr_miss_latency")
+        .desc("average overall mshr miss latency")
+        .flags(total)
+        ;
+    demandAvgMshrMissLatency = demandMshrMissLatency / demandMshrMisses;
+
+    overallAvgMshrMissLatency
+        .name(name() + ".overall_avg_mshr_miss_latency")
+        .desc("average overall mshr miss latency")
+        .flags(total)
+        ;
+    overallAvgMshrMissLatency = overallMshrMissLatency / overallMshrMisses;
+
+    // mshrUncacheable latency formulas
+    for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) {
+        MemCmd cmd(access_idx);
+        const string &cstr = cmd.toString();
+
+        avgMshrUncacheableLatency[access_idx]
+            .name(name() + "." + cstr + "_avg_mshr_uncacheable_latency")
+            .desc("average " + cstr + " mshr uncacheable latency")
+            .flags(total | nozero | nonan)
+            ;
+
+        avgMshrUncacheableLatency[access_idx] =
+            mshr_uncacheable_lat[access_idx] / mshr_uncacheable[access_idx];
+    }
+
+    overallAvgMshrUncacheableLatency
+        .name(name() + ".overall_avg_mshr_uncacheable_latency")
+        .desc("average overall mshr uncacheable latency")
+        .flags(total)
+        ;
+    overallAvgMshrUncacheableLatency = overallMshrUncacheableLatency / overallMshrUncacheable;
+
+    mshr_cap_events
+        .init(maxThreadsPerCPU)
+        .name(name() + ".mshr_cap_events")
+        .desc("number of times MSHR cap was activated")
+        .flags(total)
+        ;
+
+    //software prefetching stats
+    soft_prefetch_mshr_full
+        .init(maxThreadsPerCPU)
+        .name(name() + ".soft_prefetch_mshr_full")
+        .desc("number of mshr full events for SW prefetching instrutions")
+        .flags(total)
+        ;
+
+    mshr_no_allocate_misses
+        .name(name() +".no_allocate_misses")
+        .desc("Number of misses that were no-allocate")
+        ;
+
 }
 
 unsigned int
 BaseCache::drain(Event *de)
 {
+    int count = memSidePort->drain(de) + cpuSidePort->drain(de);
+
     // Set status
-    if (!canDrain()) {
+    if (count != 0) {
         drainEvent = de;
 
         changeState(SimObject::Draining);
-        return 1;
+        return count;
     }
 
     changeState(SimObject::Drained);
diff --git a/src/mem/cache/base_cache.hh b/src/mem/cache/base_cache.hh
index a27ac1788..5969b4b3f 100644
--- a/src/mem/cache/base_cache.hh
+++ b/src/mem/cache/base_cache.hh
@@ -46,11 +46,13 @@
 #include "base/misc.hh"
 #include "base/statistics.hh"
 #include "base/trace.hh"
+#include "mem/cache/miss/mshr_queue.hh"
 #include "mem/mem_object.hh"
 #include "mem/packet.hh"
-#include "mem/port.hh"
+#include "mem/tport.hh"
 #include "mem/request.hh"
 #include "sim/eventq.hh"
+#include "sim/sim_exit.hh"
 
 /**
  * Reasons for Caches to be Blocked.
@@ -79,7 +81,7 @@ class MSHR;
  */
 class BaseCache : public MemObject
 {
-    class CachePort : public Port
+    class CachePort : public SimpleTimingPort
     {
       public:
         BaseCache *cache;
@@ -102,77 +104,76 @@ class BaseCache : public MemObject
 
         void clearBlocked();
 
-        bool checkFunctional(PacketPtr pkt);
-
         void checkAndSendFunctional(PacketPtr pkt);
 
-        bool canDrain() { return drainList.empty() && transmitList.empty(); }
-
-        bool drainResponse();
-
         CachePort *otherPort;
 
         bool blocked;
 
-        bool mustSendRetry;
-
         bool waitingOnRetry;
 
+        bool mustSendRetry;
+
         /**
          * Bit vector for the outstanding requests for the master interface.
          */
         uint8_t requestCauses;
 
-        std::list<PacketPtr> drainList;
-
-        std::list<std::pair<Tick,PacketPtr> > transmitList;
-
         bool isBusRequested() { return requestCauses != 0; }
 
-        // These need to be virtual since the Event objects depend on
-        // cache template parameters.
-        virtual void scheduleRequestEvent(Tick t) = 0;
-
         void requestBus(RequestCause cause, Tick time)
         {
+            DPRINTF(Cache, "Asserting bus request for cause %d\n", cause);
             if (!isBusRequested() && !waitingOnRetry) {
-                scheduleRequestEvent(time);
+                assert(!sendEvent->scheduled());
+                sendEvent->schedule(time);
             }
             requestCauses |= (1 << cause);
         }
 
         void deassertBusRequest(RequestCause cause)
         {
+            DPRINTF(Cache, "Deasserting bus request for cause %d\n", cause);
             requestCauses &= ~(1 << cause);
         }
 
-        void respond(PacketPtr pkt, Tick time);
+        void respond(PacketPtr pkt, Tick time) {
+            schedSendTiming(pkt, time);
+        }
     };
 
   public: //Made public so coherence can get at it.
     CachePort *cpuSidePort;
     CachePort *memSidePort;
 
-  private:
+  protected:
+
+    /** Miss status registers */
+    MSHRQueue mshrQueue;
+
+    /** Write/writeback buffer */
+    MSHRQueue writeBuffer;
+
+    /** Block size of this cache */
+    const int blkSize;
+
+    /** The number of targets for each MSHR. */
+    const int numTarget;
+
+    /** Increasing order number assigned to each incoming request. */
+    uint64_t order;
+
     /**
      * Bit vector of the blocking reasons for the access path.
      * @sa #BlockedCause
      */
     uint8_t blocked;
 
-    /**
-     * Bit vector for the blocking reasons for the snoop path.
-     * @sa #BlockedCause
-     */
-    uint8_t blockedSnoop;
-
-  protected:
-
     /** Stores time the cache blocked for statistics. */
     Tick blockedCycle;
 
-    /** Block size of this cache */
-    const int blkSize;
+    /** Pointer to the MSHR that has no targets. */
+    MSHR *noTargetMSHR;
 
     /** The number of misses to trigger an exit event. */
     Counter missCount;
@@ -246,6 +247,73 @@ class BaseCache : public MemObject
     /** The number of cache copies performed. */
     Stats::Scalar<> cacheCopies;
 
+    /** Number of blocks written back per thread. */
+    Stats::Vector<> writebacks;
+
+    /** Number of misses that hit in the MSHRs per command and thread. */
+    Stats::Vector<> mshr_hits[MemCmd::NUM_MEM_CMDS];
+    /** Demand misses that hit in the MSHRs. */
+    Stats::Formula demandMshrHits;
+    /** Total number of misses that hit in the MSHRs. */
+    Stats::Formula overallMshrHits;
+
+    /** Number of misses that miss in the MSHRs, per command and thread. */
+    Stats::Vector<> mshr_misses[MemCmd::NUM_MEM_CMDS];
+    /** Demand misses that miss in the MSHRs. */
+    Stats::Formula demandMshrMisses;
+    /** Total number of misses that miss in the MSHRs. */
+    Stats::Formula overallMshrMisses;
+
+    /** Number of uncacheable MSHR accesses, per command and thread. */
+    Stats::Vector<> mshr_uncacheable[MemCmd::NUM_MEM_CMDS];
+    /** Total number of MSHR uncacheable misses. */
+    Stats::Formula overallMshrUncacheable;
+
+    /** Total cycle latency of each MSHR miss, per command and thread. */
+    Stats::Vector<> mshr_miss_latency[MemCmd::NUM_MEM_CMDS];
+    /** Total cycle latency of demand MSHR misses. */
+    Stats::Formula demandMshrMissLatency;
+    /** Total cycle latency of overall MSHR misses. */
+    Stats::Formula overallMshrMissLatency;
+
+    /** Total MSHR uncacheable cycles, per command and thread. */
+    Stats::Vector<> mshr_uncacheable_lat[MemCmd::NUM_MEM_CMDS];
+    /** Total number of overall MSHR uncacheable cycles. */
+    Stats::Formula overallMshrUncacheableLatency;
+
+    /** The total number of MSHR accesses per command and thread. */
+    Stats::Formula mshrAccesses[MemCmd::NUM_MEM_CMDS];
+    /** The total number of demand MSHR accesses. */
+    Stats::Formula demandMshrAccesses;
+    /** The total number of MSHR accesses. */
+    Stats::Formula overallMshrAccesses;
+
+    /** The miss rate in the MSHRs per command and thread. */
+    Stats::Formula mshrMissRate[MemCmd::NUM_MEM_CMDS];
+    /** The demand miss rate in the MSHRs. */
+    Stats::Formula demandMshrMissRate;
+    /** The overall miss rate in the MSHRs. */
+    Stats::Formula overallMshrMissRate;
+
+    /** The average latency of an MSHR miss, per command and thread. */
+    Stats::Formula avgMshrMissLatency[MemCmd::NUM_MEM_CMDS];
+    /** The average latency of a demand MSHR miss. */
+    Stats::Formula demandAvgMshrMissLatency;
+    /** The average overall latency of an MSHR miss. */
+    Stats::Formula overallAvgMshrMissLatency;
+
+    /** The average MSHR uncacheable latency, per command and thread. */
+    Stats::Formula avgMshrUncacheableLatency[MemCmd::NUM_MEM_CMDS];
+    /** The average overall MSHR uncacheable latency. */
+    Stats::Formula overallAvgMshrUncacheableLatency;
+
+    /** The number of times a thread hit its MSHR cap. */
+    Stats::Vector<> mshr_cap_events;
+    /** The number of times software prefetches caused the MSHR to block. */
+    Stats::Vector<> soft_prefetch_mshr_full;
+
+    Stats::Scalar<> mshr_no_allocate_misses;
+
     /**
      * @}
      */
@@ -260,12 +328,13 @@ class BaseCache : public MemObject
     class Params
     {
       public:
-        /** List of address ranges of this cache. */
-        std::vector<Range<Addr> > addrRange;
         /** The hit latency for this cache. */
         int hitLatency;
         /** The block size of this cache. */
         int blkSize;
+        int numMSHRs;
+        int numTargets;
+        int numWriteBuffers;
         /**
          * The maximum number of misses this cache should handle before
          * ending the simulation.
@@ -275,10 +344,12 @@ class BaseCache : public MemObject
         /**
          * Construct an instance of this parameter class.
          */
-        Params(std::vector<Range<Addr> > addr_range,
-               int hit_latency, int _blkSize, Counter max_misses)
-            : addrRange(addr_range), hitLatency(hit_latency), blkSize(_blkSize),
-              maxMisses(max_misses)
+        Params(int _hitLatency, int _blkSize,
+               int _numMSHRs, int _numTargets, int _numWriteBuffers,
+               Counter _maxMisses)
+            : hitLatency(_hitLatency), blkSize(_blkSize),
+              numMSHRs(_numMSHRs), numTargets(_numTargets),
+              numWriteBuffers(_numWriteBuffers), maxMisses(_maxMisses)
         {
         }
     };
@@ -307,6 +378,10 @@ class BaseCache : public MemObject
         return blkSize;
     }
 
+
+    Addr blockAlign(Addr addr) const { return (addr & ~(blkSize - 1)); }
+
+
     /**
      * Returns true if the cache is blocked for accesses.
      */
@@ -315,14 +390,6 @@ class BaseCache : public MemObject
         return blocked != 0;
     }
 
-    /**
-     * Returns true if the cache is blocked for snoops.
-     */
-    bool isBlockedForSnoop()
-    {
-        return blockedSnoop != 0;
-    }
-
     /**
      * Marks the access path of the cache as blocked for the given cause. This
      * also sets the blocked flag in the slave interface.
@@ -345,23 +412,6 @@ class BaseCache : public MemObject
         }
     }
 
-    /**
-     * Marks the snoop path of the cache as blocked for the given cause. This
-     * also sets the blocked flag in the master interface.
-     * @param cause The reason to block the snoop path.
-     */
-    void setBlockedForSnoop(BlockedCause cause)
-    {
-        uint8_t flag = 1 << cause;
-        uint8_t old_state = blockedSnoop;
-        if (!(blockedSnoop & flag)) {
-            //Wasn't already blocked for this cause
-            blockedSnoop |= flag;
-            if (!old_state)
-                memSidePort->setBlocked();
-        }
-    }
-
     /**
      * Marks the cache as unblocked for the given cause. This also clears the
      * blocked flags in the appropriate interfaces.
@@ -383,13 +433,6 @@ class BaseCache : public MemObject
                 cpuSidePort->clearBlocked();
             }
         }
-        if (blockedSnoop & flag)
-        {
-            blockedSnoop &= ~flag;
-            if (!isBlockedForSnoop()) {
-                memSidePort->clearBlocked();
-            }
-        }
     }
 
     /**
@@ -418,55 +461,26 @@ class BaseCache : public MemObject
     void deassertMemSideBusRequest(RequestCause cause)
     {
         memSidePort->deassertBusRequest(cause);
-        checkDrain();
+        // checkDrain();
     }
 
-    /**
-     * Send a response to the slave interface.
-     * @param pkt The request being responded to.
-     * @param time The time the response is ready.
-     */
-    void respond(PacketPtr pkt, Tick time)
-    {
-        cpuSidePort->respond(pkt, time);
-    }
+    virtual unsigned int drain(Event *de);
 
-    /**
-     * Suppliess the data if cache to cache transfers are enabled.
-     * @param pkt The bus transaction to fulfill.
-     */
-    void respondToSnoop(PacketPtr pkt, Tick time)
-    {
-        memSidePort->respond(pkt, time);
-    }
+    virtual bool inCache(Addr addr) = 0;
 
-    virtual unsigned int drain(Event *de);
+    virtual bool inMissQueue(Addr addr) = 0;
 
-    void checkDrain()
+    void incMissCount(PacketPtr pkt)
     {
-        if (drainEvent && canDrain()) {
-            drainEvent->process();
-            changeState(SimObject::Drained);
-            // Clear the drain event
-            drainEvent = NULL;
-        }
-    }
+        misses[pkt->cmdToIndex()][0/*pkt->req->getThreadNum()*/]++;
 
-    bool canDrain()
-    {
-        if (isMemSideBusRequested()) {
-            return false;
-        } else if (memSidePort && !memSidePort->canDrain()) {
-            return false;
-        } else if (cpuSidePort && !cpuSidePort->canDrain()) {
-            return false;
+        if (missCount) {
+            --missCount;
+            if (missCount == 0)
+                exitSimLoop("A cache reached the maximum miss count");
         }
-        return true;
     }
 
-    virtual bool inCache(Addr addr) = 0;
-
-    virtual bool inMissQueue(Addr addr) = 0;
 };
 
 #endif //__BASE_CACHE_HH__
diff --git a/src/mem/cache/cache.cc b/src/mem/cache/cache.cc
index 2b4e7b9c8..96f9a2e11 100644
--- a/src/mem/cache/cache.cc
+++ b/src/mem/cache/cache.cc
@@ -58,9 +58,6 @@
 #include "mem/cache/tags/split_lifo.hh"
 #endif
 
-#include "mem/cache/miss/miss_queue.hh"
-#include "mem/cache/miss/blocking_buffer.hh"
-
 #include "mem/cache/coherence/simple_coherence.hh"
 
 #include "mem/cache/cache_impl.hh"
diff --git a/src/mem/cache/cache.hh b/src/mem/cache/cache.hh
index e14b2efe8..16d15cf86 100644
--- a/src/mem/cache/cache.hh
+++ b/src/mem/cache/cache.hh
@@ -45,12 +45,11 @@
 
 #include "mem/cache/base_cache.hh"
 #include "mem/cache/cache_blk.hh"
-#include "mem/cache/miss/miss_buffer.hh"
+#include "mem/cache/miss/mshr.hh"
 
 #include "sim/eventq.hh"
 
 //Forward decleration
-class MSHR;
 class BasePrefetcher;
 
 /**
@@ -86,29 +85,14 @@ class Cache : public BaseCache
             return static_cast<Cache<TagStore,Coherence> *>(cache);
         }
 
-        void processRequestEvent();
-        void processResponseEvent();
-
         virtual void getDeviceAddressRanges(AddrRangeList &resp,
                                             bool &snoop);
 
         virtual bool recvTiming(PacketPtr pkt);
 
-        virtual void recvRetry();
-
         virtual Tick recvAtomic(PacketPtr pkt);
 
         virtual void recvFunctional(PacketPtr pkt);
-
-        typedef EventWrapper<CpuSidePort, &CpuSidePort::processResponseEvent>
-                ResponseEvent;
-
-        typedef EventWrapper<CpuSidePort, &CpuSidePort::processRequestEvent>
-                RequestEvent;
-
-        virtual void scheduleRequestEvent(Tick t) {
-            new RequestEvent(this, t);
-        }
     };
 
     class MemSidePort : public CachePort
@@ -124,8 +108,9 @@ class Cache : public BaseCache
             return static_cast<Cache<TagStore,Coherence> *>(cache);
         }
 
-        void processRequestEvent();
-        void processResponseEvent();
+        void sendPacket();
+
+        void processSendEvent();
 
         virtual void getDeviceAddressRanges(AddrRangeList &resp,
                                             bool &snoop);
@@ -138,21 +123,13 @@ class Cache : public BaseCache
 
         virtual void recvFunctional(PacketPtr pkt);
 
-        typedef EventWrapper<MemSidePort, &MemSidePort::processResponseEvent>
-                ResponseEvent;
-
-        typedef EventWrapper<MemSidePort, &MemSidePort::processRequestEvent>
-                RequestEvent;
-
-        virtual void scheduleRequestEvent(Tick t) {
-            new RequestEvent(this, t);
-        }
+        typedef EventWrapper<MemSidePort, &MemSidePort::processSendEvent>
+                SendEvent;
     };
 
     /** Tag and data Storage */
     TagStore *tags;
-    /** Miss and Writeback handler */
-    MissBuffer *missQueue;
+
     /** Coherence protocol. */
     Coherence *coherence;
 
@@ -176,23 +153,6 @@ class Cache : public BaseCache
      */
     int hitLatency;
 
-     /**
-      * A permanent mem req to always be used to cause invalidations.
-      * Used to append to target list, to cause an invalidation.
-      */
-    PacketPtr invalidatePkt;
-    Request *invalidateReq;
-
-    /**
-     * Policy class for performing compression.
-     */
-    CompressionAlgorithm *compressionAlg;
-
-    /**
-     * The block size of this cache. Set to value in the Tags object.
-     */
-    const int16_t blkSize;
-
     /**
      * Can this cache should allocate a block on a line-sized write miss.
      */
@@ -200,50 +160,6 @@ class Cache : public BaseCache
 
     const bool prefetchMiss;
 
-    /**
-     * Can the data can be stored in a compressed form.
-     */
-    const bool storeCompressed;
-
-    /**
-     * Do we need to compress blocks on writebacks (i.e. because
-     * writeback bus is compressed but storage is not)?
-     */
-    const bool compressOnWriteback;
-
-    /**
-     * The latency of a compression operation.
-     */
-    const int16_t compLatency;
-
-    /**
-     * Should we use an adaptive compression scheme.
-     */
-    const bool adaptiveCompression;
-
-    /**
-     * Do writebacks need to be compressed (i.e. because writeback bus
-     * is compressed), whether or not they're already compressed for
-     * storage.
-     */
-    const bool writebackCompressed;
-
-    /**
-     * Compare the internal block data to the fast access block data.
-     * @param blk The cache block to check.
-     * @return True if the data is the same.
-     */
-    bool verifyData(BlkType *blk);
-
-    /**
-     * Update the internal data of the block. The data to write is assumed to
-     * be in the fast access data.
-     * @param blk The block with the data to update.
-     * @param writebacks A list to store any generated writebacks.
-     * @param compress_block True if we should compress this block
-     */
-    void updateData(BlkType *blk, PacketList &writebacks, bool compress_block);
-
     /**
      * Handle a replacement for the given request.
      * @param blk A pointer to the block, usually NULL
@@ -251,7 +167,7 @@ class Cache : public BaseCache
      * @param new_state The new state of the block.
      * @param writebacks A list to store any generated writebacks.
      */
-    BlkType* doReplacement(BlkType *blk, PacketPtr &pkt,
+    BlkType* doReplacement(BlkType *blk, PacketPtr pkt,
                            CacheBlk::State new_state, PacketList &writebacks);
 
     /**
@@ -263,59 +179,38 @@ class Cache : public BaseCache
      * @return Pointer to the cache block touched by the request. NULL if it
      * was a miss.
      */
-    BlkType* handleAccess(PacketPtr &pkt, int & lat,
-                          PacketList & writebacks, bool update = true);
-
+    bool access(PacketPtr pkt, BlkType *blk, int & lat);
 
     /**
      *Handle doing the Compare and Swap function for SPARC.
      */
-    void cmpAndSwap(BlkType *blk, PacketPtr &pkt);
-
-    /**
-     * Populates a cache block and handles all outstanding requests for the
-     * satisfied fill request. This version takes an MSHR pointer and uses its
-     * request to fill the cache block, while repsonding to its targets.
-     * @param blk The cache block if it already exists.
-     * @param mshr The MSHR that contains the fill data and targets to satisfy.
-     * @param new_state The state of the new cache block.
-     * @param writebacks List for any writebacks that need to be performed.
-     * @return Pointer to the new cache block.
-     */
-    BlkType* handleFill(BlkType *blk, MSHR * mshr, CacheBlk::State new_state,
-                        PacketList & writebacks, PacketPtr pkt);
+    void cmpAndSwap(BlkType *blk, PacketPtr pkt);
 
     /**
      * Populates a cache block and handles all outstanding requests for the
      * satisfied fill request. This version takes two memory requests. One
      * contains the fill data, the other is an optional target to satisfy.
      * Used for Cache::probe.
-     * @param blk The cache block if it already exists.
      * @param pkt The memory request with the fill data.
-     * @param new_state The state of the new cache block.
+     * @param blk The cache block if it already exists.
      * @param writebacks List for any writebacks that need to be performed.
-     * @param target The memory request to perform after the fill.
      * @return Pointer to the new cache block.
      */
-    BlkType* handleFill(BlkType *blk, PacketPtr &pkt,
-                        CacheBlk::State new_state,
-                        PacketList & writebacks, PacketPtr target = NULL);
+    BlkType *handleFill(PacketPtr pkt, BlkType *blk,
+                        PacketList &writebacks);
 
-    /**
-     * Sets the blk to the new state and handles the given request.
-     * @param blk The cache block being snooped.
-     * @param new_state The new coherence state for the block.
-     * @param pkt The request to satisfy
-     */
-    void handleSnoop(BlkType *blk, CacheBlk::State new_state,
-                     PacketPtr &pkt);
+    bool satisfyCpuSideRequest(PacketPtr pkt, BlkType *blk);
+    bool satisfyTarget(MSHR::Target *target, BlkType *blk);
+    void satisfyMSHR(MSHR *mshr, PacketPtr pkt, BlkType *blk);
+
+    void doTimingSupplyResponse(PacketPtr req_pkt, uint8_t *blk_data);
 
     /**
      * Sets the blk to the new state.
      * @param blk The cache block being snooped.
      * @param new_state The new coherence state for the block.
      */
-    void handleSnoop(BlkType *blk, CacheBlk::State new_state);
+    void handleSnoop(PacketPtr ptk, BlkType *blk, bool is_timing);
 
     /**
      * Create a writeback request for the given block.
@@ -330,44 +225,24 @@ class Cache : public BaseCache
     {
       public:
         TagStore *tags;
-        MissBuffer *missQueue;
         Coherence *coherence;
         BaseCache::Params baseParams;
         BasePrefetcher*prefetcher;
         bool prefetchAccess;
-        int hitLatency;
-        CompressionAlgorithm *compressionAlg;
-        const int16_t blkSize;
         const bool doFastWrites;
         const bool prefetchMiss;
-        const bool storeCompressed;
-        const bool compressOnWriteback;
-        const int16_t compLatency;
-        const bool adaptiveCompression;
-        const bool writebackCompressed;
 
-        Params(TagStore *_tags, MissBuffer *mq, Coherence *coh,
+        Params(TagStore *_tags, Coherence *coh,
                BaseCache::Params params,
                BasePrefetcher *_prefetcher,
                bool prefetch_access, int hit_latency,
                bool do_fast_writes,
-               bool store_compressed, bool adaptive_compression,
-               bool writeback_compressed,
-               CompressionAlgorithm *_compressionAlg, int comp_latency,
                bool prefetch_miss)
-            : tags(_tags), missQueue(mq), coherence(coh),
+            : tags(_tags), coherence(coh),
               baseParams(params),
               prefetcher(_prefetcher), prefetchAccess(prefetch_access),
-              hitLatency(hit_latency),
-              compressionAlg(_compressionAlg),
-              blkSize(_tags->getBlockSize()),
               doFastWrites(do_fast_writes),
-              prefetchMiss(prefetch_miss),
-              storeCompressed(store_compressed),
-              compressOnWriteback(!store_compressed && writeback_compressed),
-              compLatency(comp_latency),
-              adaptiveCompression(adaptive_compression),
-              writebackCompressed(writeback_compressed)
+              prefetchMiss(prefetch_miss)
         {
         }
     };
@@ -385,85 +260,105 @@ class Cache : public BaseCache
      * @param pkt The request to perform.
      * @return The result of the access.
      */
-    bool access(PacketPtr &pkt);
+    bool timingAccess(PacketPtr pkt);
 
     /**
-     * Selects a request to send on the bus.
-     * @return The memory request to service.
+     * Performs the access specified by the request.
+     * @param pkt The request to perform.
+     * @return The result of the access.
      */
-    PacketPtr getPacket();
+    Tick atomicAccess(PacketPtr pkt);
 
     /**
-     * Was the request was sent successfully?
-     * @param pkt The request.
-     * @param success True if the request was sent successfully.
+     * Performs the access specified by the request.
+     * @param pkt The request to perform.
+     * @param otherSidePort Port on the opposite side of the cache (TODO confirm use).
      */
-    void sendResult(PacketPtr &pkt, MSHR* mshr, bool success);
+    void functionalAccess(PacketPtr pkt, CachePort *otherSidePort);
 
     /**
      * Handles a response (cache line fill/write ack) from the bus.
      * @param pkt The request being responded to.
      */
-    void handleResponse(PacketPtr &pkt);
+    void handleResponse(PacketPtr pkt);
 
     /**
      * Snoops bus transactions to maintain coherence.
      * @param pkt The current bus transaction.
      */
-    void snoop(PacketPtr &pkt);
+    void snoopTiming(PacketPtr pkt);
 
-    void snoopResponse(PacketPtr &pkt);
+    /**
+     * Snoop for the provided request in the cache and return the estimated
+     * time of completion.
+     * @param pkt The memory request to snoop
+     * @return The estimated completion time.
+     */
+    Tick snoopAtomic(PacketPtr pkt);
 
     /**
      * Squash all requests associated with specified thread.
      * intended for use by I-cache.
      * @param threadNum The thread to squash.
      */
-    void squash(int threadNum)
-    {
-        missQueue->squash(threadNum);
-    }
+    void squash(int threadNum);
 
     /**
-     * Return the number of outstanding misses in a Cache.
-     * Default returns 0.
-     *
-     * @retval unsigned The number of missing still outstanding.
+     * Allocate a new MSHR or write buffer to handle a miss.
+     * @param pkt The access that missed.
+     * @param time The time to continue processing the miss.
+     * @param isFill Whether to fetch & allocate a block or just forward it.
+     * @param requestBus Whether to request the memory-side bus at 'time'.
      */
-    unsigned outstandingMisses() const
-    {
-        return missQueue->getMisses();
-    }
+    MSHR *allocateBuffer(PacketPtr pkt, Tick time, bool isFill,
+                         bool requestBus);
 
     /**
-     * Perform the access specified in the request and return the estimated
-     * time of completion. This function can either update the hierarchy state
-     * or just perform the access wherever the data is found depending on the
-     * state of the update flag.
-     * @param pkt The memory request to satisfy
-     * @param update If true, update the hierarchy, otherwise just perform the
-     * request.
-     * @return The estimated completion time.
+     * Selects an outstanding request to service.
+     * @return The request to service, NULL if none found.
      */
-    Tick probe(PacketPtr &pkt, bool update, CachePort * otherSidePort);
+    MSHR *getNextMSHR();
+    PacketPtr getPacket();
 
     /**
-     * Snoop for the provided request in the cache and return the estimated
-     * time of completion.
-     * @todo Can a snoop probe not change state?
-     * @param pkt The memory request to satisfy
-     * @param update If true, update the hierarchy, otherwise just perform the
-     * request.
-     * @return The estimated completion time.
+     * Marks a request as in service (sent on the bus). This can have side
+     * effects, since storage for no-response commands is deallocated once
+     * they are successfully sent.
+     * @param mshr The MSHR whose request was sent on the bus.
      */
-    Tick snoopProbe(PacketPtr &pkt);
+    void markInService(MSHR *mshr);
+
+    /**
+     * Collect statistics and free resources of a satisfied request.
+     * @param pkt The request that has been satisfied.
+     * @param time The time when the request is satisfied.
+     */
+    void handleResponse(PacketPtr pkt, Tick time);
+
+    /**
+     * Perform the given writeback request.
+     * @param pkt The writeback request.
+     */
+    void doWriteback(PacketPtr pkt);
+
+    /**
+     * Return whether there are any outstanding misses.
+     */
+    bool outstandingMisses() const
+    {
+        return mshrQueue.allocated != 0;
+    }
+
+    CacheBlk *findBlock(Addr addr) {
+        return tags->findBlock(addr);
+    }
 
     bool inCache(Addr addr) {
         return (tags->findBlock(addr) != 0);
     }
 
     bool inMissQueue(Addr addr) {
-        return (missQueue->findMSHR(addr) != 0);
+        return (mshrQueue.findMatch(addr) != 0);
     }
 };
 
diff --git a/src/mem/cache/cache_blk.hh b/src/mem/cache/cache_blk.hh
index fa00a0f5a..d2aba9480 100644
--- a/src/mem/cache/cache_blk.hh
+++ b/src/mem/cache/cache_blk.hh
@@ -39,6 +39,7 @@
 
 #include "sim/core.hh"		// for Tick
 #include "arch/isa_traits.hh"	// for Addr
+#include "mem/packet.hh"
 #include "mem/request.hh"
 
 /**
@@ -51,8 +52,6 @@ enum CacheBlkStatusBits {
     BlkWritable =	0x02,
     /** dirty (modified) */
     BlkDirty =		0x04,
-    /** compressed */
-    BlkCompressed =	0x08,
     /** block was referenced */
     BlkReferenced =	0x10,
     /** block was a hardware prefetch yet unaccessed*/
@@ -174,20 +173,11 @@ class CacheBlk
      * Check to see if a block has been written.
      * @return True if the block is dirty.
      */
-    bool isModified() const
+    bool isDirty() const
     {
         return (status & BlkDirty) != 0;
     }
 
-    /**
-     * Check to see if this block contains compressed data.
-     * @return True iF the block's data is compressed.
-     */
-    bool isCompressed() const
-    {
-        return (status & BlkCompressed) != 0;
-    }
-
     /**
      * Check if this block has been referenced.
      * @return True if the block has been referenced.
@@ -213,10 +203,10 @@ class CacheBlk
      * redundant records on the list, but that's OK, as they'll all
      * get blown away at the next store.
      */
-    void trackLoadLocked(Request *req)
+    void trackLoadLocked(PacketPtr pkt)
     {
-        assert(req->isLocked());
-        lockList.push_front(Lock(req));
+        assert(pkt->isLocked());
+        lockList.push_front(Lock(pkt->req));
     }
 
     /**
@@ -230,9 +220,10 @@ class CacheBlk
      * @return True if write should proceed, false otherwise.  Returns
      * false only in the case of a failed store conditional.
      */
-    bool checkWrite(Request *req)
+    bool checkWrite(PacketPtr pkt)
     {
-        if (req->isLocked()) {
+        Request *req = pkt->req;
+        if (pkt->isLocked()) {
             // it's a store conditional... have to check for matching
             // load locked.
             bool success = false;
diff --git a/src/mem/cache/cache_builder.cc b/src/mem/cache/cache_builder.cc
index bc1a8a775..307c851a2 100644
--- a/src/mem/cache/cache_builder.cc
+++ b/src/mem/cache/cache_builder.cc
@@ -70,10 +70,6 @@
 #include "base/compression/null_compression.hh"
 #include "base/compression/lzss_compression.hh"
 
-// MissQueue Templates
-#include "mem/cache/miss/miss_queue.hh"
-#include "mem/cache/miss/blocking_buffer.hh"
-
 // Coherence Templates
 #include "mem/cache/coherence/simple_coherence.hh"
 
@@ -207,13 +203,9 @@ END_INIT_SIM_OBJECT_PARAMS(BaseCache)
         else {                                                          \
             BUILD_NULL_PREFETCHER(TAGS);                                \
         }                                                               \
-        Cache<TAGS, c>::Params params(tags, mq, coh, base_params,       \
+        Cache<TAGS, c>::Params params(tags, coh, base_params,       \
                                       pf, prefetch_access, latency, \
                                       true,                             \
-                                      store_compressed,                 \
-                                      adaptive_compression,             \
-                                      compressed_bus,                   \
-                                      compAlg, compression_latency,     \
                                       prefetch_miss);                   \
         Cache<TAGS, c> *retval =                                        \
             new Cache<TAGS, c>(getInstanceName(), params);              \
@@ -301,8 +293,6 @@ END_INIT_SIM_OBJECT_PARAMS(BaseCache)
     } while (0)
 
 #define BUILD_COHERENCE(b) do {						\
-        SimpleCoherence *coh = new SimpleCoherence(protocol);           \
-        BUILD_CACHES(SimpleCoherence);                                  \
     } while (0)
 
 #if defined(USE_TAGGED)
@@ -369,8 +359,9 @@ CREATE_SIM_OBJECT(BaseCache)
     }
 
     // Build BaseCache param object
-    BaseCache::Params base_params(addr_range, latency,
-                                  block_size, max_miss_count);
+    BaseCache::Params base_params(latency, block_size,
+                                  mshrs, tgts_per_mshr, write_buffers,
+                                  max_miss_count);
 
     //Warnings about prefetcher policy
     if (pf_policy == "none" && (prefetch_miss || prefetch_access)) {
@@ -408,14 +399,8 @@ CREATE_SIM_OBJECT(BaseCache)
     const void *repl = NULL;
 #endif
 
-    if (mshrs == 1 /*|| out_bus->doEvents() == false*/) {
-        BlockingBuffer *mq = new BlockingBuffer(true);
-        BUILD_COHERENCE(BlockingBuffer);
-    } else {
-        MissQueue *mq = new MissQueue(mshrs, tgts_per_mshr, write_buffers,
-                                      true, prefetch_miss);
-        BUILD_COHERENCE(MissQueue);
-    }
+    SimpleCoherence *coh = new SimpleCoherence(protocol);
+    BUILD_CACHES(SimpleCoherence);
     return NULL;
 }
 
diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh
index a7f96603e..0f66e613c 100644
--- a/src/mem/cache/cache_impl.hh
+++ b/src/mem/cache/cache_impl.hh
@@ -37,17 +37,8 @@
  * Cache definitions.
  */
 
-#include <assert.h>
-#include <math.h>
-
-#include <cassert>
-#include <iostream>
-#include <cstring>
-#include <string>
-
 #include "sim/host.hh"
 #include "base/misc.hh"
-#include "cpu/smt.hh"
 
 #include "mem/cache/cache.hh"
 #include "mem/cache/cache_blk.hh"
@@ -56,25 +47,16 @@
 
 #include "sim/sim_exit.hh" // for SimExitEvent
 
-bool SIGNAL_NACK_HACK;
 
 template<class TagStore, class Coherence>
 Cache<TagStore,Coherence>::Cache(const std::string &_name,
                                  Cache<TagStore,Coherence>::Params &params)
     : BaseCache(_name, params.baseParams),
       prefetchAccess(params.prefetchAccess),
-      tags(params.tags), missQueue(params.missQueue),
+      tags(params.tags),
       coherence(params.coherence), prefetcher(params.prefetcher),
-      hitLatency(params.hitLatency),
-      compressionAlg(params.compressionAlg),
-      blkSize(params.blkSize),
       doFastWrites(params.doFastWrites),
-      prefetchMiss(params.prefetchMiss),
-      storeCompressed(params.storeCompressed),
-      compressOnWriteback(params.compressOnWriteback),
-      compLatency(params.compLatency),
-      adaptiveCompression(params.adaptiveCompression),
-      writebackCompressed(params.writebackCompressed)
+      prefetchMiss(params.prefetchMiss)
 {
     cpuSidePort = new CpuSidePort(_name + "-cpu_side_port", this);
     memSidePort = new MemSidePort(_name + "-mem_side_port", this);
@@ -82,12 +64,8 @@ Cache<TagStore,Coherence>::Cache(const std::string &_name,
     memSidePort->setOtherPort(cpuSidePort);
 
     tags->setCache(this);
-    missQueue->setCache(this);
-    missQueue->setPrefetcher(prefetcher);
     coherence->setCache(this);
     prefetcher->setCache(this);
-    invalidateReq = new Request((Addr) NULL, blkSize, 0);
-    invalidatePkt = new Packet(invalidateReq, MemCmd::InvalidateReq, 0);
 }
 
 template<class TagStore, class Coherence>
@@ -96,51 +74,221 @@ Cache<TagStore,Coherence>::regStats()
 {
     BaseCache::regStats();
     tags->regStats(name());
-    missQueue->regStats(name());
     coherence->regStats(name());
     prefetcher->regStats(name());
 }
 
 template<class TagStore, class Coherence>
-typename Cache<TagStore,Coherence>::BlkType*
-Cache<TagStore,Coherence>::handleAccess(PacketPtr &pkt, int & lat,
-                                        PacketList & writebacks, bool update)
+Port *
+Cache<TagStore,Coherence>::getPort(const std::string &if_name, int idx)
+{
+    if (if_name == "" || if_name == "cpu_side") {
+        return cpuSidePort;
+    } else if (if_name == "mem_side") {
+        return memSidePort;
+    } else if (if_name == "functional") {
+        return new CpuSidePort(name() + "-cpu_side_funcport", this);
+    } else {
+        panic("Port name %s unrecognized\n", if_name);
+    }
+}
+
+template<class TagStore, class Coherence>
+void
+Cache<TagStore,Coherence>::deletePortRefs(Port *p)
+{
+    if (cpuSidePort == p || memSidePort == p)
+        panic("Can only delete functional ports\n");
+
+    delete p;
+}
+
+
+template<class TagStore, class Coherence>
+void
+Cache<TagStore,Coherence>::cmpAndSwap(BlkType *blk, PacketPtr pkt)
 {
-    // Set the block offset here
+    uint64_t overwrite_val;
+    bool overwrite_mem;
+    uint64_t condition_val64;
+    uint32_t condition_val32;
+
     int offset = tags->extractBlkOffset(pkt->getAddr());
+    uint8_t *blk_data = blk->data + offset;
+
+    assert(sizeof(uint64_t) >= pkt->getSize());
+
+    overwrite_mem = true;
+    // keep a copy of our possible write value, and copy what is at the
+    // memory address into the packet
+    pkt->writeData((uint8_t *)&overwrite_val);
+    pkt->setData(blk_data);
+
+    if (pkt->req->isCondSwap()) {
+        if (pkt->getSize() == sizeof(uint64_t)) {
+            condition_val64 = pkt->req->getExtraData();
+            overwrite_mem = !std::memcmp(&condition_val64, blk_data,
+                                         sizeof(uint64_t));
+        } else if (pkt->getSize() == sizeof(uint32_t)) {
+            condition_val32 = (uint32_t)pkt->req->getExtraData();
+            overwrite_mem = !std::memcmp(&condition_val32, blk_data,
+                                         sizeof(uint32_t));
+        } else
+            panic("Invalid size for conditional read/write\n");
+    }
+
+    if (overwrite_mem)
+        std::memcpy(blk_data, &overwrite_val, pkt->getSize());
+}
+
+
+/////////////////////////////////////////////////////
+//
+// MSHR helper functions
+//
+/////////////////////////////////////////////////////
+
+
+template<class TagStore, class Coherence>
+MSHR *
+Cache<TagStore,Coherence>::allocateBuffer(PacketPtr pkt, Tick time,
+                                          bool isFill, bool requestBus)
+{
+    int  size = isFill ? blkSize : pkt->getSize();
+    Addr addr = isFill ? tags->blkAlign(pkt->getAddr()) : pkt->getAddr();
+
+    MSHR *mshr = NULL;
+
+    if (pkt->isWrite()) {
+        /**
+         * @todo Add write merging here.
+         */
+        mshr = writeBuffer.allocate(addr, size, pkt, isFill);
+        mshr->order = order++;
 
-    BlkType *blk = NULL;
-    if (update) {
-        blk = tags->findBlock(pkt->getAddr(), lat);
+        if (writeBuffer.isFull()) {
+            setBlocked(Blocked_NoWBBuffers);
+        }
+
+        if (requestBus) {
+            requestMemSideBus(Request_WB, time);
+        }
     } else {
-        blk = tags->findBlock(pkt->getAddr());
-        lat = 0;
+        mshr = mshrQueue.allocate(addr, size, pkt, isFill);
+        mshr->order = order++;
+        if (mshrQueue.isFull()) {
+            setBlocked(Blocked_NoMSHRs);
+        }
+        if (requestBus) {
+            requestMemSideBus(Request_MSHR, time);
+        }
     }
-    if (blk != NULL) {
 
-        if (!update) {
+    assert(mshr != NULL);
+    return mshr;
+}
 
-            if (pkt->isWrite()){
-                assert(offset < blkSize);
-                assert(pkt->getSize() <= blkSize);
-                assert(offset+pkt->getSize() <= blkSize);
-                std::memcpy(blk->data + offset, pkt->getPtr<uint8_t>(),
-                       pkt->getSize());
-            } else if (pkt->isReadWrite()) {
-                cmpAndSwap(blk, pkt);
-            } else if (!(pkt->flags & SATISFIED)) {
-                pkt->flags |= SATISFIED;
-                pkt->result = Packet::Success;
-                assert(offset < blkSize);
-                assert(pkt->getSize() <= blkSize);
-                assert(offset + pkt->getSize() <=blkSize);
-                std::memcpy(pkt->getPtr<uint8_t>(), blk->data + offset,
-                       pkt->getSize());
+
+template<class TagStore, class Coherence>
+void
+Cache<TagStore,Coherence>::markInService(MSHR *mshr)
+{
+    bool unblock = false;
+    BlockedCause cause = NUM_BLOCKED_CAUSES;
+
+    /**
+     * Use the MSHR's queue pointer to select the correct queue
+     * (write buffer or MSHR queue) to update.
+     */
+    if (mshr->queue == &writeBuffer) {
+        // Forwarding a write/ writeback, don't need to change
+        // the command
+        unblock = writeBuffer.isFull();
+        writeBuffer.markInService(mshr);
+        if (!writeBuffer.havePending()){
+            deassertMemSideBusRequest(Request_WB);
+        }
+        if (unblock) {
+            // Do we really unblock?
+            unblock = !writeBuffer.isFull();
+            cause = Blocked_NoWBBuffers;
+        }
+    } else {
+        assert(mshr->queue == &mshrQueue);
+        unblock = mshrQueue.isFull();
+        mshrQueue.markInService(mshr);
+        if (!mshrQueue.havePending()){
+            deassertMemSideBusRequest(Request_MSHR);
+        }
+#if 0
+        if (mshr->originalCmd == MemCmd::HardPFReq) {
+            DPRINTF(HWPrefetch, "%s:Marking a HW_PF in service\n",
+                    name());
+            //Also clear pending if need be
+            if (!prefetcher->havePending())
+            {
+                deassertMemSideBusRequest(Request_PF);
             }
-            return blk;
         }
+#endif
+        if (unblock) {
+            unblock = !mshrQueue.isFull();
+            cause = Blocked_NoMSHRs;
+        }
+    }
+    if (unblock) {
+        clearBlocked(cause);
+    }
+}
+
+
+template<class TagStore, class Coherence>
+void
+Cache<TagStore,Coherence>::squash(int threadNum)
+{
+    bool unblock = false;
+    BlockedCause cause = NUM_BLOCKED_CAUSES;
 
-        // Hit
+    if (noTargetMSHR && noTargetMSHR->threadNum == threadNum) {
+        noTargetMSHR = NULL;
+        unblock = true;
+        cause = Blocked_NoTargets;
+    }
+    if (mshrQueue.isFull()) {
+        unblock = true;
+        cause = Blocked_NoMSHRs;
+    }
+    mshrQueue.squash(threadNum);
+    if (!mshrQueue.havePending()) {
+        deassertMemSideBusRequest(Request_MSHR);
+    }
+    if (unblock && !mshrQueue.isFull()) {
+        clearBlocked(cause);
+    }
+}
+
+/////////////////////////////////////////////////////
+//
+// Access path: requests coming in from the CPU side
+//
+/////////////////////////////////////////////////////
+
+template<class TagStore, class Coherence>
+bool
+Cache<TagStore,Coherence>::access(PacketPtr pkt, BlkType *blk, int &lat)
+{
+    bool satisfied = false;  // assume the worst
+
+    if (prefetchAccess) {
+        //We are determining prefetches on access stream, call prefetcher
+        prefetcher->handleMiss(pkt, curTick);
+    }
+
+    DPRINTF(Cache, "%s %x %s\n", pkt->cmdString(), pkt->getAddr(),
+            (blk) ? "hit" : "miss");
+
+    if (blk != NULL) {
+        // HIT
         if (blk->isPrefetch()) {
             //Signal that this was a hit under prefetch (no need for
             //use prefetch (only can get here if true)
@@ -154,639 +302,620 @@ Cache<TagStore,Coherence>::handleAccess(PacketPtr &pkt, int & lat,
             }
         }
 
-        if ((pkt->isReadWrite() && blk->isWritable()) ||
-            (pkt->isWrite() && blk->isWritable()) ||
-            (pkt->isRead() && blk->isValid())) {
-
-            // We are satisfying the request
-            pkt->flags |= SATISFIED;
-
-            if (blk->isCompressed()) {
-                // If the data is compressed, need to increase the latency
-                lat += (compLatency/4);
-            }
-
-            bool write_data = false;
-
-            assert(verifyData(blk));
-
-            assert(offset < blkSize);
-            assert(pkt->getSize() <= blkSize);
-            assert(offset+pkt->getSize() <= blkSize);
+        if (pkt->needsExclusive() ? blk->isWritable() : blk->isValid()) {
+            // OK to satisfy access
+            hits[pkt->cmdToIndex()][0/*pkt->req->getThreadNum()*/]++;
+            satisfied = true;
 
-            if (pkt->isWrite()) {
-                if (blk->checkWrite(pkt->req)) {
-                    write_data = true;
+            if (pkt->cmd == MemCmd::SwapReq) {
+                cmpAndSwap(blk, pkt);
+            } else if (pkt->isWrite()) {
+                if (blk->checkWrite(pkt)) {
                     blk->status |= BlkDirty;
-                    std::memcpy(blk->data + offset, pkt->getPtr<uint8_t>(),
-                           pkt->getSize());
+                    pkt->writeDataToBlock(blk->data, blkSize);
                 }
-            } else if (pkt->isReadWrite()) {
-                cmpAndSwap(blk, pkt);
             } else {
                 assert(pkt->isRead());
-                if (pkt->req->isLocked()) {
-                    blk->trackLoadLocked(pkt->req);
+                if (pkt->isLocked()) {
+                    blk->trackLoadLocked(pkt);
                 }
-                std::memcpy(pkt->getPtr<uint8_t>(), blk->data + offset,
-                            pkt->getSize());
-            }
-
-            if (write_data ||
-                (adaptiveCompression && blk->isCompressed()))
-            {
-                // If we wrote data, need to update the internal block
-                // data.
-                updateData(blk, writebacks,
-                           !(adaptiveCompression &&
-                             blk->isReferenced()));
+                pkt->setDataFromBlock(blk->data, blkSize);
             }
         } else {
-            // permission violation, treat it as a miss
-            blk = NULL;
+            // permission violation... nothing to do here, leave unsatisfied
+            // for statistics purposes this counts like a complete miss
+            incMissCount(pkt);
         }
     } else {
         // complete miss (no matching block)
-        if (pkt->req->isLocked() && pkt->isWrite()) {
+        incMissCount(pkt);
+
+        if (pkt->isLocked() && pkt->isWrite()) {
             // miss on store conditional... just give up now
             pkt->req->setExtraData(0);
-            pkt->flags |= SATISFIED;
+            satisfied = true;
         }
     }
 
-    return blk;
+    return satisfied;
 }
 
-template<class TagStore, class Coherence>
-void
-Cache<TagStore,Coherence>::cmpAndSwap(BlkType *blk, PacketPtr &pkt){
-            uint64_t overwrite_val;
-            bool overwrite_mem;
-            uint64_t condition_val64;
-            uint32_t condition_val32;
-
-            int offset = tags->extractBlkOffset(pkt->getAddr());
-
-            assert(sizeof(uint64_t) >= pkt->getSize());
-
-            overwrite_mem = true;
-            // keep a copy of our possible write value, and copy what is at the
-            // memory address into the packet
-            std::memcpy(&overwrite_val, pkt->getPtr<uint8_t>(), pkt->getSize());
-            std::memcpy(pkt->getPtr<uint8_t>(), blk->data + offset,
-                        pkt->getSize());
-
-            if (pkt->req->isCondSwap()) {
-                if (pkt->getSize() == sizeof(uint64_t)) {
-                    condition_val64 = pkt->req->getExtraData();
-                    overwrite_mem = !std::memcmp(&condition_val64, blk->data + offset,
-                                                 sizeof(uint64_t));
-                } else if (pkt->getSize() == sizeof(uint32_t)) {
-                    condition_val32 = (uint32_t)pkt->req->getExtraData();
-                    overwrite_mem = !std::memcmp(&condition_val32, blk->data + offset,
-                                                 sizeof(uint32_t));
-                } else
-                    panic("Invalid size for conditional read/write\n");
-            }
-
-            if (overwrite_mem)
-                std::memcpy(blk->data + offset,
-                            &overwrite_val, pkt->getSize());
-
-}
 
 template<class TagStore, class Coherence>
-typename Cache<TagStore,Coherence>::BlkType*
-Cache<TagStore,Coherence>::handleFill(BlkType *blk, PacketPtr &pkt,
-                                      CacheBlk::State new_state,
-                                      PacketList & writebacks,
-                                      PacketPtr target)
+bool
+Cache<TagStore,Coherence>::timingAccess(PacketPtr pkt)
 {
-#ifndef NDEBUG
-    BlkType *tmp_blk = tags->findBlock(pkt->getAddr());
-    assert(tmp_blk == blk);
-#endif
-    blk = doReplacement(blk, pkt, new_state, writebacks);
+//@todo Add back in MemDebug Calls
+//    MemDebug::cacheAccess(pkt);
 
+    // we charge hitLatency for doing just about anything here
+    Tick time =  curTick + hitLatency;
 
-    if (pkt->isRead()) {
-        std::memcpy(blk->data, pkt->getPtr<uint8_t>(), blkSize);
+    if (pkt->req->isUncacheable()) {
+        allocateBuffer(pkt, time, false, true);
+        assert(pkt->needsResponse()); // else we should delete it here??
+        return true;
     }
 
-        blk->whenReady = pkt->finishTime;
+    PacketList writebacks;
+    int lat = hitLatency;
+    BlkType *blk = tags->findBlock(pkt->getAddr(), lat);
+    bool satisfied = false;
+
+    Addr blk_addr = pkt->getAddr() & ~(Addr(blkSize-1));
 
-    // Respond to target, if any
-    if (target) {
+    MSHR *mshr = mshrQueue.findMatch(blk_addr);
 
-        target->flags |= SATISFIED;
+    if (!mshr) {
+        // no outstanding access to this block, look up in cache
+        // (otherwise if we allow reads while there's an outstanding
+        // write miss, the read could return stale data out of the
+        // cache block... a more aggressive system could detect the
+        // overlap (if any) and forward data out of the MSHRs, but we
+        // don't do that yet)
+        satisfied = access(pkt, blk, lat);
+    }
 
-        if (target->cmd == MemCmd::InvalidateReq) {
-            tags->invalidateBlk(blk);
-            blk = NULL;
+#if 0
+    // If this is a block size write/hint (WH64) allocate the block here
+    // if the coherence protocol allows it.
+    /** @todo make the fast write alloc (wh64) work with coherence. */
+    /** @todo Do we want to do fast writes for writebacks as well? */
+    if (!blk && pkt->getSize() >= blkSize && coherence->allowFastWrites() &&
+        (pkt->cmd == MemCmd::WriteReq
+         || pkt->cmd == MemCmd::WriteInvalidateReq) ) {
+        // no outstanding misses, so we can do this
+        MSHR *outstanding_miss = mshrQueue.findMatch(pkt->getAddr());
+        if (pkt->cmd == MemCmd::WriteInvalidateReq || !outstanding_miss) {
+            if (outstanding_miss) {
+                warn("WriteInv doing a fastallocate"
+                     "with an outstanding miss to the same address\n");
+            }
+            blk = handleFill(NULL, pkt, BlkValid | BlkWritable,
+                                   writebacks);
+            ++fastWrites;
         }
+    }
+#endif
 
-        if (blk && ((target->isWrite() || target->isReadWrite()) ?
-                    blk->isWritable() : blk->isValid())) {
-            assert(target->isWrite() || target->isReadWrite() || target->isRead());
-            assert(target->getOffset(blkSize) + target->getSize() <= blkSize);
-            if (target->isWrite()) {
-                if (blk->checkWrite(pkt->req)) {
-                    blk->status |= BlkDirty;
-                    std::memcpy(blk->data + target->getOffset(blkSize),
-                           target->getPtr<uint8_t>(), target->getSize());
-                }
-            } else if (target->isReadWrite()) {
-                cmpAndSwap(blk, target);
-            } else {
-                if (pkt->req->isLocked()) {
-                    blk->trackLoadLocked(pkt->req);
-                }
-                std::memcpy(target->getPtr<uint8_t>(),
-                       blk->data + target->getOffset(blkSize),
-                       target->getSize());
+    // copy writebacks to write buffer
+    while (!writebacks.empty()) {
+        PacketPtr wbPkt = writebacks.front();
+        allocateBuffer(wbPkt, time, false, true);
+        writebacks.pop_front();
+    }
+
+    bool needsResponse = pkt->needsResponse();
+
+    if (satisfied) {
+        assert(needsResponse);
+        pkt->makeTimingResponse();
+        cpuSidePort->respond(pkt, curTick+lat);
+    } else {
+        // miss
+        if (prefetchMiss)
+            prefetcher->handleMiss(pkt, time);
+
+        if (mshr) {
+            // MSHR hit
+            //@todo remove hw_pf here
+            mshr_hits[pkt->cmdToIndex()][0/*pkt->req->getThreadNum()*/]++;
+            if (mshr->threadNum != 0/*pkt->req->getThreadNum()*/) {
+                mshr->threadNum = -1;
             }
+            mshr->allocateTarget(pkt, true);
+            if (mshr->getNumTargets() == numTarget) {
+                noTargetMSHR = mshr;
+                setBlocked(Blocked_NoTargets);
+                mshrQueue.moveToFront(mshr);
+            }
+        } else {
+            // no MSHR
+            mshr_misses[pkt->cmdToIndex()][0/*pkt->req->getThreadNum()*/]++;
+            // always mark as cache fill for now... if we implement
+            // no-write-allocate or bypass accesses this will have to
+            // be changed.
+            allocateBuffer(pkt, time, true, true);
         }
     }
 
-    if (blk) {
-        // Need to write the data into the block
-        updateData(blk, writebacks, !adaptiveCompression || true);
+    if (!needsResponse) {
+        // Need to clean up the packet on a writeback miss, but leave
+        // the request for the next level.
+        delete pkt;
     }
-    return blk;
+
+    return true;
 }
 
+
 template<class TagStore, class Coherence>
-typename Cache<TagStore,Coherence>::BlkType*
-Cache<TagStore,Coherence>::handleFill(BlkType *blk, MSHR * mshr,
-                                      CacheBlk::State new_state,
-                                      PacketList & writebacks, PacketPtr pkt)
+Tick
+Cache<TagStore,Coherence>::atomicAccess(PacketPtr pkt)
 {
-/*
-#ifndef NDEBUG
-    BlkType *tmp_blk = findBlock(mshr->pkt->getAddr());
-    assert(tmp_blk == blk);
-#endif
-    PacketPtr pkt = mshr->pkt;*/
-    blk = doReplacement(blk, pkt, new_state, writebacks);
+    // should assert here that there are no outstanding MSHRs or
+    // writebacks... that would mean that someone used an atomic
+    // access in timing mode
 
-    if (pkt->isRead()) {
-        std::memcpy(blk->data, pkt->getPtr<uint8_t>(), blkSize);
+    if (pkt->req->isUncacheable()) {
+        // Uncacheables just go through
+        return memSidePort->sendAtomic(pkt);
     }
 
-    blk->whenReady = pkt->finishTime;
+    PacketList writebacks;
+    int lat = hitLatency;
+    BlkType *blk = tags->findBlock(pkt->getAddr(), lat);
+    bool satisfied = access(pkt, blk, lat);
 
+    if (!satisfied) {
+        // MISS
+        CacheBlk::State old_state = (blk) ? blk->status : 0;
+        MemCmd cmd = coherence->getBusCmd(pkt->cmd, old_state);
+        Packet busPkt = Packet(pkt->req, cmd, Packet::Broadcast, blkSize);
+        busPkt.allocate();
 
-    // respond to MSHR targets, if any
+        DPRINTF(Cache, "Sending a atomic %s for %x\n",
+                busPkt.cmdString(), busPkt.getAddr());
 
-    // First offset for critical word first calculations
-    int initial_offset = 0;
+        lat += memSidePort->sendAtomic(&busPkt);
 
-    if (mshr->hasTargets()) {
-        initial_offset = mshr->getTarget()->getOffset(blkSize);
+        DPRINTF(Cache, "Receive response: %s for addr %x in state %i\n",
+                busPkt.cmdString(), busPkt.getAddr(), old_state);
+
+        blk = handleFill(&busPkt, blk, writebacks);
+        bool status = satisfyCpuSideRequest(pkt, blk);
+        assert(status);
     }
 
-    while (mshr->hasTargets()) {
-        PacketPtr target = mshr->getTarget();
+    // We now have the block one way or another (hit or completed miss)
 
-        target->flags |= SATISFIED;
+    // Handle writebacks if needed
+    while (!writebacks.empty()){
+        PacketPtr wbPkt = writebacks.front();
+        memSidePort->sendAtomic(wbPkt);
+        writebacks.pop_front();
+        delete wbPkt;
+    }
 
-        // How many bytes pass the first request is this one
-        int transfer_offset = target->getOffset(blkSize) - initial_offset;
-        if (transfer_offset < 0) {
-            transfer_offset += blkSize;
-        }
+    if (pkt->needsResponse()) {
+        pkt->makeAtomicResponse();
+        pkt->result = Packet::Success;
+    }
 
-        // If critical word (no offset) return first word time
-        Tick completion_time = tags->getHitLatency() +
-            transfer_offset ? pkt->finishTime : pkt->firstWordTime;
+    return lat;
+}
 
-        if (target->cmd == MemCmd::InvalidateReq) {
-            //Mark the blk as invalid now, if it hasn't been already
-            if (blk) {
-                tags->invalidateBlk(blk);
-                blk = NULL;
-            }
 
-            //Also get rid of the invalidate
-            mshr->popTarget();
+template<class TagStore, class Coherence>
+void
+Cache<TagStore,Coherence>::functionalAccess(PacketPtr pkt,
+                                            CachePort *otherSidePort)
+{
+    Addr blk_addr = pkt->getAddr() & ~(blkSize - 1);
+    BlkType *blk = tags->findBlock(pkt->getAddr());
 
-            DPRINTF(Cache, "Popping off a Invalidate for addr %x\n",
-                    pkt->getAddr());
+    if (blk && pkt->checkFunctional(blk_addr, blkSize, blk->data)) {
+        // request satisfied from block
+        return;
+    }
 
-            continue;
-        }
+    // Need to check for outstanding misses and writes
 
-        if (blk && ((target->isWrite() || target->isReadWrite()) ?
-            blk->isWritable() : blk->isValid())) {
-            assert(target->isWrite() || target->isRead() || target->isReadWrite() );
-            assert(target->getOffset(blkSize) + target->getSize() <= blkSize);
-            if (target->isWrite()) {
-                if (blk->checkWrite(pkt->req)) {
-                    blk->status |= BlkDirty;
-                    std::memcpy(blk->data + target->getOffset(blkSize),
-                           target->getPtr<uint8_t>(), target->getSize());
-                }
-            } else if (target->isReadWrite()) {
-                cmpAndSwap(blk, target);
-            } else {
-                if (target->req->isLocked()) {
-                    blk->trackLoadLocked(target->req);
-                }
-                std::memcpy(target->getPtr<uint8_t>(),
-                       blk->data + target->getOffset(blkSize),
-                       target->getSize());
-            }
-        } else {
-            // Invalid access, need to do another request
-            // can occur if block is invalidated, or not correct
-            // permissions
-//            mshr->pkt = pkt;
-            break;
-        }
-        if (!target->req->isUncacheable()) {
-            missLatency[target->cmdToIndex()][0/*pkt->req->getThreadNum()*/] +=
-                completion_time - target->time;
+    // There can only be one matching outstanding miss.
+    MSHR *mshr = mshrQueue.findMatch(blk_addr);
+    if (mshr) {
+        MSHR::TargetList *targets = mshr->getTargetList();
+        MSHR::TargetList::iterator i = targets->begin();
+        MSHR::TargetList::iterator end = targets->end();
+        for (; i != end; ++i) {
+            PacketPtr targetPkt = i->pkt;
+            if (pkt->checkFunctional(targetPkt))
+                return;
         }
-        respond(target, completion_time);
-        mshr->popTarget();
     }
 
-    if (blk) {
-        // Need to write the data into the block
-        updateData(blk, writebacks, !adaptiveCompression || true);
+    // There can be many matching outstanding writes.
+    std::vector<MSHR*> writes;
+    writeBuffer.findMatches(blk_addr, writes);
+    for (int i = 0; i < writes.size(); ++i) {
+        MSHR *mshr = writes[i];
+        if (pkt->checkFunctional(mshr->addr, mshr->size, mshr->writeData))
+            return;
     }
 
-    return blk;
+    otherSidePort->checkAndSendFunctional(pkt);
 }
 
 
+/////////////////////////////////////////////////////
+//
+// Response handling: responses from the memory side
+//
+/////////////////////////////////////////////////////
+
 template<class TagStore, class Coherence>
 void
-Cache<TagStore,Coherence>::handleSnoop(BlkType *blk,
-                                       CacheBlk::State new_state,
-                                       PacketPtr &pkt)
+Cache<TagStore,Coherence>::handleResponse(PacketPtr pkt, Tick time)
 {
-    //Must have the block to supply
-    assert(blk);
-    // Can only supply data, and if it hasn't already been supllied
-    assert(pkt->isRead());
-    assert(!(pkt->flags & SATISFIED));
-    pkt->flags |= SATISFIED;
-    Addr offset = pkt->getOffset(blkSize);
-    assert(offset < blkSize);
-    assert(pkt->getSize() <= blkSize);
-    assert(offset + pkt->getSize() <=blkSize);
-    std::memcpy(pkt->getPtr<uint8_t>(), blk->data + offset, pkt->getSize());
-
-    handleSnoop(blk, new_state);
+    MSHR *mshr = dynamic_cast<MSHR*>(pkt->senderState);
+#ifndef NDEBUG
+    int num_targets = mshr->getNumTargets();
+#endif
+
+    bool unblock = false;
+    bool unblock_target = false;
+    BlockedCause cause = NUM_BLOCKED_CAUSES;
+
+    if (mshr->isCacheFill) {
+#if 0
+        mshr_miss_latency[mshr->originalCmd.toInt()][0/*pkt->req->getThreadNum()*/] +=
+            curTick - pkt->time;
+#endif
+        // targets were handled in the cache tags
+        if (mshr == noTargetMSHR) {
+            // we always clear at least one target
+            unblock_target = true;
+            cause = Blocked_NoTargets;
+            noTargetMSHR = NULL;
+        }
+
+        if (mshr->hasTargets()) {
+            // Didn't satisfy all the targets, need to resend
+            mshrQueue.markPending(mshr);
+            mshr->order = order++;
+            requestMemSideBus(Request_MSHR, time);
+        }
+        else {
+            unblock = mshrQueue.isFull();
+            mshrQueue.deallocate(mshr);
+            if (unblock) {
+                unblock = !mshrQueue.isFull();
+                cause = Blocked_NoMSHRs;
+            }
+        }
+    } else {
+        if (pkt->req->isUncacheable()) {
+            mshr_uncacheable_lat[pkt->cmd.toInt()][0/*pkt->req->getThreadNum()*/] +=
+                curTick - pkt->time;
+        }
+        if (mshr->hasTargets() && pkt->req->isUncacheable()) {
+            // Should only have 1 target if we had any
+            assert(num_targets == 1);
+            MSHR::Target *target = mshr->getTarget();
+            assert(target->cpuSide);
+            mshr->popTarget();
+            if (pkt->isRead()) {
+                target->pkt->setData(pkt->getPtr<uint8_t>());
+            }
+            cpuSidePort->respond(target->pkt, time);
+            assert(!mshr->hasTargets());
+        }
+        else if (mshr->hasTargets()) {
+            //Must be a no_allocate with possibly more than one target
+            assert(!mshr->isCacheFill);
+            while (mshr->hasTargets()) {
+                MSHR::Target *target = mshr->getTarget();
+                assert(target->isCpuSide());
+                mshr->popTarget();
+                if (pkt->isRead()) {
+                    target->pkt->setData(pkt->getPtr<uint8_t>());
+                }
+                cpuSidePort->respond(target->pkt, time);
+            }
+        }
+
+        if (pkt->isWrite()) {
+            // If the write buffer is full, we might unblock now
+            unblock = writeBuffer.isFull();
+            writeBuffer.deallocate(mshr);
+            if (unblock) {
+                // Did we really unblock?
+                unblock = !writeBuffer.isFull();
+                cause = Blocked_NoWBBuffers;
+            }
+        } else {
+            unblock = mshrQueue.isFull();
+            mshrQueue.deallocate(mshr);
+            if (unblock) {
+                unblock = !mshrQueue.isFull();
+                cause = Blocked_NoMSHRs;
+            }
+        }
+    }
+    if (unblock || unblock_target) {
+        clearBlocked(cause);
+    }
 }
 
+
 template<class TagStore, class Coherence>
 void
-Cache<TagStore,Coherence>::handleSnoop(BlkType *blk,
-                                       CacheBlk::State new_state)
+Cache<TagStore,Coherence>::handleResponse(PacketPtr pkt)
 {
-    if (blk && blk->status != new_state) {
-        if ((new_state && BlkValid) == 0) {
-            tags->invalidateBlk(blk);
-        } else {
-            assert(new_state >= 0 && new_state < 128);
-            blk->status = new_state;
+    Tick time = curTick + hitLatency;
+    MSHR *mshr = dynamic_cast<MSHR*>(pkt->senderState);
+    assert(mshr);
+    if (pkt->result == Packet::Nacked) {
+        //pkt->reinitFromRequest();
+        warn("NACKs from devices not connected to the same bus "
+             "not implemented\n");
+        return;
+    }
+    assert(pkt->result != Packet::BadAddress);
+    assert(pkt->result == Packet::Success);
+    DPRINTF(Cache, "Handling reponse to %x\n", pkt->getAddr());
+
+    if (mshr->isCacheFill) {
+        DPRINTF(Cache, "Block for addr %x being updated in Cache\n",
+                pkt->getAddr());
+        BlkType *blk = tags->findBlock(pkt->getAddr());
+        PacketList writebacks;
+        blk = handleFill(pkt, blk, writebacks);
+        satisfyMSHR(mshr, pkt, blk);
+        // copy writebacks to write buffer
+        while (!writebacks.empty()) {
+            PacketPtr wbPkt = writebacks.front();
+            allocateBuffer(wbPkt, time, false, true);
+            writebacks.pop_front();
         }
     }
+    handleResponse(pkt, time);
 }
 
+
+
+
 template<class TagStore, class Coherence>
 PacketPtr
 Cache<TagStore,Coherence>::writebackBlk(BlkType *blk)
 {
-    assert(blk && blk->isValid() && blk->isModified());
-    int data_size = blkSize;
-    data_size = blk->size;
-    if (compressOnWriteback) {
-        // not already compressed
-        // need to compress to ship it
-        assert(data_size == blkSize);
-        uint8_t *tmp_data = new uint8_t[blkSize];
-        data_size = compressionAlg->compress(tmp_data,blk->data,
-                                      data_size);
-        delete [] tmp_data;
-    }
+    assert(blk && blk->isValid() && blk->isDirty());
 
-/*    PacketPtr writeback =
-        buildWritebackReq(tags->regenerateBlkAddr(blk->tag, blk->set),
-                          blk->asid, blkSize,
-                          blk->data, data_size);
-*/
+    writebacks[0/*pkt->req->getThreadNum()*/]++;
 
     Request *writebackReq =
         new Request(tags->regenerateBlkAddr(blk->tag, blk->set), blkSize, 0);
     PacketPtr writeback = new Packet(writebackReq, MemCmd::Writeback, -1);
     writeback->allocate();
-    std::memcpy(writeback->getPtr<uint8_t>(),blk->data,blkSize);
+    std::memcpy(writeback->getPtr<uint8_t>(), blk->data, blkSize);
 
     blk->status &= ~BlkDirty;
     return writeback;
 }
 
 
-template<class TagStore, class Coherence>
-bool
-Cache<TagStore,Coherence>::verifyData(BlkType *blk)
-{
-    bool retval;
-    // The data stored in the blk
-    uint8_t *blk_data = new uint8_t[blkSize];
-    tags->readData(blk, blk_data);
-    // Pointer for uncompressed data, assumed uncompressed
-    uint8_t *tmp_data = blk_data;
-    // The size of the data being stored, assumed uncompressed
-    int data_size = blkSize;
-
-    // If the block is compressed need to uncompress to access
-    if (blk->isCompressed()){
-        // Allocate new storage for the data
-        tmp_data = new uint8_t[blkSize];
-        data_size = compressionAlg->uncompress(tmp_data,blk_data, blk->size);
-        assert(data_size == blkSize);
-        // Don't need to keep blk_data around
-        delete [] blk_data;
-    } else {
-        assert(blkSize == blk->size);
-    }
-
-    retval = std::memcmp(tmp_data, blk->data, blkSize) == 0;
-    delete [] tmp_data;
-    return retval;
-}
-
-template<class TagStore, class Coherence>
-void
-Cache<TagStore,Coherence>::updateData(BlkType *blk, PacketList &writebacks,
-                                        bool compress_block)
-{
-    if (storeCompressed && compress_block) {
-        uint8_t *comp_data = new uint8_t[blkSize];
-        int new_size = compressionAlg->compress(comp_data, blk->data, blkSize);
-        if (new_size > (blkSize - tags->getSubBlockSize())){
-            // no benefit to storing it compressed
-            blk->status &= ~BlkCompressed;
-            tags->writeData(blk, blk->data, blkSize,
-                          writebacks);
-        } else {
-            // Store the data compressed
-            blk->status |= BlkCompressed;
-            tags->writeData(blk, comp_data, new_size,
-                          writebacks);
-        }
-        delete [] comp_data;
-    } else {
-        blk->status &= ~BlkCompressed;
-        tags->writeData(blk, blk->data, blkSize, writebacks);
-    }
-}
-
+// Note that the reason we return a list of writebacks rather than
+// inserting them directly in the write buffer is that this function
+// is called by both atomic and timing-mode accesses, and in atomic
+// mode we don't mess with the write buffer (we just perform the
+// writebacks atomically once the original request is complete).
 template<class TagStore, class Coherence>
 typename Cache<TagStore,Coherence>::BlkType*
-Cache<TagStore,Coherence>::doReplacement(BlkType *blk, PacketPtr &pkt,
-                                         CacheBlk::State new_state,
-                                         PacketList &writebacks)
+Cache<TagStore,Coherence>::handleFill(PacketPtr pkt, BlkType *blk,
+                                      PacketList &writebacks)
 {
+    Addr addr = pkt->getAddr();
+
     if (blk == NULL) {
+
         // need to do a replacement
-        BlkList compress_list;
-        blk = tags->findReplacement(pkt, writebacks, compress_list);
-        while (adaptiveCompression && !compress_list.empty()) {
-            updateData(compress_list.front(), writebacks, true);
-            compress_list.pop_front();
-        }
+        blk = tags->findReplacement(addr, writebacks);
         if (blk->isValid()) {
             DPRINTF(Cache, "replacement: replacing %x with %x: %s\n",
-                    tags->regenerateBlkAddr(blk->tag,blk->set), pkt->getAddr(),
-                    (blk->isModified()) ? "writeback" : "clean");
+                    tags->regenerateBlkAddr(blk->tag, blk->set), addr,
+                    blk->isDirty() ? "writeback" : "clean");
 
-            if (blk->isModified()) {
-                // Need to write the data back
+            if (blk->isDirty()) {
+                // Save writeback packet for handling by caller
                 writebacks.push_back(writebackBlk(blk));
             }
         }
-        blk->tag = tags->extractTag(pkt->getAddr(), blk);
+
+        blk->tag = tags->extractTag(addr);
+        blk->status = coherence->getNewState(pkt);
+        assert(pkt->isRead());
     } else {
-        // must be a status change
-        // assert(blk->status != new_state);
-        if (blk->status == new_state) warn("Changing state to same value\n");
+        // existing block... probably an upgrade
+        assert(blk->tag == tags->extractTag(addr));
+        // either we're getting new data or the block should already be valid
+        assert(pkt->isRead() || blk->isValid());
+        CacheBlk::State old_state = blk->status;
+        blk->status = coherence->getNewState(pkt, old_state);
+        if (blk->status != old_state)
+            DPRINTF(Cache, "Block addr %x moving from state %i to %i\n",
+                    addr, old_state, blk->status);
+        else
+            warn("Changing state to same value\n");
     }
 
-    blk->status = new_state;
+    // if we got new data, copy it in
+    if (pkt->isRead()) {
+        std::memcpy(blk->data, pkt->getPtr<uint8_t>(), blkSize);
+    }
+
+    blk->whenReady = pkt->finishTime;
+
     return blk;
 }
 
 
 template<class TagStore, class Coherence>
 bool
-Cache<TagStore,Coherence>::access(PacketPtr &pkt)
+Cache<TagStore,Coherence>::satisfyCpuSideRequest(PacketPtr pkt, BlkType *blk)
 {
-//@todo Add back in MemDebug Calls
-//    MemDebug::cacheAccess(pkt);
-    BlkType *blk = NULL;
-    PacketList writebacks;
-    int size = blkSize;
-    int lat = hitLatency;
-    if (prefetchAccess) {
-        //We are determining prefetches on access stream, call prefetcher
-        prefetcher->handleMiss(pkt, curTick);
-    }
-
-    Addr blk_addr = pkt->getAddr() & ~(Addr(blkSize-1));
-
-    if (!pkt->req->isUncacheable()) {
-        if (!missQueue->findMSHR(blk_addr)) {
-            blk = handleAccess(pkt, lat, writebacks);
-        }
-    } else {
-        size = pkt->getSize();
-    }
-    // If this is a block size write/hint (WH64) allocate the block here
-    // if the coherence protocol allows it.
-    /** @todo make the fast write alloc (wh64) work with coherence. */
-    /** @todo Do we want to do fast writes for writebacks as well? */
-    if (!blk && pkt->getSize() >= blkSize && coherence->allowFastWrites() &&
-        (pkt->cmd == MemCmd::WriteReq
-         || pkt->cmd == MemCmd::WriteInvalidateReq) ) {
-        // not outstanding misses, can do this
-        MSHR* outstanding_miss = missQueue->findMSHR(pkt->getAddr());
-        if (pkt->cmd == MemCmd::WriteInvalidateReq || !outstanding_miss) {
-            if (outstanding_miss) {
-                warn("WriteInv doing a fastallocate"
-                     "with an outstanding miss to the same address\n");
+    if (blk && (pkt->needsExclusive() ? blk->isWritable() : blk->isValid())) {
+        assert(pkt->isWrite() || pkt->isReadWrite() || pkt->isRead());
+        assert(pkt->getOffset(blkSize) + pkt->getSize() <= blkSize);
+
+        if (pkt->isWrite()) {
+            if (blk->checkWrite(pkt)) {
+                blk->status |= BlkDirty;
+                pkt->writeDataToBlock(blk->data, blkSize);
             }
-            blk = handleFill(NULL, pkt, BlkValid | BlkWritable,
-                                   writebacks);
-            ++fastWrites;
+        } else if (pkt->isReadWrite()) {
+            cmpAndSwap(blk, pkt);
+        } else {
+            if (pkt->isLocked()) {
+                blk->trackLoadLocked(pkt);
+            }
+            pkt->setDataFromBlock(blk->data, blkSize);
         }
+
+        return true;
+    } else {
+        return false;
     }
-    while (!writebacks.empty()) {
-        PacketPtr wbPkt = writebacks.front();
-        missQueue->doWriteback(wbPkt);
-        writebacks.pop_front();
-        delete wbPkt;
-    }
+}
+
+
+template<class TagStore, class Coherence>
+bool
+Cache<TagStore,Coherence>::satisfyTarget(MSHR::Target *target, BlkType *blk)
+{
+    assert(target != NULL);
+    assert(target->isCpuSide());
+    return satisfyCpuSideRequest(target->pkt, blk);
+}
+
+template<class TagStore, class Coherence>
+void
+Cache<TagStore,Coherence>::satisfyMSHR(MSHR *mshr, PacketPtr pkt,
+                                       BlkType *blk)
+{
+    // respond to MSHR targets, if any
 
-    DPRINTF(Cache, "%s %x %s\n", pkt->cmdString(), pkt->getAddr(),
-            (blk) ? "hit" : "miss");
+    // First offset for critical word first calculations
+    int initial_offset = 0;
 
-    if (blk) {
-        // Hit
-        hits[pkt->cmdToIndex()][0/*pkt->req->getThreadNum()*/]++;
-        // clear dirty bit if write through
-        respond(pkt, curTick+lat);
-        return true;
+    if (mshr->hasTargets()) {
+        initial_offset = mshr->getTarget()->pkt->getOffset(blkSize);
     }
 
-    // Miss
-    if (!pkt->req->isUncacheable()) {
-        misses[pkt->cmdToIndex()][0/*pkt->req->getThreadNum()*/]++;
-        /** @todo Move miss count code into BaseCache */
-        if (missCount) {
-            --missCount;
-            if (missCount == 0)
-                exitSimLoop("A cache reached the maximum miss count");
-        }
-    }
+    while (mshr->hasTargets()) {
+        MSHR::Target *target = mshr->getTarget();
 
-    if (pkt->flags & SATISFIED) {
-        // happens when a store conditional fails because it missed
-        // the cache completely
-        respond(pkt, curTick+lat);
-    } else {
-        missQueue->handleMiss(pkt, size, curTick + hitLatency);
-    }
+        if (!satisfyTarget(target, blk)) {
+            // Invalid access, need to do another request
+            // can occur if block is invalidated, or not correct
+            // permissions
+            break;
+        }
 
-    if (!pkt->needsResponse()) {
-        //Need to clean up the packet on a writeback miss, but leave the request
-        //for the next level.
-        delete pkt;
-    }
 
-    return true;
-}
+        // How many bytes past the first request is this one
+        int transfer_offset = target->pkt->getOffset(blkSize) - initial_offset;
+        if (transfer_offset < 0) {
+            transfer_offset += blkSize;
+        }
 
+        // Offset 0 = critical word (firstWordTime); note parens around ?:
+        Tick completion_time = tags->getHitLatency() +
+            (transfer_offset ? pkt->finishTime : pkt->firstWordTime);
 
-template<class TagStore, class Coherence>
-PacketPtr
-Cache<TagStore,Coherence>::getPacket()
-{
-    assert(missQueue->havePending());
-    PacketPtr pkt = missQueue->getPacket();
-    if (pkt) {
-        if (!pkt->req->isUncacheable()) {
-            if (pkt->cmd == MemCmd::HardPFReq)
-                misses[MemCmd::HardPFReq][0/*pkt->req->getThreadNum()*/]++;
-            BlkType *blk = tags->findBlock(pkt->getAddr());
-            MemCmd cmd =
-                coherence->getBusCmd(pkt->cmd, (blk) ? blk->status : 0);
-            missQueue->setBusCmd(pkt, cmd);
+        if (!target->pkt->req->isUncacheable()) {
+            missLatency[target->pkt->cmdToIndex()][0/*pkt->req->getThreadNum()*/] +=
+                completion_time - target->time;
         }
+        target->pkt->makeTimingResponse();
+        cpuSidePort->respond(target->pkt, completion_time);
+        mshr->popTarget();
     }
-
-    assert(!isMemSideBusRequested() || missQueue->havePending());
-    assert(!pkt || pkt->time <= curTick);
-    SIGNAL_NACK_HACK = false;
-    return pkt;
 }
 
+
+/////////////////////////////////////////////////////
+//
+// Snoop path: requests coming in from the memory side
+//
+/////////////////////////////////////////////////////
+
 template<class TagStore, class Coherence>
 void
-Cache<TagStore,Coherence>::sendResult(PacketPtr &pkt, MSHR* mshr,
-                                                bool success)
+Cache<TagStore,Coherence>::doTimingSupplyResponse(PacketPtr req_pkt,
+                                                  uint8_t *blk_data)
 {
-    if (success && !(SIGNAL_NACK_HACK)) {
-        //Remember if it was an upgrade because writeback MSHR's are removed
-        //in Mark in Service
-        bool upgrade = (mshr->pkt && mshr->pkt->cmd == MemCmd::UpgradeReq);
-
-        missQueue->markInService(mshr->pkt, mshr);
-
-        //Temp Hack for UPGRADES
-        if (upgrade) {
-            assert(pkt);  //Upgrades need to be fixed
-            pkt->flags &= ~CACHE_LINE_FILL;
-            BlkType *blk = tags->findBlock(pkt->getAddr());
-            CacheBlk::State old_state = (blk) ? blk->status : 0;
-            CacheBlk::State new_state = coherence->getNewState(pkt,old_state);
-            if (old_state != new_state)
-                DPRINTF(Cache, "Block for blk addr %x moving from state "
-                        "%i to %i\n", pkt->getAddr(), old_state, new_state);
-            //Set the state on the upgrade
-            std::memcpy(pkt->getPtr<uint8_t>(), blk->data, blkSize);
-            PacketList writebacks;
-            handleFill(blk, mshr, new_state, writebacks, pkt);
-            assert(writebacks.empty());
-            missQueue->handleResponse(pkt, curTick + hitLatency);
-        }
-    } else if (pkt && !pkt->req->isUncacheable()) {
-        pkt->flags &= ~NACKED_LINE;
-        SIGNAL_NACK_HACK = false;
-        pkt->flags &= ~SATISFIED;
-
-//Rmove copy from mshr
-        delete mshr->pkt;
-        mshr->pkt = pkt;
-
-        missQueue->restoreOrigCmd(pkt);
-    }
+    // timing-mode snoop responses require a new packet
+    PacketPtr pkt = new Packet(req_pkt);
+    pkt->allocate();
+    pkt->makeTimingResponse();
+    pkt->setDataFromBlock(blk_data, blkSize);
+    memSidePort->respond(pkt, curTick + hitLatency);
 }
 
 template<class TagStore, class Coherence>
 void
-Cache<TagStore,Coherence>::handleResponse(PacketPtr &pkt)
+Cache<TagStore,Coherence>::handleSnoop(PacketPtr pkt, BlkType *blk,
+                                       bool is_timing)
 {
-    BlkType *blk = NULL;
-    if (pkt->senderState) {
-        //Delete temp copy in MSHR, restore it.
-        delete ((MSHR*)pkt->senderState)->pkt;
-        ((MSHR*)pkt->senderState)->pkt = pkt;
-        if (pkt->result == Packet::Nacked) {
-            //pkt->reinitFromRequest();
-            warn("NACKs from devices not connected to the same bus "
-                 "not implemented\n");
-            return;
-        }
-        if (pkt->result == Packet::BadAddress) {
-            //Make the response a Bad address and send it
+    if (!blk || !blk->isValid()) {
+        return;
+    }
+
+    // we may end up modifying both the block state and the packet (if
+    // we respond in atomic mode), so just figure out what to do now
+    // and then do it later
+    bool supply = blk->isDirty() && pkt->isRead();
+    bool invalidate = pkt->isInvalidate();
+
+    if (pkt->isRead() && !pkt->isInvalidate()) {
+        assert(!pkt->needsExclusive());
+        pkt->assertShared();
+        int bits_to_clear = BlkWritable;
+        const bool haveOwnershipState = true; // for now
+        if (!haveOwnershipState) {
+            // if we don't support pure ownership (dirty && !writable),
+            // have to clear dirty bit here, assume memory snarfs data
+            // on cache-to-cache xfer
+            bits_to_clear |= BlkDirty;
         }
-//	MemDebug::cacheResponse(pkt);
-        DPRINTF(Cache, "Handling reponse to %x\n", pkt->getAddr());
-
-        if (pkt->isCacheFill() && !pkt->isNoAllocate()) {
-            DPRINTF(Cache, "Block for addr %x being updated in Cache\n",
-                    pkt->getAddr());
-            blk = tags->findBlock(pkt->getAddr());
-            CacheBlk::State old_state = (blk) ? blk->status : 0;
-            PacketList writebacks;
-            CacheBlk::State new_state = coherence->getNewState(pkt,old_state);
-            if (old_state != new_state)
-                DPRINTF(Cache, "Block for blk addr %x moving from "
-                        "state %i to %i\n",
-                        pkt->getAddr(),
-                        old_state, new_state);
-            blk = handleFill(blk, (MSHR*)pkt->senderState,
-                                   new_state, writebacks, pkt);
-            while (!writebacks.empty()) {
-                PacketPtr wbPkt = writebacks.front();
-                missQueue->doWriteback(wbPkt);
-                writebacks.pop_front();
-                delete wbPkt;
-            }
+        blk->status &= ~bits_to_clear;
+    }
+
+    if (supply) {
+        pkt->assertMemInhibit();
+        if (is_timing) {
+            doTimingSupplyResponse(pkt, blk->data);
+        } else {
+            pkt->makeAtomicResponse();
+            pkt->setDataFromBlock(blk->data, blkSize);
         }
-        missQueue->handleResponse(pkt, curTick + hitLatency);
     }
+
+    // Do this last in case it deallocates block data or something
+    // like that
+    if (invalidate) {
+        tags->invalidateBlk(blk);
+    }
+
+    DPRINTF(Cache, "snooped a %s request for addr %x, %snew state is %i\n",
+            pkt->cmdString(), blockAlign(pkt->getAddr()),
+            supply ? "supplying data, " : "", blk->status);
 }
 
 
 template<class TagStore, class Coherence>
 void
-Cache<TagStore,Coherence>::snoop(PacketPtr &pkt)
+Cache<TagStore,Coherence>::snoopTiming(PacketPtr pkt)
 {
     if (pkt->req->isUncacheable()) {
         //Can't get a hit on an uncacheable address
@@ -794,351 +923,190 @@ Cache<TagStore,Coherence>::snoop(PacketPtr &pkt)
         return;
     }
 
-    ///// PROPAGATE SNOOP UPWARD HERE
+    BlkType *blk = tags->findBlock(pkt->getAddr());
 
     Addr blk_addr = pkt->getAddr() & ~(Addr(blkSize-1));
-    BlkType *blk = tags->findBlock(pkt->getAddr());
-    MSHR *mshr = missQueue->findMSHR(blk_addr);
-    if (coherence->hasProtocol() || pkt->isInvalidate()) {
-        //@todo Move this into handle bus req
-        //If we find an mshr, and it is in service, we need to NACK or
-        //invalidate
-        if (mshr) {
-            if (mshr->inService) {
-                if ((mshr->pkt->isInvalidate() || !mshr->pkt->isCacheFill())
-                    && (pkt->cmd != MemCmd::InvalidateReq
-                        && pkt->cmd != MemCmd::WriteInvalidateReq)) {
-                    //If the outstanding request was an invalidate
-                    //(upgrade,readex,..)  Then we need to ACK the request
-                    //until we get the data Also NACK if the outstanding
-                    //request is not a cachefill (writeback)
-                    assert(!(pkt->flags & SATISFIED));
-                    pkt->flags |= SATISFIED;
-                    pkt->flags |= NACKED_LINE;
-                    SIGNAL_NACK_HACK = true;
-                    ///@todo NACK's from other levels
-                    //warn("NACKs from devices not connected to the same bus "
-                    //"not implemented\n");
-                    //respondToSnoop(pkt, curTick + hitLatency);
-                    return;
-                }
-                else {
-                    //The supplier will be someone else, because we are
-                    //waiting for the data.  This should cause this cache to
-                    //be forced to go to the shared state, not the exclusive
-                    //even though the shared line won't be asserted.  But for
-                    //now we will just invlidate ourselves and allow the other
-                    //cache to go into the exclusive state.  @todo Make it so
-                    //a read to a pending read doesn't invalidate.  @todo Make
-                    //it so that a read to a pending read can't be exclusive
-                    //now.
-
-                    //Set the address so find match works
-                    //panic("Don't have invalidates yet\n");
-                    invalidatePkt->addrOverride(pkt->getAddr());
-
-                    //Append the invalidate on
-                    missQueue->addTarget(mshr,invalidatePkt);
-                    DPRINTF(Cache, "Appending Invalidate to addr: %x\n",
-                            pkt->getAddr());
-                    return;
+    MSHR *mshr = mshrQueue.findMatch(blk_addr);
+    // better not be snooping a request that conflicts with something
+    // we have outstanding...
+    assert(!mshr || !mshr->inService);
+
+    //We also need to check the writeback buffers and handle those
+    std::vector<MSHR *> writebacks;
+    if (writeBuffer.findMatches(blk_addr, writebacks)) {
+        DPRINTF(Cache, "Snoop hit in writeback to addr: %x\n",
+                pkt->getAddr());
+
+        // Matches must be cacheable; only the first is handled (see return)
+        for (unsigned i = 0; i < writebacks.size(); i++) {
+            mshr = writebacks[i];
+            assert(!mshr->isUncacheable());
+
+            if (pkt->isRead()) {
+                pkt->assertMemInhibit();
+                if (!pkt->needsExclusive()) {
+                    pkt->assertShared();
+                } else {
+                    // if we're not asserting the shared line, we need to
+                    // invalidate our copy.  we'll do that below as long as
+                    // the packet's invalidate flag is set...
+                    assert(pkt->isInvalidate());
                 }
+                doTimingSupplyResponse(pkt, mshr->writeData);
             }
-        }
-        //We also need to check the writeback buffers and handle those
-        std::vector<MSHR *> writebacks;
-        if (missQueue->findWrites(blk_addr, writebacks)) {
-            DPRINTF(Cache, "Snoop hit in writeback to addr: %x\n",
-                    pkt->getAddr());
-
-            //Look through writebacks for any non-uncachable writes, use that
-            for (int i=0; i<writebacks.size(); i++) {
-                mshr = writebacks[i];
-
-                if (!mshr->pkt->req->isUncacheable()) {
-                    if (pkt->isRead()) {
-                        //Only Upgrades don't get here
-                        //Supply the data
-                        assert(!(pkt->flags & SATISFIED));
-                        pkt->flags |= SATISFIED;
-
-                        //If we are in an exclusive protocol, make it ask again
-                        //to get write permissions (upgrade), signal shared
-                        pkt->flags |= SHARED_LINE;
-
-                        assert(pkt->isRead());
-                        Addr offset = pkt->getAddr() & (blkSize - 1);
-                        assert(offset < blkSize);
-                        assert(pkt->getSize() <= blkSize);
-                        assert(offset + pkt->getSize() <=blkSize);
-                        std::memcpy(pkt->getPtr<uint8_t>(), mshr->pkt->getPtr<uint8_t>() + offset, pkt->getSize());
-
-                        respondToSnoop(pkt, curTick + hitLatency);
-                    }
-
-                    if (pkt->isInvalidate()) {
-                        //This must be an upgrade or other cache will take
-                        //ownership
-                        missQueue->markInService(mshr->pkt, mshr);
-                    }
-                    return;
-                }
+
+            if (pkt->isInvalidate()) {
+                // Invalidation trumps our writeback... discard here
+                assert(0);
+                markInService(mshr);
             }
+            return;
         }
     }
-    CacheBlk::State new_state;
-    bool satisfy = coherence->handleBusRequest(pkt,blk,mshr, new_state);
-
-    if (blk && mshr && !mshr->inService && new_state == 0) {
-            //There was a outstanding write to a shared block, not need ReadEx
-            //not update, so change No Allocate param in MSHR
-            mshr->pkt->flags &= ~NO_ALLOCATE;
-    }
-
-    if (satisfy) {
-        DPRINTF(Cache, "Cache snooped a %s request for addr %x and "
-                "now supplying data, new state is %i\n",
-                pkt->cmdString(), blk_addr, new_state);
-
-        handleSnoop(blk, new_state, pkt);
-        respondToSnoop(pkt, curTick + hitLatency);
-        return;
-    }
-    if (blk)
-        DPRINTF(Cache, "Cache snooped a %s request for addr %x, "
-                "new state is %i\n", pkt->cmdString(), blk_addr, new_state);
 
-    handleSnoop(blk, new_state);
+    handleSnoop(pkt, blk, true);
 }
 
+
 template<class TagStore, class Coherence>
-void
-Cache<TagStore,Coherence>::snoopResponse(PacketPtr &pkt)
+Tick
+Cache<TagStore,Coherence>::snoopAtomic(PacketPtr pkt)
 {
-    //Need to handle the response, if NACKED
-    if (pkt->flags & NACKED_LINE) {
-        //Need to mark it as not in service, and retry for bus
-        assert(0); //Yeah, we saw a NACK come through
-
-        //For now this should never get called, we return false when we see a
-        //NACK instead, by doing this we allow the bus_blocked mechanism to
-        //handle the retry For now it retrys in just 2 cycles, need to figure
-        //out how to change that Eventually we will want to also have success
-        //come in as a parameter Need to make sure that we handle the
-        //functionality that happens on successufl return of the sendAddr
-        //function
+    if (pkt->req->isUncacheable()) {
+        // Can't get a hit on an uncacheable address
+        // Revisit this for multi level coherence
+        return hitLatency;
     }
+
+    BlkType *blk = tags->findBlock(pkt->getAddr());
+    handleSnoop(pkt, blk, false);
+    return hitLatency;
 }
 
 
-/**
- * @todo Fix to not assume write allocate
- */
 template<class TagStore, class Coherence>
-Tick
-Cache<TagStore,Coherence>::probe(PacketPtr &pkt, bool update,
-                                           CachePort* otherSidePort)
+MSHR *
+Cache<TagStore,Coherence>::getNextMSHR()
 {
-//    MemDebug::cacheProbe(pkt);
-    if (!pkt->req->isUncacheable()) {
-        if (pkt->isInvalidate() && !pkt->isRead() && !pkt->isWrite()) {
-            //Upgrade or Invalidate, satisfy it, don't forward
-            DPRINTF(Cache, "%s %x ?\n", pkt->cmdString(), pkt->getAddr());
-            pkt->flags |= SATISFIED;
-            return 0;
-        }
-    }
+    // Check both MSHR queue and write buffer for potential requests
+    MSHR *miss_mshr  = mshrQueue.getNextMSHR();
+    MSHR *write_mshr = writeBuffer.getNextMSHR();
 
-    if (!update && (otherSidePort == cpuSidePort)) {
-        // Still need to change data in all locations.
-        otherSidePort->checkAndSendFunctional(pkt);
-        if (pkt->isRead() && pkt->result == Packet::Success)
-            return 0;
+    // Now figure out which one to send... some cases are easy
+    if (miss_mshr && !write_mshr) {
+        return miss_mshr;
+    }
+    if (write_mshr && !miss_mshr) {
+        return write_mshr;
     }
 
-    PacketList writebacks;
-    int lat;
-
-    BlkType *blk = handleAccess(pkt, lat, writebacks, update);
-
-    DPRINTF(Cache, "%s %x %s\n", pkt->cmdString(),
-            pkt->getAddr(), (blk) ? "hit" : "miss");
-
-
-    // Need to check for outstanding misses and writes
-    Addr blk_addr = pkt->getAddr() & ~(blkSize - 1);
-
-    // There can only be one matching outstanding miss.
-    MSHR* mshr = missQueue->findMSHR(blk_addr);
-
-    // There can be many matching outstanding writes.
-    std::vector<MSHR*> writes;
-    missQueue->findWrites(blk_addr, writes);
-
-    if (!update) {
-        bool notDone = !(pkt->flags & SATISFIED); //Hit in cache (was a block)
-        // Check for data in MSHR and writebuffer.
-        if (mshr) {
-            MSHR::TargetList *targets = mshr->getTargetList();
-            MSHR::TargetList::iterator i = targets->begin();
-            MSHR::TargetList::iterator end = targets->end();
-            for (; i != end && notDone; ++i) {
-                PacketPtr target = *i;
-                // If the target contains data, and it overlaps the
-                // probed request, need to update data
-                if (target->intersect(pkt)) {
-                    DPRINTF(Cache, "Functional %s access to blk_addr %x intersects a MSHR\n",
-                            pkt->cmdString(), blk_addr);
-                    notDone = fixPacket(pkt, target);
-                }
-            }
-        }
-        for (int i = 0; i < writes.size() && notDone; ++i) {
-            PacketPtr write = writes[i]->pkt;
-            if (write->intersect(pkt)) {
-                DPRINTF(Cache, "Functional %s access to blk_addr %x intersects a writeback\n",
-                        pkt->cmdString(), blk_addr);
-                notDone = fixPacket(pkt, write);
-            }
-        }
-        if (notDone && otherSidePort == memSidePort) {
-            otherSidePort->checkAndSendFunctional(pkt);
-            assert(pkt->result == Packet::Success);
-        }
-        return 0;
-    } else if (!blk && !(pkt->flags & SATISFIED)) {
-        // update the cache state and statistics
-        if (mshr || !writes.empty()){
-            // Can't handle it, return request unsatisfied.
-            panic("Atomic access ran into outstanding MSHR's or WB's!");
-        }
-        if (!pkt->req->isUncacheable() /*Uncacheables just go through*/
-            && (pkt->cmd != MemCmd::Writeback)/*Writebacks on miss fall through*/) {
-                // Fetch the cache block to fill
-            BlkType *blk = tags->findBlock(pkt->getAddr());
-            MemCmd temp_cmd =
-                coherence->getBusCmd(pkt->cmd, (blk) ? blk->status : 0);
-
-            PacketPtr busPkt = new Packet(pkt->req,temp_cmd, -1, blkSize);
-
-            busPkt->allocate();
-
-            busPkt->time = curTick;
-
-            DPRINTF(Cache, "Sending a atomic %s for %x\n",
-                    busPkt->cmdString(), busPkt->getAddr());
-
-            lat = memSidePort->sendAtomic(busPkt);
-
-            //Be sure to flip the response to a request for coherence
-            if (busPkt->needsResponse()) {
-                busPkt->makeAtomicResponse();
+    if (miss_mshr && write_mshr) {
+        // We have one of each... normally we favor the miss request
+        // unless the write buffer is full
+        if (writeBuffer.isFull() && writeBuffer.inServiceEntries == 0) {
+            // Write buffer is full, so we'd like to issue a write;
+            // need to search MSHR queue for conflicting earlier miss.
+            MSHR *conflict_mshr =
+                mshrQueue.findPending(write_mshr->addr, write_mshr->size);
+
+            if (conflict_mshr && conflict_mshr->order < write_mshr->order) {
+                // Service misses in order until conflict is cleared.
+                return conflict_mshr;
             }
 
-/*		if (!(busPkt->flags & SATISFIED)) {
-// blocked at a higher level, just return
-return 0;
-}
-
-*/		misses[pkt->cmdToIndex()][0/*pkt->req->getThreadNum()*/]++;
-
-            CacheBlk::State old_state = (blk) ? blk->status : 0;
-            CacheBlk::State new_state =
-                coherence->getNewState(busPkt, old_state);
-            DPRINTF(Cache, "Receive response: %s for addr %x in state %i\n",
-                    busPkt->cmdString(), busPkt->getAddr(), old_state);
-            if (old_state != new_state)
-                DPRINTF(Cache, "Block for blk addr %x moving from state "
-                        "%i to %i\n", busPkt->getAddr(), old_state, new_state);
-
-            handleFill(blk, busPkt, new_state, writebacks, pkt);
-            //Free the packet
-            delete busPkt;
-
-            // Handle writebacks if needed
-            while (!writebacks.empty()){
-                PacketPtr wbPkt = writebacks.front();
-                memSidePort->sendAtomic(wbPkt);
-                writebacks.pop_front();
-                delete wbPkt;
-            }
-                return lat + hitLatency;
-        } else {
-            return memSidePort->sendAtomic(pkt);
+            // No conflicts; issue write
+            return write_mshr;
         }
-    } else {
-        if (blk) {
-            // There was a cache hit.
-            // Handle writebacks if needed
-            while (!writebacks.empty()){
-                PacketPtr wbPkt = writebacks.front();
-                memSidePort->sendAtomic(wbPkt);
-                writebacks.pop_front();
-                delete wbPkt;
-            }
 
-            hits[pkt->cmdToIndex()][0/*pkt->req->getThreadNum()*/]++;
+        // Write buffer isn't full, but need to check it for
+        // conflicting earlier writeback
+        MSHR *conflict_mshr =
+            writeBuffer.findPending(miss_mshr->addr, miss_mshr->size);
+        if (conflict_mshr) {
+            // not sure why we don't check order here... it was in the
+            // original code but commented out.
+
+            // The only way this happens is if we are
+            // doing a write and we didn't have permissions
+            // then subsequently saw a writeback (owned got evicted)
+            // We need to make sure to perform the writeback first
+            // To preserve the dirty data, then we can issue the write
+
+            // should we return write_mshr here instead?  I.e. do we
+            // have to flush writes in order?  I don't think so... not
+            // for Alpha anyway.  Maybe for x86?
+            return conflict_mshr;
         }
 
-        return hitLatency;
+        // No conflicts; issue read
+        return miss_mshr;
+    }
+
+    // fall through... no pending requests.  Try a prefetch.
+    assert(!miss_mshr && !write_mshr);
+    if (!mshrQueue.isFull()) {
+        // If we have a miss queue slot, we can try a prefetch
+        PacketPtr pkt = prefetcher->getPacket();
+        if (pkt) {
+            // Update statistic on number of prefetches issued
+            // (hwpf_mshr_misses)
+            mshr_misses[pkt->cmdToIndex()][0/*pkt->req->getThreadNum()*/]++;
+            // Don't request bus, since we already have it
+            return allocateBuffer(pkt, curTick, true, false);
+        }
     }
 
-    return 0;
+    return NULL;
 }
 
+
 template<class TagStore, class Coherence>
-Tick
-Cache<TagStore,Coherence>::snoopProbe(PacketPtr &pkt)
+PacketPtr
+Cache<TagStore,Coherence>::getPacket()
 {
-    ///// PROPAGATE SNOOP UPWARD HERE
+    MSHR *mshr = getNextMSHR();
 
-    Addr blk_addr = pkt->getAddr() & ~(Addr(blkSize-1));
-    BlkType *blk = tags->findBlock(pkt->getAddr());
-    MSHR *mshr = missQueue->findMSHR(blk_addr);
-    CacheBlk::State new_state = 0;
-    bool satisfy = coherence->handleBusRequest(pkt,blk,mshr, new_state);
-    if (satisfy) {
-        DPRINTF(Cache, "Cache snooped a %s request for addr %x and "
-                "now supplying data, new state is %i\n",
-                pkt->cmdString(), blk_addr, new_state);
-
-            handleSnoop(blk, new_state, pkt);
-            return hitLatency;
+    if (mshr == NULL) {
+        return NULL;
     }
-    if (blk)
-        DPRINTF(Cache, "Cache snooped a %s request for addr %x, "
-                "new state is %i\n",
-                    pkt->cmdString(), blk_addr, new_state);
-    handleSnoop(blk, new_state);
-    return 0;
-}
 
-template<class TagStore, class Coherence>
-Port *
-Cache<TagStore,Coherence>::getPort(const std::string &if_name, int idx)
-{
-    if (if_name == "" || if_name == "cpu_side") {
-        return cpuSidePort;
-    } else if (if_name == "mem_side") {
-        return memSidePort;
-    } else if (if_name == "functional") {
-        return new CpuSidePort(name() + "-cpu_side_funcport", this);
+    BlkType *blk = tags->findBlock(mshr->addr);
+
+    // use request from 1st target
+    MSHR::Target *tgt1 = mshr->getTarget();
+    PacketPtr tgt1_pkt = tgt1->pkt;
+    PacketPtr pkt;
+
+    if (mshr->isCacheFill) {
+        MemCmd cmd;
+        if (blk && blk->isValid()) {
+            // only reason to be here is that blk is shared
+            // (read-only) and we need exclusive
+            assert(mshr->needsExclusive && !blk->isWritable());
+            cmd = MemCmd::UpgradeReq;
+        } else {
+            // block is invalid
+            cmd = mshr->needsExclusive ? MemCmd::ReadExReq : MemCmd::ReadReq;
+        }
+        pkt = new Packet(tgt1_pkt->req, cmd, Packet::Broadcast);
     } else {
-        panic("Port name %s unrecognized\n", if_name);
+        assert(blk == NULL);
+        assert(mshr->getNumTargets() == 1);
+        pkt = new Packet(tgt1_pkt->req, tgt1_pkt->cmd, Packet::Broadcast);
     }
-}
-
-template<class TagStore, class Coherence>
-void
-Cache<TagStore,Coherence>::deletePortRefs(Port *p)
-{
-    if (cpuSidePort == p || memSidePort == p)
-        panic("Can only delete functional ports\n");
 
-    delete p;
+    pkt->senderState = mshr;
+    pkt->allocate();
+    return pkt;
 }
 
 
+///////////////
+//
+// CpuSidePort
+//
+///////////////
+
 template<class TagStore, class Coherence>
 void
 Cache<TagStore,Coherence>::CpuSidePort::
@@ -1155,131 +1123,57 @@ template<class TagStore, class Coherence>
 bool
 Cache<TagStore,Coherence>::CpuSidePort::recvTiming(PacketPtr pkt)
 {
-    assert(pkt->result != Packet::Nacked);
-
-    if (!pkt->req->isUncacheable()
-        && pkt->isInvalidate()
-        && !pkt->isRead() && !pkt->isWrite()) {
-        //Upgrade or Invalidate
-        //Look into what happens if two slave caches on bus
-        DPRINTF(Cache, "%s %x ?\n", pkt->cmdString(), pkt->getAddr());
-
-        assert(!(pkt->flags & SATISFIED));
-        pkt->flags |= SATISFIED;
-        //Invalidates/Upgrades need no response if they get the bus
-        return true;
-    }
-
-    if (pkt->isRequest() && blocked)
-    {
+    if (pkt->isRequest() && blocked) {
         DPRINTF(Cache,"Scheduling a retry while blocked\n");
         mustSendRetry = true;
         return false;
     }
 
-    if (pkt->isWrite() && (pkt->req->isLocked())) {
-        pkt->req->setExtraData(1);
-    }
-    myCache()->access(pkt);
+    myCache()->timingAccess(pkt);
     return true;
 }
 
 
 template<class TagStore, class Coherence>
-void
-Cache<TagStore,Coherence>::CpuSidePort::recvRetry()
-{
-    recvRetryCommon();
-}
-
-
-template<class TagStore, class Coherence>
-void
-Cache<TagStore,Coherence>::CpuSidePort::processRequestEvent()
+Tick
+Cache<TagStore,Coherence>::CpuSidePort::recvAtomic(PacketPtr pkt)
 {
-    if (waitingOnRetry)
-        return;
-    //We have some responses to drain first
-    if (!drainList.empty()) {
-        if (!drainResponse()) {
-            // more responses to drain... re-request bus
-            scheduleRequestEvent(curTick + 1);
-        }
-    }
+    return myCache()->atomicAccess(pkt);
 }
 
 
 template<class TagStore, class Coherence>
 void
-Cache<TagStore,Coherence>::CpuSidePort::processResponseEvent()
+Cache<TagStore,Coherence>::CpuSidePort::recvFunctional(PacketPtr pkt)
 {
-    assert(transmitList.size());
-    assert(transmitList.front().first <= curTick);
-    PacketPtr pkt = transmitList.front().second;
-    transmitList.pop_front();
-    if (!transmitList.empty()) {
-        Tick time = transmitList.front().first;
-        responseEvent->schedule(time <= curTick ? curTick+1 : time);
-    }
-
-    if (pkt->flags & NACKED_LINE)
-        pkt->result = Packet::Nacked;
-    else
-        pkt->result = Packet::Success;
-    pkt->makeTimingResponse();
-    DPRINTF(CachePort, "%s attempting to send a response\n", name());
-    if (!drainList.empty() || waitingOnRetry) {
-        //Already have a list, just append
-        drainList.push_back(pkt);
-        DPRINTF(CachePort, "%s appending response onto drain list\n", name());
-    }
-    else if (!sendTiming(pkt)) {
-        //It failed, save it to list of drain events
-        DPRINTF(CachePort, "%s now waiting for a retry\n", name());
-        drainList.push_back(pkt);
-        waitingOnRetry = true;
-    }
-
-    // Check if we're done draining once this list is empty
-    if (drainList.empty() && transmitList.empty())
-        myCache()->checkDrain();
+    checkFunctional(pkt);
+    if (pkt->result != Packet::Success)
+        myCache()->functionalAccess(pkt, cache->memSidePort);
 }
 
 
 template<class TagStore, class Coherence>
-Tick
-Cache<TagStore,Coherence>::CpuSidePort::recvAtomic(PacketPtr pkt)
-{
-    myCache()->probe(pkt, true, NULL);
-    //TEMP ALWAYS SUCCES FOR NOW
-    pkt->result = Packet::Success;
-    //Fix this timing info
-    return myCache()->hitLatency;
-}
-
-template<class TagStore, class Coherence>
-void
-Cache<TagStore,Coherence>::CpuSidePort::recvFunctional(PacketPtr pkt)
+Cache<TagStore,Coherence>::
+CpuSidePort::CpuSidePort(const std::string &_name,
+                         Cache<TagStore,Coherence> *_cache)
+    : BaseCache::CachePort(_name, _cache)
 {
-    if (checkFunctional(pkt)) {
-        //TEMP USE CPU?THREAD 0 0
-        pkt->req->setThreadContext(0,0);
-
-        myCache()->probe(pkt, false, cache->memSidePort);
-        //TEMP ALWAYS SUCCESFUL FOR NOW
-        pkt->result = Packet::Success;
-    }
 }
 
+///////////////
+//
+// MemSidePort
+//
+///////////////
 
 template<class TagStore, class Coherence>
 void
 Cache<TagStore,Coherence>::MemSidePort::
 getDeviceAddressRanges(AddrRangeList &resp, bool &snoop)
 {
-    // Memory-side port always snoops.
-    bool dummy;
-    otherPort->getPeerAddressRanges(resp, dummy);
+    otherPort->getPeerAddressRanges(resp, snoop);
+    // Memory-side port always snoops, so unconditionally set flag for
+    // caller.
     snoop = true;
 }
 
@@ -1303,177 +1197,129 @@ Cache<TagStore,Coherence>::MemSidePort::recvTiming(PacketPtr pkt)
     if (pkt->isResponse()) {
         myCache()->handleResponse(pkt);
     } else {
-        myCache()->snoop(pkt);
+        myCache()->snoopTiming(pkt);
     }
     return true;
 }
 
+
 template<class TagStore, class Coherence>
-void
-Cache<TagStore,Coherence>::MemSidePort::recvRetry()
+Tick
+Cache<TagStore,Coherence>::MemSidePort::recvAtomic(PacketPtr pkt)
 {
-    if (recvRetryCommon()) {
-        return;
-    }
-
-    DPRINTF(CachePort, "%s attempting to send a retry for MSHR\n", name());
-    if (!cache->isMemSideBusRequested()) {
-        //This can happen if I am the owner of a block and see an upgrade
-        //while the block was in my WB Buffers.  I just remove the
-        //wb and de-assert the masterRequest
-        waitingOnRetry = false;
-        return;
-    }
-    PacketPtr pkt = myCache()->getPacket();
-    MSHR* mshr = (MSHR*) pkt->senderState;
-    //Copy the packet, it may be modified/destroyed elsewhere
-    PacketPtr copyPkt = new Packet(*pkt);
-    copyPkt->dataStatic<uint8_t>(pkt->getPtr<uint8_t>());
-    mshr->pkt = copyPkt;
-
-    bool success = sendTiming(pkt);
-    DPRINTF(Cache, "Address %x was %s in sending the timing request\n",
-            pkt->getAddr(), success ? "succesful" : "unsuccesful");
-
-    waitingOnRetry = !success;
-    if (waitingOnRetry) {
-        DPRINTF(CachePort, "%s now waiting on a retry\n", name());
-    }
+    // in atomic mode, responses go back to the sender via the
+    // function return from sendAtomic(), not via a separate
+    // sendAtomic() from the responder.  Thus we should never see a
+    // response packet in recvAtomic() (anywhere, not just here).
+    assert(!pkt->isResponse());
+    return myCache()->snoopAtomic(pkt);
+}
 
-    myCache()->sendResult(pkt, mshr, success);
 
-    if (success && cache->isMemSideBusRequested())
-    {
-        DPRINTF(CachePort, "%s has more requests\n", name());
-        //Still more to issue, rerequest in 1 cycle
-        new RequestEvent(this, curTick + 1);
-    }
+template<class TagStore, class Coherence>
+void
+Cache<TagStore,Coherence>::MemSidePort::recvFunctional(PacketPtr pkt)
+{
+    checkFunctional(pkt);
+    if (pkt->result != Packet::Success)
+        myCache()->functionalAccess(pkt, cache->cpuSidePort);
 }
 
 
+
+/**
+ * Try to send one packet on the memory-side port: a ready deferred
+ * packet if there is one, otherwise the next request/writeback from
+ * the cache.  On success, reschedule the send event for any remaining
+ * work or complete a pending drain.
+ */
 template<class TagStore, class Coherence>
 void
-Cache<TagStore,Coherence>::MemSidePort::processRequestEvent()
+Cache<TagStore,Coherence>::MemSidePort::sendPacket()
 {
-    if (waitingOnRetry)
-        return;
-    //We have some responses to drain first
-    if (!drainList.empty()) {
-        if (!drainResponse()) {
-            // more responses to drain... re-request bus
-            scheduleRequestEvent(curTick + 1);
-        }
-        return;
-    }
+    // if we have responses that are ready, they take precedence
+    if (deferredPacketReady()) {
+        bool success = sendTiming(transmitList.front().pkt);
 
-    DPRINTF(CachePort, "%s trying to send a MSHR request\n", name());
-    if (!isBusRequested()) {
-        //This can happen if I am the owner of a block and see an upgrade
-        //while the block was in my WB Buffers.  I just remove the
-        //wb and de-assert the masterRequest
-        return;
-    }
+        if (success) {
+            //send successful, remove packet
+            transmitList.pop_front();
+        }
 
-    PacketPtr pkt = myCache()->getPacket();
-    MSHR* mshr = (MSHR*) pkt->senderState;
-    //Copy the packet, it may be modified/destroyed elsewhere
-    PacketPtr copyPkt = new Packet(*pkt);
-    copyPkt->dataStatic<uint8_t>(pkt->getPtr<uint8_t>());
-    mshr->pkt = copyPkt;
+        waitingOnRetry = !success;
+    } else {
+        // check for non-response packets (requests & writebacks)
+        PacketPtr pkt = myCache()->getPacket();
+        if (pkt == NULL) {
+            // getPacket() had nothing for us, e.g. a buffered writeback
+            // was removed by a snooped upgrade before we got the bus.
+            // Nothing was sent, so we are not waiting on a retry.
+            waitingOnRetry = false;
+        } else {
+            // look up the MSHR before the packet is sent
+            MSHR *mshr = dynamic_cast<MSHR*>(pkt->senderState);
 
-    bool success = sendTiming(pkt);
-    DPRINTF(Cache, "Address %x was %s in sending the timing request\n",
-            pkt->getAddr(), success ? "succesful" : "unsuccesful");
+            bool success = sendTiming(pkt);
+            DPRINTF(Cache, "Address %x was %s in sending the timing request\n",
+                    pkt->getAddr(), success ? "successful" : "unsuccessful");
 
-    waitingOnRetry = !success;
-    if (waitingOnRetry) {
-        DPRINTF(CachePort, "%s now waiting on a retry\n", name());
+            waitingOnRetry = !success;
+            if (waitingOnRetry) {
+                DPRINTF(CachePort, "%s now waiting on a retry\n", name());
+            } else {
+                myCache()->markInService(mshr);
+            }
+        }
     }
 
-    myCache()->sendResult(pkt, mshr, success);
-    if (success && isBusRequested())
-    {
-        DPRINTF(CachePort, "%s still more MSHR requests to send\n", name());
-        //Still more to issue, rerequest in 1 cycle
-        scheduleRequestEvent(curTick+1);
+
+    // tried to send packet... if it was successful (no retry), see if
+    // we need to rerequest bus or not
+    if (!waitingOnRetry) {
+        if (isBusRequested()) {
+            // more requests/writebacks: rerequest ASAP
+            DPRINTF(CachePort, "%s still more MSHR requests to send\n",
+                    name());
+            sendEvent->schedule(curTick+1);
+        } else if (!transmitList.empty()) {
+            // deferred packets: rerequest bus, but possibly not until later
+            Tick time = transmitList.front().tick;
+            sendEvent->schedule(time <= curTick ? curTick+1 : time);
+        } else {
+            // no more to send right now: if we're draining, we may be done
+            if (drainEvent) {
+                drainEvent->process();
+                drainEvent = NULL;
+            }
+        }
     }
 }
 
-
 template<class TagStore, class Coherence>
 void
-Cache<TagStore,Coherence>::MemSidePort::processResponseEvent()
+Cache<TagStore,Coherence>::MemSidePort::recvRetry()
 {
-    assert(transmitList.size());
-    assert(transmitList.front().first <= curTick);
-    PacketPtr pkt = transmitList.front().second;
-    transmitList.pop_front();
-    if (!transmitList.empty()) {
-        Tick time = transmitList.front().first;
-        responseEvent->schedule(time <= curTick ? curTick+1 : time);
-    }
-
-    if (pkt->flags & NACKED_LINE)
-        pkt->result = Packet::Nacked;
-    else
-        pkt->result = Packet::Success;
-    pkt->makeTimingResponse();
-    DPRINTF(CachePort, "%s attempting to send a response\n", name());
-    if (!drainList.empty() || waitingOnRetry) {
-        //Already have a list, just append
-        drainList.push_back(pkt);
-        DPRINTF(CachePort, "%s appending response onto drain list\n", name());
-    }
-    else if (!sendTiming(pkt)) {
-        //It failed, save it to list of drain events
-        DPRINTF(CachePort, "%s now waiting for a retry\n", name());
-        drainList.push_back(pkt);
-        waitingOnRetry = true;
-    }
-
-    // Check if we're done draining once this list is empty
-    if (drainList.empty() && transmitList.empty())
-        myCache()->checkDrain();
+    assert(waitingOnRetry);
+    sendPacket();
 }
 
 
-template<class TagStore, class Coherence>
-Tick
-Cache<TagStore,Coherence>::MemSidePort::recvAtomic(PacketPtr pkt)
-{
-    if (pkt->isResponse())
-        myCache()->handleResponse(pkt);
-    else
-        return myCache()->snoopProbe(pkt);
-    //Fix this timing info
-    return myCache()->hitLatency;
-}
-
 template<class TagStore, class Coherence>
 void
-Cache<TagStore,Coherence>::MemSidePort::recvFunctional(PacketPtr pkt)
+Cache<TagStore,Coherence>::MemSidePort::processSendEvent()
 {
-    myCache()->probe(pkt, false, cache->cpuSidePort);
-    if (pkt->result != Packet::Success)
-        checkFunctional(pkt);
+    assert(!waitingOnRetry);
+    sendPacket();
 }
 
 
-template<class TagStore, class Coherence>
-Cache<TagStore,Coherence>::
-CpuSidePort::CpuSidePort(const std::string &_name,
-                         Cache<TagStore,Coherence> *_cache)
-    : BaseCache::CachePort(_name, _cache)
-{
-    responseEvent = new ResponseEvent(this);
-}
-
 template<class TagStore, class Coherence>
 Cache<TagStore,Coherence>::
 MemSidePort::MemSidePort(const std::string &_name,
                          Cache<TagStore,Coherence> *_cache)
     : BaseCache::CachePort(_name, _cache)
 {
-    responseEvent = new ResponseEvent(this);
+    // override default send event from SimpleTimingPort
+    delete sendEvent;
+    sendEvent = new SendEvent(this);
 }
-
diff --git a/src/mem/cache/coherence/coherence_protocol.cc b/src/mem/cache/coherence/coherence_protocol.cc
index bc8de0d26..3fd17c8c7 100644
--- a/src/mem/cache/coherence/coherence_protocol.cc
+++ b/src/mem/cache/coherence/coherence_protocol.cc
@@ -139,31 +139,6 @@ CoherenceProtocol::regStats()
         .desc("readEx snoops on exclusive blocks")
         ;
 
-    snoopCount[Shared][MemCmd::InvalidateReq]
-        .name(name() + ".snoop_inv_shared")
-        .desc("Invalidate snoops on shared blocks")
-        ;
-
-    snoopCount[Owned][MemCmd::InvalidateReq]
-        .name(name() + ".snoop_inv_owned")
-        .desc("Invalidate snoops on owned blocks")
-        ;
-
-    snoopCount[Exclusive][MemCmd::InvalidateReq]
-        .name(name() + ".snoop_inv_exclusive")
-        .desc("Invalidate snoops on exclusive blocks")
-        ;
-
-    snoopCount[Modified][MemCmd::InvalidateReq]
-        .name(name() + ".snoop_inv_modified")
-        .desc("Invalidate snoops on modified blocks")
-        ;
-
-    snoopCount[Invalid][MemCmd::InvalidateReq]
-        .name(name() + ".snoop_inv_invalid")
-        .desc("Invalidate snoops on invalid blocks")
-        ;
-
     snoopCount[Shared][MemCmd::WriteInvalidateReq]
         .name(name() + ".snoop_writeinv_shared")
         .desc("WriteInvalidate snoops on shared blocks")
@@ -219,7 +194,7 @@ CoherenceProtocol::supplyAndGotoSharedTrans(BaseCache *cache, PacketPtr &pkt,
                                             CacheBlk::State & new_state)
 {
     new_state = (blk->status & ~stateMask) | Shared;
-    pkt->flags |= SHARED_LINE;
+    pkt->assertShared();
     return supplyTrans(cache, pkt, blk, mshr, new_state);
 }
 
@@ -231,7 +206,7 @@ CoherenceProtocol::supplyAndGotoOwnedTrans(BaseCache *cache, PacketPtr &pkt,
                                            CacheBlk::State & new_state)
 {
     new_state = (blk->status & ~stateMask) | Owned;
-    pkt->flags |= SHARED_LINE;
+    pkt->assertShared();
     return supplyTrans(cache, pkt, blk, mshr, new_state);
 }
 
@@ -253,7 +228,7 @@ CoherenceProtocol::assertShared(BaseCache *cache, PacketPtr &pkt,
                                             CacheBlk::State & new_state)
 {
     new_state = (blk->status & ~stateMask) | Shared;
-    pkt->flags |= SHARED_LINE;
+    pkt->assertShared();
     return false;
 }
 
@@ -336,12 +311,10 @@ CoherenceProtocol::CoherenceProtocol(const string &name,
     //
     tt[Invalid][MC::ReadReq].onSnoop(nullTransition);
     tt[Invalid][MC::ReadExReq].onSnoop(nullTransition);
-    tt[Invalid][MC::InvalidateReq].onSnoop(invalidateTrans);
     tt[Invalid][MC::WriteInvalidateReq].onSnoop(invalidateTrans);
     tt[Shared][MC::ReadReq].onSnoop(hasExclusive
                                    ? assertShared : nullTransition);
     tt[Shared][MC::ReadExReq].onSnoop(invalidateTrans);
-    tt[Shared][MC::InvalidateReq].onSnoop(invalidateTrans);
     tt[Shared][MC::WriteInvalidateReq].onSnoop(invalidateTrans);
     if (doUpgrades) {
         tt[Invalid][MC::UpgradeReq].onSnoop(nullTransition);
@@ -351,13 +324,11 @@ CoherenceProtocol::CoherenceProtocol(const string &name,
     tt[Modified][MC::ReadReq].onSnoop(hasOwned
                                      ? supplyAndGotoOwnedTrans
                                      : supplyAndGotoSharedTrans);
-    tt[Modified][MC::InvalidateReq].onSnoop(invalidateTrans);
     tt[Modified][MC::WriteInvalidateReq].onSnoop(invalidateTrans);
 
     if (hasExclusive) {
         tt[Exclusive][MC::ReadReq].onSnoop(assertShared);
         tt[Exclusive][MC::ReadExReq].onSnoop(invalidateTrans);
-        tt[Exclusive][MC::InvalidateReq].onSnoop(invalidateTrans);
         tt[Exclusive][MC::WriteInvalidateReq].onSnoop(invalidateTrans);
     }
 
@@ -365,7 +336,6 @@ CoherenceProtocol::CoherenceProtocol(const string &name,
         tt[Owned][MC::ReadReq].onSnoop(supplyAndGotoOwnedTrans);
         tt[Owned][MC::ReadExReq].onSnoop(supplyAndInvalidateTrans);
         tt[Owned][MC::UpgradeReq].onSnoop(invalidateTrans);
-        tt[Owned][MC::InvalidateReq].onSnoop(invalidateTrans);
         tt[Owned][MC::WriteInvalidateReq].onSnoop(invalidateTrans);
     }
 
@@ -394,7 +364,7 @@ CoherenceProtocol::getBusCmd(MemCmd cmdIn, CacheBlk::State state,
 
 
 CacheBlk::State
-CoherenceProtocol::getNewState(PacketPtr &pkt, CacheBlk::State oldState)
+CoherenceProtocol::getNewState(PacketPtr pkt, CacheBlk::State oldState)
 {
     CacheBlk::State state = oldState & stateMask;
     int cmd_idx = pkt->cmdToIndex();
@@ -406,7 +376,7 @@ CoherenceProtocol::getNewState(PacketPtr &pkt, CacheBlk::State oldState)
 
     //Check if it's exclusive and the shared line was asserted,
     //then  goto shared instead
-    if (newState == Exclusive && (pkt->flags & SHARED_LINE)) {
+    if (newState == Exclusive && pkt->sharedAsserted()) {
         newState = Shared;
     }
 
diff --git a/src/mem/cache/coherence/coherence_protocol.hh b/src/mem/cache/coherence/coherence_protocol.hh
index 775bc807a..4b8024582 100644
--- a/src/mem/cache/coherence/coherence_protocol.hh
+++ b/src/mem/cache/coherence/coherence_protocol.hh
@@ -89,8 +89,8 @@ class CoherenceProtocol : public SimObject
      * @param oldState The current block state.
      * @return The new state.
      */
-    CacheBlk::State getNewState(PacketPtr &pkt,
-                                CacheBlk::State oldState);
+    CacheBlk::State getNewState(PacketPtr pkt,
+                                CacheBlk::State oldState = 0);
 
     /**
      * Handle snooped bus requests.
diff --git a/src/mem/cache/coherence/simple_coherence.hh b/src/mem/cache/coherence/simple_coherence.hh
index 095260ca4..214828ca7 100644
--- a/src/mem/cache/coherence/simple_coherence.hh
+++ b/src/mem/cache/coherence/simple_coherence.hh
@@ -94,25 +94,14 @@ class SimpleCoherence
         return NULL;
     }
 
-    /**
-     * Was the CSHR request was sent successfully?
-     * @param pkt The request.
-     * @param success True if the request was sent successfully.
-     */
-    void sendResult(PacketPtr &pkt, MSHR* cshr, bool success)
-    {
-        //Don't do coherence
-        return;
-    }
-
-
     /**
      * Return the proper state given the current state and the bus response.
      * @param pkt The bus response.
      * @param current The current block state.
      * @return The new state.
      */
-    CacheBlk::State getNewState(PacketPtr &pkt, CacheBlk::State current)
+    CacheBlk::State getNewState(PacketPtr pkt,
+                                CacheBlk::State current = 0)
     {
         return protocol->getNewState(pkt, current);
     }
diff --git a/src/mem/cache/miss/SConscript b/src/mem/cache/miss/SConscript
index 0f81a2570..376d670cd 100644
--- a/src/mem/cache/miss/SConscript
+++ b/src/mem/cache/miss/SConscript
@@ -30,8 +30,5 @@
 
 Import('*')
 
-Source('blocking_buffer.cc')
-Source('miss_buffer.cc')
-Source('miss_queue.cc')
 Source('mshr.cc')
 Source('mshr_queue.cc')
diff --git a/src/mem/cache/miss/blocking_buffer.cc b/src/mem/cache/miss/blocking_buffer.cc
deleted file mode 100644
index 281328c2e..000000000
--- a/src/mem/cache/miss/blocking_buffer.cc
+++ /dev/null
@@ -1,245 +0,0 @@
-/*
- * Copyright (c) 2003-2005 The Regents of The University of Michigan
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met: redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer;
- * redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution;
- * neither the name of the copyright holders nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * Authors: Erik Hallnor
- */
-
-/**
- * @file
- * Definitions of a simple buffer for a blocking cache.
- */
-#include <cstring>
-
-#include "mem/cache/base_cache.hh"
-#include "mem/cache/miss/blocking_buffer.hh"
-#include "mem/cache/prefetch/base_prefetcher.hh"
-#include "mem/request.hh"
-
-/**
- * @todo Move writebacks into shared BaseBuffer class.
- */
-void
-BlockingBuffer::regStats(const std::string &name)
-{
-    MissBuffer::regStats(name);
-}
-
-
-void
-BlockingBuffer::handleMiss(PacketPtr &pkt, int blk_size, Tick time)
-{
-    Addr blk_addr = pkt->getAddr() & ~(Addr)(blk_size - 1);
-    if (pkt->isWrite() && (pkt->req->isUncacheable() || !writeAllocate ||
-                               !pkt->needsResponse())) {
-        if (!pkt->needsResponse()) {
-            wb.allocateAsBuffer(pkt);
-        } else {
-            wb.allocate(pkt->cmd, blk_addr, blk_size, pkt);
-        }
-
-        std::memcpy(wb.pkt->getPtr<uint8_t>(), pkt->getPtr<uint8_t>(), blk_size);
-
-        cache->setBlocked(Blocked_NoWBBuffers);
-        cache->requestMemSideBus(Request_WB, time);
-        return;
-    }
-
-    if (!pkt->needsResponse()) {
-        miss.allocateAsBuffer(pkt);
-    } else {
-        miss.allocate(pkt->cmd, blk_addr, blk_size, pkt);
-    }
-    if (!pkt->req->isUncacheable()) {
-        miss.pkt->flags |= CACHE_LINE_FILL;
-    }
-    cache->setBlocked(Blocked_NoMSHRs);
-    cache->requestMemSideBus(Request_MSHR, time);
-}
-
-PacketPtr
-BlockingBuffer::getPacket()
-{
-    if (miss.pkt && !miss.inService) {
-        return miss.pkt;
-    }
-    return wb.pkt;
-}
-
-void
-BlockingBuffer::setBusCmd(PacketPtr &pkt, MemCmd cmd)
-{
-    MSHR *mshr = (MSHR*) pkt->senderState;
-    mshr->originalCmd = pkt->cmd;
-    if (pkt->isCacheFill())
-        pkt->cmdOverride(cmd);
-}
-
-void
-BlockingBuffer::restoreOrigCmd(PacketPtr &pkt)
-{
-    pkt->cmdOverride(((MSHR*)(pkt->senderState))->originalCmd);
-}
-
-void
-BlockingBuffer::markInService(PacketPtr &pkt, MSHR* mshr)
-{
-    if (!pkt->isCacheFill() && pkt->isWrite()) {
-        // Forwarding a write/ writeback, don't need to change
-        // the command
-        assert(mshr == &wb);
-        cache->deassertMemSideBusRequest(Request_WB);
-        if (!pkt->needsResponse()) {
-            assert(wb.getNumTargets() == 0);
-            wb.deallocate();
-            cache->clearBlocked(Blocked_NoWBBuffers);
-        } else {
-            wb.inService = true;
-        }
-    } else {
-        assert(mshr == &miss);
-        cache->deassertMemSideBusRequest(Request_MSHR);
-        if (!pkt->needsResponse()) {
-            assert(miss.getNumTargets() == 0);
-            miss.deallocate();
-            cache->clearBlocked(Blocked_NoMSHRs);
-        } else {
-            //mark in service
-            miss.inService = true;
-        }
-    }
-}
-
-void
-BlockingBuffer::handleResponse(PacketPtr &pkt, Tick time)
-{
-    if (pkt->isCacheFill()) {
-        // targets were handled in the cache tags
-        assert((MSHR*)pkt->senderState == &miss);
-        miss.deallocate();
-        cache->clearBlocked(Blocked_NoMSHRs);
-    } else {
-        if (((MSHR*)(pkt->senderState))->hasTargets()) {
-            // Should only have 1 target if we had any
-            assert(((MSHR*)(pkt->senderState))->getNumTargets() == 1);
-            PacketPtr target = ((MSHR*)(pkt->senderState))->getTarget();
-            ((MSHR*)(pkt->senderState))->popTarget();
-            if (pkt->isRead()) {
-                std::memcpy(target->getPtr<uint8_t>(), pkt->getPtr<uint8_t>(), target->getSize());
-            }
-            cache->respond(target, time);
-            assert(!((MSHR*)(pkt->senderState))->hasTargets());
-        }
-
-        if (pkt->isWrite()) {
-            assert(((MSHR*)(pkt->senderState)) == &wb);
-            wb.deallocate();
-            cache->clearBlocked(Blocked_NoWBBuffers);
-        } else {
-            miss.deallocate();
-            cache->clearBlocked(Blocked_NoMSHRs);
-        }
-    }
-}
-
-void
-BlockingBuffer::squash(int threadNum)
-{
-    if (miss.threadNum == threadNum) {
-        PacketPtr target = miss.getTarget();
-        miss.popTarget();
-        assert(0/*target->req->getThreadNum()*/ == threadNum);
-        target = NULL;
-        assert(!miss.hasTargets());
-        miss.ntargets=0;
-        if (!miss.inService) {
-            miss.deallocate();
-            cache->clearBlocked(Blocked_NoMSHRs);
-            cache->deassertMemSideBusRequest(Request_MSHR);
-        }
-    }
-}
-
-void
-BlockingBuffer::doWriteback(Addr addr,
-                            int size, uint8_t *data, bool compressed)
-{
-    // Generate request
-    Request * req = new Request(addr, size, 0);
-    PacketPtr pkt = new Packet(req, MemCmd::Writeback, -1);
-    pkt->allocate();
-    if (data) {
-        std::memcpy(pkt->getPtr<uint8_t>(), data, size);
-    }
-
-    if (compressed) {
-        pkt->flags |= COMPRESSED;
-    }
-
-    ///All writebacks charged to same thread @todo figure this out
-    writebacks[0/*pkt->req->getThreadNum()*/]++;
-
-    wb.allocateAsBuffer(pkt);
-    cache->requestMemSideBus(Request_WB, curTick);
-    cache->setBlocked(Blocked_NoWBBuffers);
-}
-
-
-
-void
-BlockingBuffer::doWriteback(PacketPtr &pkt)
-{
-    writebacks[0/*pkt->req->getThreadNum()*/]++;
-
-    wb.allocateAsBuffer(pkt);
-
-    // Since allocate as buffer copies the request,
-    // need to copy data here.
-    std::memcpy(wb.pkt->getPtr<uint8_t>(), pkt->getPtr<uint8_t>(), pkt->getSize());
-
-    cache->setBlocked(Blocked_NoWBBuffers);
-    cache->requestMemSideBus(Request_WB, curTick);
-}
-
-
-MSHR *
-BlockingBuffer::findMSHR(Addr addr)
-{
-    if (miss.addr == addr && miss.pkt)
-        return &miss;
-    return NULL;
-}
-
-
-bool
-BlockingBuffer::findWrites(Addr addr, std::vector<MSHR*>& writes)
-{
-    if (wb.addr == addr && wb.pkt) {
-        writes.push_back(&wb);
-        return true;
-    }
-    return false;
-}
diff --git a/src/mem/cache/miss/blocking_buffer.hh b/src/mem/cache/miss/blocking_buffer.hh
deleted file mode 100644
index 86b24d539..000000000
--- a/src/mem/cache/miss/blocking_buffer.hh
+++ /dev/null
@@ -1,209 +0,0 @@
-/*
- * Copyright (c) 2003-2005 The Regents of The University of Michigan
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met: redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer;
- * redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution;
- * neither the name of the copyright holders nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * Authors: Erik Hallnor
- */
-
-/**
- * @file
- * Declaration of a simple buffer for a blocking cache.
- */
-
-#ifndef __BLOCKING_BUFFER_HH__
-#define __BLOCKING_BUFFER_HH__
-
-#include <vector>
-
-#include "base/misc.hh" // for fatal()
-#include "mem/cache/miss/miss_buffer.hh"
-#include "mem/cache/miss/mshr.hh"
-
-/**
- * Miss and writeback storage for a blocking cache.
- */
-class BlockingBuffer : public MissBuffer
-{
-protected:
-    /** Miss storage. */
-    MSHR miss;
-    /** WB storage. */
-    MSHR wb;
-
-public:
-    /**
-     * Builds and initializes this buffer.
-     * @param write_allocate If true, treat write misses the same as reads.
-     */
-    BlockingBuffer(bool write_allocate)
-        : MissBuffer(write_allocate)
-    {
-    }
-
-    /**
-     * Register statistics for this object.
-     * @param name The name of the parent cache.
-     */
-    void regStats(const std::string &name);
-
-    /**
-     * Handle a cache miss properly. Requests the bus and marks the cache as
-     * blocked.
-     * @param pkt The request that missed in the cache.
-     * @param blk_size The block size of the cache.
-     * @param time The time the miss is detected.
-     */
-    void handleMiss(PacketPtr &pkt, int blk_size, Tick time);
-
-    /**
-     * Fetch the block for the given address and buffer the given target.
-     * @param addr The address to fetch.
-     * @param asid The address space of the address.
-     * @param blk_size The block size of the cache.
-     * @param time The time the miss is detected.
-     * @param target The target for the fetch.
-     */
-    MSHR* fetchBlock(Addr addr, int blk_size, Tick time,
-                     PacketPtr &target)
-    {
-        fatal("Unimplemented");
-        M5_DUMMY_RETURN
-    }
-
-    /**
-     * Selects a outstanding request to service.
-     * @return The request to service, NULL if none found.
-     */
-    PacketPtr getPacket();
-
-    /**
-     * Set the command to the given bus command.
-     * @param pkt The request to update.
-     * @param cmd The bus command to use.
-     */
-    void setBusCmd(PacketPtr &pkt, MemCmd cmd);
-
-    /**
-     * Restore the original command in case of a bus transmission error.
-     * @param pkt The request to reset.
-     */
-    void restoreOrigCmd(PacketPtr &pkt);
-
-    /**
-     * Marks a request as in service (sent on the bus). This can have side
-     * effect since storage for no response commands is deallocated once they
-     * are successfully sent.
-     * @param pkt The request that was sent on the bus.
-     */
-    void markInService(PacketPtr &pkt, MSHR* mshr);
-
-    /**
-     * Frees the resources of the request and unblock the cache.
-     * @param pkt The request that has been satisfied.
-     * @param time The time when the request is satisfied.
-     */
-    void handleResponse(PacketPtr &pkt, Tick time);
-
-    /**
-     * Removes all outstanding requests for a given thread number. If a request
-     * has been sent to the bus, this function removes all of its targets.
-     * @param threadNum The thread number of the requests to squash.
-     */
-    void squash(int threadNum);
-
-    /**
-     * Return the current number of outstanding misses.
-     * @return the number of outstanding misses.
-     */
-    int getMisses()
-    {
-        return miss.getNumTargets();
-    }
-
-    /**
-     * Searches for the supplied address in the miss "queue".
-     * @param addr The address to look for.
-     * @param asid The address space id.
-     * @return A pointer to miss if it matches.
-     */
-    MSHR* findMSHR(Addr addr);
-
-    /**
-     * Searches for the supplied address in the write buffer.
-     * @param addr The address to look for.
-     * @param asid The address space id.
-     * @param writes List of pointers to the matching writes.
-     * @return True if there is a matching write.
-     */
-    bool findWrites(Addr addr, std::vector<MSHR*>& writes);
-
-    /**
-     * Perform a writeback of dirty data to the given address.
-     * @param addr The address to write to.
-     * @param asid The address space id.
-     * @param size The number of bytes to write.
-     * @param data The data to write, can be NULL.
-     * @param compressed True if the data is compressed.
-     */
-    void doWriteback(Addr addr,
-                     int size, uint8_t *data, bool compressed);
-
-    /**
-     * Perform a writeback request.
-     * @param pkt The writeback request.
-     */
-    void doWriteback(PacketPtr &pkt);
-
-    /**
-     * Returns true if there are outstanding requests.
-     * @return True if there are outstanding requests.
-     */
-    bool havePending()
-    {
-        return !miss.inService || !wb.inService;
-    }
-
-    /**
-     * Add a target to the given MSHR. This assumes it is in the miss queue.
-     * @param mshr The mshr to add a target to.
-     * @param pkt The target to add.
-     */
-    void addTarget(MSHR *mshr, PacketPtr &pkt)
-    {
-        fatal("Shouldn't call this on a blocking buffer.");
-    }
-
-    /**
-     * Dummy implmentation.
-     */
-    MSHR* allocateTargetList(Addr addr)
-    {
-        fatal("Unimplemented");
-        M5_DUMMY_RETURN
-    }
-};
-
-#endif // __BLOCKING_BUFFER_HH__
diff --git a/src/mem/cache/miss/miss_buffer.cc b/src/mem/cache/miss/miss_buffer.cc
deleted file mode 100644
index 4d9cd0958..000000000
--- a/src/mem/cache/miss/miss_buffer.cc
+++ /dev/null
@@ -1,62 +0,0 @@
-/*
- * Copyright (c) 2003-2006 The Regents of The University of Michigan
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met: redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer;
- * redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution;
- * neither the name of the copyright holders nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * Authors: Erik Hallnor
- */
-
-#include "cpu/smt.hh" //for maxThreadsPerCPU
-#include "mem/cache/base_cache.hh"
-#include "mem/cache/miss/miss_buffer.hh"
-#include "mem/cache/prefetch/base_prefetcher.hh"
-
-/**
- * @todo Move writebacks into shared BaseBuffer class.
- */
-void
-MissBuffer::regStats(const std::string &name)
-{
-    using namespace Stats;
-    writebacks
-        .init(maxThreadsPerCPU)
-        .name(name + ".writebacks")
-        .desc("number of writebacks")
-        .flags(total)
-        ;
-}
-
-void
-MissBuffer::setCache(BaseCache *_cache)
-{
-    cache = _cache;
-    blkSize = cache->getBlockSize();
-}
-
-void
-MissBuffer::setPrefetcher(BasePrefetcher *_prefetcher)
-{
-    prefetcher = _prefetcher;
-}
diff --git a/src/mem/cache/miss/miss_buffer.hh b/src/mem/cache/miss/miss_buffer.hh
deleted file mode 100644
index 9a86db304..000000000
--- a/src/mem/cache/miss/miss_buffer.hh
+++ /dev/null
@@ -1,223 +0,0 @@
-/*
- * Copyright (c) 2003-2006 The Regents of The University of Michigan
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met: redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer;
- * redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution;
- * neither the name of the copyright holders nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * Authors: Steve Reinhardt
- */
-
-/**
- * @file
- * MissBuffer declaration.
- */
-
-#ifndef __MISS_BUFFER_HH__
-#define __MISS_BUFFER_HH__
-
-class BaseCache;
-class BasePrefetcher;
-class MSHR;
-
-/**
- * Abstract base class for cache miss buffering.
- */
-class MissBuffer
-{
-  protected:
-    /** True if the cache should allocate on a write miss. */
-    const bool writeAllocate;
-
-    /** Pointer to the parent cache. */
-    BaseCache *cache;
-
-    /** The Prefetcher */
-    BasePrefetcher *prefetcher;
-
-    /** Block size of the parent cache. */
-    int blkSize;
-
-    // Statistics
-    /**
-     * @addtogroup CacheStatistics
-     * @{
-     */
-    /** Number of blocks written back per thread. */
-    Stats::Vector<> writebacks;
-
-    /**
-     * @}
-     */
-
-  public:
-    MissBuffer(bool write_allocate)
-        : writeAllocate(write_allocate)
-    {
-    }
-
-    virtual ~MissBuffer() {}
-
-    /**
-     * Called by the parent cache to set the back pointer.
-     * @param _cache A pointer to the parent cache.
-     */
-    void setCache(BaseCache *_cache);
-
-    void setPrefetcher(BasePrefetcher *_prefetcher);
-
-    /**
-     * Register statistics for this object.
-     * @param name The name of the parent cache.
-     */
-    virtual void regStats(const std::string &name);
-
-    /**
-     * Handle a cache miss properly. Either allocate an MSHR for the request,
-     * or forward it through the write buffer.
-     * @param pkt The request that missed in the cache.
-     * @param blk_size The block size of the cache.
-     * @param time The time the miss is detected.
-     */
-    virtual void handleMiss(PacketPtr &pkt, int blk_size, Tick time) = 0;
-
-    /**
-     * Fetch the block for the given address and buffer the given target.
-     * @param addr The address to fetch.
-     * @param asid The address space of the address.
-     * @param blk_size The block size of the cache.
-     * @param time The time the miss is detected.
-     * @param target The target for the fetch.
-     */
-    virtual MSHR *fetchBlock(Addr addr, int blk_size, Tick time,
-                             PacketPtr &target) = 0;
-
-    /**
-     * Selects a outstanding request to service.
-     * @return The request to service, NULL if none found.
-     */
-    virtual PacketPtr getPacket() = 0;
-
-    /**
-     * Set the command to the given bus command.
-     * @param pkt The request to update.
-     * @param cmd The bus command to use.
-     */
-    virtual void setBusCmd(PacketPtr &pkt, MemCmd cmd) = 0;
-
-    /**
-     * Restore the original command in case of a bus transmission error.
-     * @param pkt The request to reset.
-     */
-    virtual void restoreOrigCmd(PacketPtr &pkt) = 0;
-
-    /**
-     * Marks a request as in service (sent on the bus). This can have side
-     * effect since storage for no response commands is deallocated once they
-     * are successfully sent.
-     * @param pkt The request that was sent on the bus.
-     */
-    virtual void markInService(PacketPtr &pkt, MSHR* mshr) = 0;
-
-    /**
-     * Collect statistics and free resources of a satisfied request.
-     * @param pkt The request that has been satisfied.
-     * @param time The time when the request is satisfied.
-     */
-    virtual void handleResponse(PacketPtr &pkt, Tick time) = 0;
-
-    /**
-     * Removes all outstanding requests for a given thread number. If a request
-     * has been sent to the bus, this function removes all of its targets.
-     * @param threadNum The thread number of the requests to squash.
-     */
-    virtual void squash(int threadNum) = 0;
-
-    /**
-     * Return the current number of outstanding misses.
-     * @return the number of outstanding misses.
-     */
-    virtual int getMisses() = 0;
-
-    /**
-     * Searches for the supplied address in the miss queue.
-     * @param addr The address to look for.
-     * @param asid The address space id.
-     * @return The MSHR that contains the address, NULL if not found.
-     * @warning Currently only searches the miss queue. If non write allocate
-     * might need to search the write buffer for coherence.
-     */
-    virtual MSHR* findMSHR(Addr addr) = 0;
-
-    /**
-     * Searches for the supplied address in the write buffer.
-     * @param addr The address to look for.
-     * @param asid The address space id.
-     * @param writes The list of writes that match the address.
-     * @return True if any writes are found
-     */
-    virtual bool findWrites(Addr addr, std::vector<MSHR*>& writes) = 0;
-
-    /**
-     * Perform a writeback of dirty data to the given address.
-     * @param addr The address to write to.
-     * @param asid The address space id.
-     * @param xc The execution context of the address space.
-     * @param size The number of bytes to write.
-     * @param data The data to write, can be NULL.
-     * @param compressed True if the data is compressed.
-     */
-    virtual void doWriteback(Addr addr, int size, uint8_t *data,
-                             bool compressed) = 0;
-
-    /**
-     * Perform the given writeback request.
-     * @param pkt The writeback request.
-     */
-    virtual void doWriteback(PacketPtr &pkt) = 0;
-
-    /**
-     * Returns true if there are outstanding requests.
-     * @return True if there are outstanding requests.
-     */
-    virtual bool havePending() = 0;
-
-    /**
-     * Add a target to the given MSHR. This assumes it is in the miss queue.
-     * @param mshr The mshr to add a target to.
-     * @param pkt The target to add.
-     */
-    virtual void addTarget(MSHR *mshr, PacketPtr &pkt) = 0;
-
-    /**
-     * Allocate a MSHR to hold a list of targets to a block involved in a copy.
-     * If the block is marked done then the MSHR already holds the data to
-     * fill the block. Otherwise the block needs to be fetched.
-     * @param addr The address to buffer.
-     * @param asid The address space ID.
-     * @return A pointer to the allocated MSHR.
-     */
-    virtual MSHR* allocateTargetList(Addr addr) = 0;
-};
-
-#endif //__MISS_BUFFER_HH__
diff --git a/src/mem/cache/miss/miss_queue.cc b/src/mem/cache/miss/miss_queue.cc
deleted file mode 100644
index 67036ed02..000000000
--- a/src/mem/cache/miss/miss_queue.cc
+++ /dev/null
@@ -1,752 +0,0 @@
-/*
- * Copyright (c) 2003-2005 The Regents of The University of Michigan
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met: redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer;
- * redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution;
- * neither the name of the copyright holders nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * Authors: Erik Hallnor
- *          Ron Dreslinski
- */
-
-/**
- * @file
- * Miss and writeback queue definitions.
- */
-
-#include "cpu/smt.hh" //for maxThreadsPerCPU
-#include "mem/cache/base_cache.hh"
-#include "mem/cache/miss/miss_queue.hh"
-#include "mem/cache/prefetch/base_prefetcher.hh"
-
-using namespace std;
-
-// simple constructor
-/**
- * @todo Remove the +16 from the write buffer constructor once we handle
- * stalling on writebacks do to compression writes.
- */
-MissQueue::MissQueue(int numMSHRs, int numTargets, int write_buffers,
-                     bool write_allocate, bool prefetch_miss)
-    : MissBuffer(write_allocate),
-      mq(numMSHRs, 4), wb(write_buffers,numMSHRs+1000), numMSHR(numMSHRs),
-      numTarget(numTargets), writeBuffers(write_buffers),
-      order(0), prefetchMiss(prefetch_miss)
-{
-    noTargetMSHR = NULL;
-}
-
-
-MissQueue::~MissQueue()
-{
-}
-
-
-void
-MissQueue::regStats(const string &name)
-{
-    MissBuffer::regStats(name);
-
-    using namespace Stats;
-
-    // MSHR hit statistics
-    for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) {
-        MemCmd cmd(access_idx);
-        const string &cstr = cmd.toString();
-
-        mshr_hits[access_idx]
-            .init(maxThreadsPerCPU)
-            .name(name + "." + cstr + "_mshr_hits")
-            .desc("number of " + cstr + " MSHR hits")
-            .flags(total | nozero | nonan)
-            ;
-    }
-
-    demandMshrHits
-        .name(name + ".demand_mshr_hits")
-        .desc("number of demand (read+write) MSHR hits")
-        .flags(total)
-        ;
-    demandMshrHits = mshr_hits[MemCmd::ReadReq] + mshr_hits[MemCmd::WriteReq];
-
-    overallMshrHits
-        .name(name + ".overall_mshr_hits")
-        .desc("number of overall MSHR hits")
-        .flags(total)
-        ;
-    overallMshrHits = demandMshrHits + mshr_hits[MemCmd::SoftPFReq] +
-        mshr_hits[MemCmd::HardPFReq];
-
-    // MSHR miss statistics
-    for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) {
-        MemCmd cmd(access_idx);
-        const string &cstr = cmd.toString();
-
-        mshr_misses[access_idx]
-            .init(maxThreadsPerCPU)
-            .name(name + "." + cstr + "_mshr_misses")
-            .desc("number of " + cstr + " MSHR misses")
-            .flags(total | nozero | nonan)
-            ;
-    }
-
-    demandMshrMisses
-        .name(name + ".demand_mshr_misses")
-        .desc("number of demand (read+write) MSHR misses")
-        .flags(total)
-        ;
-    demandMshrMisses = mshr_misses[MemCmd::ReadReq] + mshr_misses[MemCmd::WriteReq];
-
-    overallMshrMisses
-        .name(name + ".overall_mshr_misses")
-        .desc("number of overall MSHR misses")
-        .flags(total)
-        ;
-    overallMshrMisses = demandMshrMisses + mshr_misses[MemCmd::SoftPFReq] +
-        mshr_misses[MemCmd::HardPFReq];
-
-    // MSHR miss latency statistics
-    for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) {
-        MemCmd cmd(access_idx);
-        const string &cstr = cmd.toString();
-
-        mshr_miss_latency[access_idx]
-            .init(maxThreadsPerCPU)
-            .name(name + "." + cstr + "_mshr_miss_latency")
-            .desc("number of " + cstr + " MSHR miss cycles")
-            .flags(total | nozero | nonan)
-            ;
-    }
-
-    demandMshrMissLatency
-        .name(name + ".demand_mshr_miss_latency")
-        .desc("number of demand (read+write) MSHR miss cycles")
-        .flags(total)
-        ;
-    demandMshrMissLatency = mshr_miss_latency[MemCmd::ReadReq]
-        + mshr_miss_latency[MemCmd::WriteReq];
-
-    overallMshrMissLatency
-        .name(name + ".overall_mshr_miss_latency")
-        .desc("number of overall MSHR miss cycles")
-        .flags(total)
-        ;
-    overallMshrMissLatency = demandMshrMissLatency +
-        mshr_miss_latency[MemCmd::SoftPFReq] + mshr_miss_latency[MemCmd::HardPFReq];
-
-    // MSHR uncacheable statistics
-    for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) {
-        MemCmd cmd(access_idx);
-        const string &cstr = cmd.toString();
-
-        mshr_uncacheable[access_idx]
-            .init(maxThreadsPerCPU)
-            .name(name + "." + cstr + "_mshr_uncacheable")
-            .desc("number of " + cstr + " MSHR uncacheable")
-            .flags(total | nozero | nonan)
-            ;
-    }
-
-    overallMshrUncacheable
-        .name(name + ".overall_mshr_uncacheable_misses")
-        .desc("number of overall MSHR uncacheable misses")
-        .flags(total)
-        ;
-    overallMshrUncacheable = mshr_uncacheable[MemCmd::ReadReq]
-        + mshr_uncacheable[MemCmd::WriteReq] + mshr_uncacheable[MemCmd::SoftPFReq]
-        + mshr_uncacheable[MemCmd::HardPFReq];
-
-    // MSHR miss latency statistics
-    for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) {
-        MemCmd cmd(access_idx);
-        const string &cstr = cmd.toString();
-
-        mshr_uncacheable_lat[access_idx]
-            .init(maxThreadsPerCPU)
-            .name(name + "." + cstr + "_mshr_uncacheable_latency")
-            .desc("number of " + cstr + " MSHR uncacheable cycles")
-            .flags(total | nozero | nonan)
-            ;
-    }
-
-    overallMshrUncacheableLatency
-        .name(name + ".overall_mshr_uncacheable_latency")
-        .desc("number of overall MSHR uncacheable cycles")
-        .flags(total)
-        ;
-    overallMshrUncacheableLatency = mshr_uncacheable_lat[MemCmd::ReadReq]
-        + mshr_uncacheable_lat[MemCmd::WriteReq]
-        + mshr_uncacheable_lat[MemCmd::SoftPFReq]
-        + mshr_uncacheable_lat[MemCmd::HardPFReq];
-
-#if 0
-    // MSHR access formulas
-    for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) {
-        MemCmd cmd(access_idx);
-        const string &cstr = cmd.toString();
-
-        mshrAccesses[access_idx]
-            .name(name + "." + cstr + "_mshr_accesses")
-            .desc("number of " + cstr + " mshr accesses(hits+misses)")
-            .flags(total | nozero | nonan)
-            ;
-        mshrAccesses[access_idx] =
-            mshr_hits[access_idx] + mshr_misses[access_idx]
-            + mshr_uncacheable[access_idx];
-    }
-
-    demandMshrAccesses
-        .name(name + ".demand_mshr_accesses")
-        .desc("number of demand (read+write) mshr accesses")
-        .flags(total | nozero | nonan)
-        ;
-    demandMshrAccesses = demandMshrHits + demandMshrMisses;
-
-    overallMshrAccesses
-        .name(name + ".overall_mshr_accesses")
-        .desc("number of overall (read+write) mshr accesses")
-        .flags(total | nozero | nonan)
-        ;
-    overallMshrAccesses = overallMshrHits + overallMshrMisses
-        + overallMshrUncacheable;
-#endif
-
-    // MSHR miss rate formulas
-    for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) {
-        MemCmd cmd(access_idx);
-        const string &cstr = cmd.toString();
-
-        mshrMissRate[access_idx]
-            .name(name + "." + cstr + "_mshr_miss_rate")
-            .desc("mshr miss rate for " + cstr + " accesses")
-            .flags(total | nozero | nonan)
-            ;
-
-        mshrMissRate[access_idx] =
-            mshr_misses[access_idx] / cache->accesses[access_idx];
-    }
-
-    demandMshrMissRate
-        .name(name + ".demand_mshr_miss_rate")
-        .desc("mshr miss rate for demand accesses")
-        .flags(total)
-        ;
-    demandMshrMissRate = demandMshrMisses / cache->demandAccesses;
-
-    overallMshrMissRate
-        .name(name + ".overall_mshr_miss_rate")
-        .desc("mshr miss rate for overall accesses")
-        .flags(total)
-        ;
-    overallMshrMissRate = overallMshrMisses / cache->overallAccesses;
-
-    // mshrMiss latency formulas
-    for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) {
-        MemCmd cmd(access_idx);
-        const string &cstr = cmd.toString();
-
-        avgMshrMissLatency[access_idx]
-            .name(name + "." + cstr + "_avg_mshr_miss_latency")
-            .desc("average " + cstr + " mshr miss latency")
-            .flags(total | nozero | nonan)
-            ;
-
-        avgMshrMissLatency[access_idx] =
-            mshr_miss_latency[access_idx] / mshr_misses[access_idx];
-    }
-
-    demandAvgMshrMissLatency
-        .name(name + ".demand_avg_mshr_miss_latency")
-        .desc("average overall mshr miss latency")
-        .flags(total)
-        ;
-    demandAvgMshrMissLatency = demandMshrMissLatency / demandMshrMisses;
-
-    overallAvgMshrMissLatency
-        .name(name + ".overall_avg_mshr_miss_latency")
-        .desc("average overall mshr miss latency")
-        .flags(total)
-        ;
-    overallAvgMshrMissLatency = overallMshrMissLatency / overallMshrMisses;
-
-    // mshrUncacheable latency formulas
-    for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) {
-        MemCmd cmd(access_idx);
-        const string &cstr = cmd.toString();
-
-        avgMshrUncacheableLatency[access_idx]
-            .name(name + "." + cstr + "_avg_mshr_uncacheable_latency")
-            .desc("average " + cstr + " mshr uncacheable latency")
-            .flags(total | nozero | nonan)
-            ;
-
-        avgMshrUncacheableLatency[access_idx] =
-            mshr_uncacheable_lat[access_idx] / mshr_uncacheable[access_idx];
-    }
-
-    overallAvgMshrUncacheableLatency
-        .name(name + ".overall_avg_mshr_uncacheable_latency")
-        .desc("average overall mshr uncacheable latency")
-        .flags(total)
-        ;
-    overallAvgMshrUncacheableLatency = overallMshrUncacheableLatency / overallMshrUncacheable;
-
-    mshr_cap_events
-        .init(maxThreadsPerCPU)
-        .name(name + ".mshr_cap_events")
-        .desc("number of times MSHR cap was activated")
-        .flags(total)
-        ;
-
-    //software prefetching stats
-    soft_prefetch_mshr_full
-        .init(maxThreadsPerCPU)
-        .name(name + ".soft_prefetch_mshr_full")
-        .desc("number of mshr full events for SW prefetching instrutions")
-        .flags(total)
-        ;
-
-    mshr_no_allocate_misses
-        .name(name +".no_allocate_misses")
-        .desc("Number of misses that were no-allocate")
-        ;
-
-}
-
-
-MSHR*
-MissQueue::allocateMiss(PacketPtr &pkt, int size, Tick time)
-{
-    MSHR* mshr = mq.allocate(pkt, size);
-    mshr->order = order++;
-    if (!pkt->req->isUncacheable() ){//&& !pkt->isNoAllocate()) {
-        // Mark this as a cache line fill
-        mshr->pkt->flags |= CACHE_LINE_FILL;
-    }
-    if (mq.isFull()) {
-        cache->setBlocked(Blocked_NoMSHRs);
-    }
-    if (pkt->cmd != MemCmd::HardPFReq) {
-        //If we need to request the bus (not on HW prefetch), do so
-        cache->requestMemSideBus(Request_MSHR, time);
-    }
-    return mshr;
-}
-
-
-MSHR*
-MissQueue::allocateWrite(PacketPtr &pkt, int size, Tick time)
-{
-    MSHR* mshr = wb.allocate(pkt,size);
-    mshr->order = order++;
-
-//REMOVING COMPRESSION FOR NOW
-#if 0
-    if (pkt->isCompressed()) {
-        mshr->pkt->deleteData();
-        mshr->pkt->actualSize = pkt->actualSize;
-        mshr->pkt->data = new uint8_t[pkt->actualSize];
-        memcpy(mshr->pkt->data, pkt->data, pkt->actualSize);
-    } else {
-#endif
-        memcpy(mshr->pkt->getPtr<uint8_t>(), pkt->getPtr<uint8_t>(), pkt->getSize());
-  //{
-
-    if (wb.isFull()) {
-        cache->setBlocked(Blocked_NoWBBuffers);
-    }
-
-    cache->requestMemSideBus(Request_WB, time);
-
-    return mshr;
-}
-
-
-/**
- * @todo Remove SW prefetches on mshr hits.
- */
-void
-MissQueue::handleMiss(PacketPtr &pkt, int blkSize, Tick time)
-{
-//    if (!cache->isTopLevel())
-    if (prefetchMiss) prefetcher->handleMiss(pkt, time);
-
-    int size = blkSize;
-    Addr blkAddr = pkt->getAddr() & ~(Addr)(blkSize-1);
-    MSHR* mshr = NULL;
-    if (!pkt->req->isUncacheable()) {
-        mshr = mq.findMatch(blkAddr);
-        if (mshr) {
-            //@todo remove hw_pf here
-            mshr_hits[pkt->cmdToIndex()][0/*pkt->req->getThreadNum()*/]++;
-            if (mshr->threadNum != 0/*pkt->req->getThreadNum()*/) {
-                mshr->threadNum = -1;
-            }
-            mq.allocateTarget(mshr, pkt);
-            if (mshr->pkt->isNoAllocate() && !pkt->isNoAllocate()) {
-                //We are adding an allocate after a no-allocate
-                mshr->pkt->flags &= ~NO_ALLOCATE;
-            }
-            if (mshr->getNumTargets() == numTarget) {
-                noTargetMSHR = mshr;
-                cache->setBlocked(Blocked_NoTargets);
-                mq.moveToFront(mshr);
-            }
-            return;
-        }
-        if (pkt->isNoAllocate()) {
-            //Count no-allocate requests differently
-            mshr_no_allocate_misses++;
-        }
-        else {
-            mshr_misses[pkt->cmdToIndex()][0/*pkt->req->getThreadNum()*/]++;
-        }
-    } else {
-        //Count uncacheable accesses
-        mshr_uncacheable[pkt->cmdToIndex()][0/*pkt->req->getThreadNum()*/]++;
-        size = pkt->getSize();
-    }
-    if (pkt->isWrite() && (pkt->req->isUncacheable() || !writeAllocate ||
-                               !pkt->needsResponse())) {
-        /**
-         * @todo Add write merging here.
-         */
-        mshr = allocateWrite(pkt, pkt->getSize(), time);
-        return;
-    }
-
-    mshr = allocateMiss(pkt, blkSize, time);
-}
-
-MSHR*
-MissQueue::fetchBlock(Addr addr, int blk_size, Tick time,
-                      PacketPtr &target)
-{
-    Addr blkAddr = addr & ~(Addr)(blk_size - 1);
-    assert(mq.findMatch(addr) == NULL);
-    MSHR *mshr = mq.allocateFetch(blkAddr, blk_size, target);
-    mshr->order = order++;
-    mshr->pkt->flags |= CACHE_LINE_FILL;
-    if (mq.isFull()) {
-        cache->setBlocked(Blocked_NoMSHRs);
-    }
-    cache->requestMemSideBus(Request_MSHR, time);
-    return mshr;
-}
-
-PacketPtr
-MissQueue::getPacket()
-{
-    PacketPtr pkt = mq.getReq();
-    if (((wb.isFull() && wb.inServiceMSHRs == 0) || !pkt ||
-         pkt->time > curTick) && wb.havePending()) {
-        pkt = wb.getReq();
-        // Need to search for earlier miss.
-        MSHR *mshr = mq.findPending(pkt);
-        if (mshr && mshr->order < ((MSHR*)(pkt->senderState))->order) {
-            // Service misses in order until conflict is cleared.
-            return mq.getReq();
-        }
-    }
-    if (pkt) {
-        MSHR* mshr = wb.findPending(pkt);
-        if (mshr /*&& mshr->order < pkt->senderState->order*/) {
-            // The only way this happens is if we are
-            // doing a write and we didn't have permissions
-            // then subsequently saw a writeback(owned got evicted)
-            // We need to make sure to perform the writeback first
-            // To preserve the dirty data, then we can issue the write
-            return wb.getReq();
-        }
-    }
-    else if (!mq.isFull()){
-        //If we have a miss queue slot, we can try a prefetch
-        pkt = prefetcher->getPacket();
-        if (pkt) {
-            //Update statistic on number of prefetches issued (hwpf_mshr_misses)
-            mshr_misses[pkt->cmdToIndex()][0/*pkt->req->getThreadNum()*/]++;
-            //It will request the bus for the future, but should clear that immedieatley
-            allocateMiss(pkt, pkt->getSize(), curTick);
-            pkt = mq.getReq();
-            assert(pkt); //We should get back a req b/c we just put one in
-        }
-    }
-    return pkt;
-}
-
-void
-MissQueue::setBusCmd(PacketPtr &pkt, MemCmd cmd)
-{
-    assert(pkt->senderState != 0);
-    MSHR * mshr = (MSHR*)pkt->senderState;
-    mshr->originalCmd = pkt->cmd;
-    if (cmd == MemCmd::UpgradeReq || cmd == MemCmd::InvalidateReq) {
-        pkt->flags |= NO_ALLOCATE;
-        pkt->flags &= ~CACHE_LINE_FILL;
-    }
-    else if (!pkt->req->isUncacheable() && !pkt->isNoAllocate() &&
-             cmd.needsResponse()) {
-        pkt->flags |= CACHE_LINE_FILL;
-    }
-    if (pkt->isCacheFill() || pkt->isNoAllocate())
-        pkt->cmd = cmd;
-}
-
-void
-MissQueue::restoreOrigCmd(PacketPtr &pkt)
-{
-    pkt->cmd = ((MSHR*)(pkt->senderState))->originalCmd;
-}
-
-void
-MissQueue::markInService(PacketPtr &pkt, MSHR* mshr)
-{
-    bool unblock = false;
-    BlockedCause cause = NUM_BLOCKED_CAUSES;
-
-    /**
-     * @todo Should include MSHRQueue pointer in MSHR to select the correct
-     * one.
-     */
-    if ((!pkt->isCacheFill() && pkt->isWrite())) {
-        // Forwarding a write/ writeback, don't need to change
-        // the command
-        unblock = wb.isFull();
-        wb.markInService(mshr);
-        if (!wb.havePending()){
-            cache->deassertMemSideBusRequest(Request_WB);
-        }
-        if (unblock) {
-            // Do we really unblock?
-            unblock = !wb.isFull();
-            cause = Blocked_NoWBBuffers;
-        }
-    } else {
-        unblock = mq.isFull();
-        mq.markInService(mshr);
-        if (!mq.havePending()){
-            cache->deassertMemSideBusRequest(Request_MSHR);
-        }
-        if (mshr->originalCmd == MemCmd::HardPFReq) {
-            DPRINTF(HWPrefetch, "%s:Marking a HW_PF in service\n",
-                    cache->name());
-            //Also clear pending if need be
-            if (!prefetcher->havePending())
-            {
-                cache->deassertMemSideBusRequest(Request_PF);
-            }
-        }
-        if (unblock) {
-            unblock = !mq.isFull();
-            cause = Blocked_NoMSHRs;
-        }
-    }
-    if (unblock) {
-        cache->clearBlocked(cause);
-    }
-}
-
-
-void
-MissQueue::handleResponse(PacketPtr &pkt, Tick time)
-{
-    MSHR* mshr = (MSHR*)pkt->senderState;
-    if (((MSHR*)(pkt->senderState))->originalCmd == MemCmd::HardPFReq) {
-        DPRINTF(HWPrefetch, "%s:Handling the response to a HW_PF\n",
-                cache->name());
-    }
-#ifndef NDEBUG
-    int num_targets = mshr->getNumTargets();
-#endif
-
-    bool unblock = false;
-    bool unblock_target = false;
-    BlockedCause cause = NUM_BLOCKED_CAUSES;
-
-    if (pkt->isCacheFill() && !pkt->isNoAllocate()) {
-        mshr_miss_latency[mshr->originalCmd.toInt()][0/*pkt->req->getThreadNum()*/] +=
-            curTick - pkt->time;
-        // targets were handled in the cache tags
-        if (mshr == noTargetMSHR) {
-            // we always clear at least one target
-            unblock_target = true;
-            cause = Blocked_NoTargets;
-            noTargetMSHR = NULL;
-        }
-
-        if (mshr->hasTargets()) {
-            // Didn't satisfy all the targets, need to resend
-            MemCmd cmd = mshr->getTarget()->cmd;
-            mshr->pkt->setDest(Packet::Broadcast);
-            mshr->pkt->result = Packet::Unknown;
-            mshr->pkt->req = mshr->getTarget()->req;
-            mq.markPending(mshr, cmd);
-            mshr->order = order++;
-            cache->requestMemSideBus(Request_MSHR, time);
-        }
-        else {
-            unblock = mq.isFull();
-            mq.deallocate(mshr);
-            if (unblock) {
-                unblock = !mq.isFull();
-                cause = Blocked_NoMSHRs;
-            }
-        }
-    } else {
-        if (pkt->req->isUncacheable()) {
-            mshr_uncacheable_lat[pkt->cmd.toInt()][0/*pkt->req->getThreadNum()*/] +=
-                curTick - pkt->time;
-        }
-        if (mshr->hasTargets() && pkt->req->isUncacheable()) {
-            // Should only have 1 target if we had any
-            assert(num_targets == 1);
-            PacketPtr target = mshr->getTarget();
-            mshr->popTarget();
-            if (pkt->isRead()) {
-                memcpy(target->getPtr<uint8_t>(), pkt->getPtr<uint8_t>(),
-                       target->getSize());
-            }
-            cache->respond(target, time);
-            assert(!mshr->hasTargets());
-        }
-        else if (mshr->hasTargets()) {
-            //Must be a no_allocate with possibly more than one target
-            assert(mshr->pkt->isNoAllocate());
-            while (mshr->hasTargets()) {
-                PacketPtr target = mshr->getTarget();
-                mshr->popTarget();
-                if (pkt->isRead()) {
-                    memcpy(target->getPtr<uint8_t>(), pkt->getPtr<uint8_t>(),
-                           target->getSize());
-                }
-                cache->respond(target, time);
-            }
-        }
-
-        if (pkt->isWrite()) {
-            // If the wrtie buffer is full, we might unblock now
-            unblock = wb.isFull();
-            wb.deallocate(mshr);
-            if (unblock) {
-                // Did we really unblock?
-                unblock = !wb.isFull();
-                cause = Blocked_NoWBBuffers;
-            }
-        } else {
-            unblock = mq.isFull();
-            mq.deallocate(mshr);
-            if (unblock) {
-                unblock = !mq.isFull();
-                cause = Blocked_NoMSHRs;
-            }
-        }
-    }
-    if (unblock || unblock_target) {
-        cache->clearBlocked(cause);
-    }
-}
-
-void
-MissQueue::squash(int threadNum)
-{
-    bool unblock = false;
-    BlockedCause cause = NUM_BLOCKED_CAUSES;
-
-    if (noTargetMSHR && noTargetMSHR->threadNum == threadNum) {
-        noTargetMSHR = NULL;
-        unblock = true;
-        cause = Blocked_NoTargets;
-    }
-    if (mq.isFull()) {
-        unblock = true;
-        cause = Blocked_NoMSHRs;
-    }
-    mq.squash(threadNum);
-    if (!mq.havePending()) {
-        cache->deassertMemSideBusRequest(Request_MSHR);
-    }
-    if (unblock && !mq.isFull()) {
-        cache->clearBlocked(cause);
-    }
-
-}
-
-MSHR*
-MissQueue::findMSHR(Addr addr)
-{
-    return mq.findMatch(addr);
-}
-
-bool
-MissQueue::findWrites(Addr addr, vector<MSHR*> &writes)
-{
-    return wb.findMatches(addr,writes);
-}
-
-void
-MissQueue::doWriteback(Addr addr,
-                       int size, uint8_t *data, bool compressed)
-{
-    // Generate request
-    Request * req = new Request(addr, size, 0);
-    PacketPtr pkt = new Packet(req, MemCmd::Writeback, -1);
-    pkt->allocate();
-    if (data) {
-        memcpy(pkt->getPtr<uint8_t>(), data, size);
-    }
-
-    if (compressed) {
-        pkt->flags |= COMPRESSED;
-    }
-
-    ///All writebacks charged to same thread @todo figure this out
-    writebacks[0/*pkt->req->getThreadNum()*/]++;
-
-    allocateWrite(pkt, 0, curTick);
-}
-
-
-void
-MissQueue::doWriteback(PacketPtr &pkt)
-{
-    writebacks[0/*pkt->req->getThreadNum()*/]++;
-    allocateWrite(pkt, 0, curTick);
-}
-
-
-MSHR*
-MissQueue::allocateTargetList(Addr addr)
-{
-   MSHR* mshr = mq.allocateTargetList(addr, blkSize);
-   mshr->pkt->flags |= CACHE_LINE_FILL;
-   if (mq.isFull()) {
-       cache->setBlocked(Blocked_NoMSHRs);
-   }
-   return mshr;
-}
-
-bool
-MissQueue::havePending()
-{
-    return mq.havePending() || wb.havePending() || prefetcher->havePending();
-}
diff --git a/src/mem/cache/miss/miss_queue.hh b/src/mem/cache/miss/miss_queue.hh
deleted file mode 100644
index d3560ff36..000000000
--- a/src/mem/cache/miss/miss_queue.hh
+++ /dev/null
@@ -1,327 +0,0 @@
-/*
- * Copyright (c) 2003-2005 The Regents of The University of Michigan
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met: redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer;
- * redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution;
- * neither the name of the copyright holders nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * Authors: Erik Hallnor
- */
-
-/**
- * @file
- * Miss and writeback queue declarations.
- */
-
-#ifndef __MISS_QUEUE_HH__
-#define __MISS_QUEUE_HH__
-
-#include <vector>
-
-#include "mem/cache/miss/miss_buffer.hh"
-#include "mem/cache/miss/mshr.hh"
-#include "mem/cache/miss/mshr_queue.hh"
-#include "base/statistics.hh"
-
-/**
- * Manages cache misses and writebacks. Contains MSHRs to store miss data
- * and the writebuffer for writes/writebacks.
- * @todo need to handle data on writes better (encapsulate).
- * @todo need to make replacements/writebacks happen in Cache::access
- */
-class MissQueue : public MissBuffer
-{
-  protected:
-    /** The MSHRs. */
-    MSHRQueue mq;
-    /** Write Buffer. */
-    MSHRQueue wb;
-
-    // PARAMTERS
-
-    /** The number of MSHRs in the miss queue. */
-    const int numMSHR;
-    /** The number of targets for each MSHR. */
-    const int numTarget;
-    /** The number of write buffers. */
-    const int writeBuffers;
-
-    /** Increasing order number assigned to each incoming request. */
-    uint64_t order;
-
-    bool prefetchMiss;
-
-    // Statistics
-    /**
-     * @addtogroup CacheStatistics
-     * @{
-     */
-    /** Number of misses that hit in the MSHRs per command and thread. */
-    Stats::Vector<> mshr_hits[MemCmd::NUM_MEM_CMDS];
-    /** Demand misses that hit in the MSHRs. */
-    Stats::Formula demandMshrHits;
-    /** Total number of misses that hit in the MSHRs. */
-    Stats::Formula overallMshrHits;
-
-    /** Number of misses that miss in the MSHRs, per command and thread. */
-    Stats::Vector<> mshr_misses[MemCmd::NUM_MEM_CMDS];
-    /** Demand misses that miss in the MSHRs. */
-    Stats::Formula demandMshrMisses;
-    /** Total number of misses that miss in the MSHRs. */
-    Stats::Formula overallMshrMisses;
-
-    /** Number of misses that miss in the MSHRs, per command and thread. */
-    Stats::Vector<> mshr_uncacheable[MemCmd::NUM_MEM_CMDS];
-    /** Total number of misses that miss in the MSHRs. */
-    Stats::Formula overallMshrUncacheable;
-
-    /** Total cycle latency of each MSHR miss, per command and thread. */
-    Stats::Vector<> mshr_miss_latency[MemCmd::NUM_MEM_CMDS];
-    /** Total cycle latency of demand MSHR misses. */
-    Stats::Formula demandMshrMissLatency;
-    /** Total cycle latency of overall MSHR misses. */
-    Stats::Formula overallMshrMissLatency;
-
-    /** Total cycle latency of each MSHR miss, per command and thread. */
-    Stats::Vector<> mshr_uncacheable_lat[MemCmd::NUM_MEM_CMDS];
-    /** Total cycle latency of overall MSHR misses. */
-    Stats::Formula overallMshrUncacheableLatency;
-
-    /** The total number of MSHR accesses per command and thread. */
-    Stats::Formula mshrAccesses[MemCmd::NUM_MEM_CMDS];
-    /** The total number of demand MSHR accesses. */
-    Stats::Formula demandMshrAccesses;
-    /** The total number of MSHR accesses. */
-    Stats::Formula overallMshrAccesses;
-
-    /** The miss rate in the MSHRs pre command and thread. */
-    Stats::Formula mshrMissRate[MemCmd::NUM_MEM_CMDS];
-    /** The demand miss rate in the MSHRs. */
-    Stats::Formula demandMshrMissRate;
-    /** The overall miss rate in the MSHRs. */
-    Stats::Formula overallMshrMissRate;
-
-    /** The average latency of an MSHR miss, per command and thread. */
-    Stats::Formula avgMshrMissLatency[MemCmd::NUM_MEM_CMDS];
-    /** The average latency of a demand MSHR miss. */
-    Stats::Formula demandAvgMshrMissLatency;
-    /** The average overall latency of an MSHR miss. */
-    Stats::Formula overallAvgMshrMissLatency;
-
-    /** The average latency of an MSHR miss, per command and thread. */
-    Stats::Formula avgMshrUncacheableLatency[MemCmd::NUM_MEM_CMDS];
-    /** The average overall latency of an MSHR miss. */
-    Stats::Formula overallAvgMshrUncacheableLatency;
-
-    /** The number of times a thread hit its MSHR cap. */
-    Stats::Vector<> mshr_cap_events;
-    /** The number of times software prefetches caused the MSHR to block. */
-    Stats::Vector<> soft_prefetch_mshr_full;
-
-    Stats::Scalar<> mshr_no_allocate_misses;
-
-    /**
-     * @}
-     */
-
-  private:
-    /** Pointer to the MSHR that has no targets. */
-    MSHR* noTargetMSHR;
-
-    /**
-     * Allocate a new MSHR to handle the provided miss.
-     * @param pkt The miss to buffer.
-     * @param size The number of bytes to fetch.
-     * @param time The time the miss occurs.
-     * @return A pointer to the new MSHR.
-     */
-    MSHR* allocateMiss(PacketPtr &pkt, int size, Tick time);
-
-    /**
-     * Allocate a new WriteBuffer to handle the provided write.
-     * @param pkt The write to handle.
-     * @param size The number of bytes to write.
-     * @param time The time the write occurs.
-     * @return A pointer to the new write buffer.
-     */
-    MSHR* allocateWrite(PacketPtr &pkt, int size, Tick time);
-
-  public:
-    /**
-     * Simple Constructor. Initializes all needed internal storage and sets
-     * parameters.
-     * @param numMSHRs The number of outstanding misses to handle.
-     * @param numTargets The number of outstanding targets to each miss.
-     * @param write_buffers The number of outstanding writes to handle.
-     * @param write_allocate If true, treat write misses the same as reads.
-     */
-    MissQueue(int numMSHRs, int numTargets, int write_buffers,
-              bool write_allocate, bool prefetch_miss);
-
-    /**
-     * Deletes all allocated internal storage.
-     */
-    ~MissQueue();
-
-    /**
-     * Register statistics for this object.
-     * @param name The name of the parent cache.
-     */
-    void regStats(const std::string &name);
-
-    /**
-     * Handle a cache miss properly. Either allocate an MSHR for the request,
-     * or forward it through the write buffer.
-     * @param pkt The request that missed in the cache.
-     * @param blk_size The block size of the cache.
-     * @param time The time the miss is detected.
-     */
-    void handleMiss(PacketPtr &pkt, int blk_size, Tick time);
-
-    /**
-     * Fetch the block for the given address and buffer the given target.
-     * @param addr The address to fetch.
-     * @param asid The address space of the address.
-     * @param blk_size The block size of the cache.
-     * @param time The time the miss is detected.
-     * @param target The target for the fetch.
-     */
-    MSHR* fetchBlock(Addr addr, int blk_size, Tick time,
-                     PacketPtr &target);
-
-    /**
-     * Selects a outstanding request to service.
-     * @return The request to service, NULL if none found.
-     */
-    PacketPtr getPacket();
-
-    /**
-     * Set the command to the given bus command.
-     * @param pkt The request to update.
-     * @param cmd The bus command to use.
-     */
-    void setBusCmd(PacketPtr &pkt, MemCmd cmd);
-
-    /**
-     * Restore the original command in case of a bus transmission error.
-     * @param pkt The request to reset.
-     */
-    void restoreOrigCmd(PacketPtr &pkt);
-
-    /**
-     * Marks a request as in service (sent on the bus). This can have side
-     * effect since storage for no response commands is deallocated once they
-     * are successfully sent.
-     * @param pkt The request that was sent on the bus.
-     */
-    void markInService(PacketPtr &pkt, MSHR* mshr);
-
-    /**
-     * Collect statistics and free resources of a satisfied request.
-     * @param pkt The request that has been satisfied.
-     * @param time The time when the request is satisfied.
-     */
-    void handleResponse(PacketPtr &pkt, Tick time);
-
-    /**
-     * Removes all outstanding requests for a given thread number. If a request
-     * has been sent to the bus, this function removes all of its targets.
-     * @param threadNum The thread number of the requests to squash.
-     */
-    void squash(int threadNum);
-
-    /**
-     * Return the current number of outstanding misses.
-     * @return the number of outstanding misses.
-     */
-    int getMisses()
-    {
-        return mq.getAllocatedTargets();
-    }
-
-    /**
-     * Searches for the supplied address in the miss queue.
-     * @param addr The address to look for.
-     * @param asid The address space id.
-     * @return The MSHR that contains the address, NULL if not found.
-     * @warning Currently only searches the miss queue. If non write allocate
-     * might need to search the write buffer for coherence.
-     */
-    MSHR* findMSHR(Addr addr);
-
-    /**
-     * Searches for the supplied address in the write buffer.
-     * @param addr The address to look for.
-     * @param asid The address space id.
-     * @param writes The list of writes that match the address.
-     * @return True if any writes are found
-     */
-    bool findWrites(Addr addr, std::vector<MSHR*>& writes);
-
-    /**
-     * Perform a writeback of dirty data to the given address.
-     * @param addr The address to write to.
-     * @param asid The address space id.
-     * @param xc The execution context of the address space.
-     * @param size The number of bytes to write.
-     * @param data The data to write, can be NULL.
-     * @param compressed True if the data is compressed.
-     */
-    void doWriteback(Addr addr,
-                     int size, uint8_t *data, bool compressed);
-
-    /**
-     * Perform the given writeback request.
-     * @param pkt The writeback request.
-     */
-    void doWriteback(PacketPtr &pkt);
-
-    /**
-     * Returns true if there are outstanding requests.
-     * @return True if there are outstanding requests.
-     */
-    bool havePending();
-
-    /**
-     * Add a target to the given MSHR. This assumes it is in the miss queue.
-     * @param mshr The mshr to add a target to.
-     * @param pkt The target to add.
-     */
-    void addTarget(MSHR *mshr, PacketPtr &pkt)
-    {
-        mq.allocateTarget(mshr, pkt);
-    }
-
-    /**
-     * Allocate a MSHR to hold a list of targets to a block involved in a copy.
-     * If the block is marked done then the MSHR already holds the data to
-     * fill the block. Otherwise the block needs to be fetched.
-     * @param addr The address to buffer.
-     * @param asid The address space ID.
-     * @return A pointer to the allocated MSHR.
-     */
-    MSHR* allocateTargetList(Addr addr);
-
-};
-
-#endif //__MISS_QUEUE_HH__
diff --git a/src/mem/cache/miss/mshr.cc b/src/mem/cache/miss/mshr.cc
index 74dad658b..218d42339 100644
--- a/src/mem/cache/miss/mshr.cc
+++ b/src/mem/cache/miss/mshr.cc
@@ -54,45 +54,20 @@ MSHR::MSHR()
 }
 
 void
-MSHR::allocate(MemCmd cmd, Addr _addr, int size,
-               PacketPtr &target)
+MSHR::allocate(Addr _addr, int _size, PacketPtr target, bool cacheFill)
 {
     addr = _addr;
-    if (target)
-    {
-        //Have a request, just use it
-        pkt = new Packet(target->req, cmd, Packet::Broadcast, size);
-        pkt->time = curTick;
-        pkt->allocate();
-        pkt->senderState = (Packet::SenderState *)this;
-        allocateTarget(target);
-    }
-    else
-    {
-        //need a request first
-        Request * req = new Request();
-        req->setPhys(addr, size, 0);
-        //Thread context??
-        pkt = new Packet(req, cmd, Packet::Broadcast, size);
-        pkt->time = curTick;
-        pkt->allocate();
-        pkt->senderState = (Packet::SenderState *)this;
-    }
-}
-
-// Since we aren't sure if data is being used, don't copy here.
-/**
- * @todo When we have a "global" data flag, might want to copy data here.
- */
-void
-MSHR::allocateAsBuffer(PacketPtr &target)
-{
-    addr = target->getAddr();
-    threadNum = 0/*target->req->getThreadNum()*/;
-    pkt = new Packet(target->req, target->cmd, -1);
-    pkt->allocate();
-    pkt->senderState = (Packet::SenderState*)this;
-    pkt->time = curTick;
+    size = _size;
+    assert(target);
+    isCacheFill = cacheFill;
+    needsExclusive = target->needsExclusive();
+    _isUncacheable = target->req->isUncacheable();
+    inService = false;
+    threadNum = 0;
+    ntargets = 1;
+    // Don't know of a case where we would allocate a new MSHR for a
+    // snoop (mem-side request), so set cpuSide to true here.
+    targets.push_back(Target(target, true));
 }
 
 void
@@ -100,8 +75,6 @@ MSHR::deallocate()
 {
     assert(targets.empty());
     assert(ntargets == 0);
-    delete pkt;
-    pkt = NULL;
     inService = false;
     //allocIter = NULL;
     //readyIter = NULL;
@@ -111,16 +84,17 @@ MSHR::deallocate()
  * Adds a target to an MSHR
  */
 void
-MSHR::allocateTarget(PacketPtr &target)
+MSHR::allocateTarget(PacketPtr target, bool cpuSide)
 {
     //If we append an invalidate and we issued a read to the bus,
     //but now have some pending writes, we need to move
     //the invalidate to before the first non-read
-    if (inService && pkt->isRead() && target->isInvalidate()) {
-        std::list<PacketPtr> temp;
+    if (inService && !inServiceForExclusive && needsExclusive
+        && !cpuSide && target->isInvalidate()) {
+        std::list<Target> temp;
 
         while (!targets.empty()) {
-            if (!targets.front()->isRead()) break;
+            if (targets.front().pkt->needsExclusive()) break;
             //Place on top of temp stack
             temp.push_front(targets.front());
             //Remove from targets
@@ -129,7 +103,7 @@ MSHR::allocateTarget(PacketPtr &target)
 
         //Now that we have all the reads off until first non-read, we can
         //place the invalidate on
-        targets.push_front(target);
+        targets.push_front(Target(target, cpuSide));
 
         //Now we pop off the temp_stack and put them back
         while (!temp.empty()) {
@@ -138,22 +112,16 @@ MSHR::allocateTarget(PacketPtr &target)
         }
     }
     else {
-        targets.push_back(target);
+        targets.push_back(Target(target, cpuSide));
     }
 
     ++ntargets;
     assert(targets.size() == ntargets);
-    /**
-     * @todo really prioritize the target commands.
-     */
 
-    if (!inService && target->isWrite()) {
-        pkt->cmd = MemCmd::WriteReq;
-    }
+    needsExclusive = needsExclusive || target->needsExclusive();
 }
 
 
-
 void
 MSHR::dump()
 {
@@ -167,8 +135,8 @@ MSHR::dump()
     for (int i = 0; i < ntargets; i++) {
         assert(tar_it != targets.end());
 
-        ccprintf(cerr, "\t%d: Addr: %x cmd: %d\n",
-                 i, (*tar_it)->getAddr(), (*tar_it)->cmdToIndex());
+        ccprintf(cerr, "\t%d: Addr: %x cmd: %s\n",
+                 i, tar_it->pkt->getAddr(), tar_it->pkt->cmdString());
 
         tar_it++;
     }
@@ -177,6 +145,4 @@ MSHR::dump()
 
 MSHR::~MSHR()
 {
-    if (pkt)
-        pkt = NULL;
 }
diff --git a/src/mem/cache/miss/mshr.hh b/src/mem/cache/miss/mshr.hh
index d0410acda..b38b69c52 100644
--- a/src/mem/cache/miss/mshr.hh
+++ b/src/mem/cache/miss/mshr.hh
@@ -36,22 +36,39 @@
 #ifndef __MSHR_HH__
 #define __MSHR_HH__
 
-#include "mem/packet.hh"
 #include <list>
-#include <deque>
 
-class MSHR;
+#include "mem/packet.hh"
+
+class CacheBlk;
+class MSHRQueue;
 
 /**
  * Miss Status and handling Register. This class keeps all the information
  * needed to handle a cache miss including a list of target requests.
  */
-class MSHR {
+class MSHR : public Packet::SenderState
+{
+
   public:
+
+    class Target {
+      public:
+        Tick time;      //!< Time when request was received (for stats)
+        PacketPtr pkt;  //!< Pending request packet.
+        bool cpuSide;   //!< Did request come from cpu side or mem side?
+
+        bool isCpuSide() { return cpuSide; }
+
+        Target(PacketPtr _pkt, bool _cpuSide, Tick _time = curTick)
+            : time(_time), pkt(_pkt), cpuSide(_cpuSide)
+        {}
+    };
+
     /** Defines the Data structure of the MSHR targetlist. */
-    typedef std::list<PacketPtr> TargetList;
+    typedef std::list<Target> TargetList;
     /** Target list iterator. */
-    typedef std::list<PacketPtr>::iterator TargetListIterator;
+    typedef std::list<Target>::iterator TargetListIterator;
     /** A list of MSHRs. */
     typedef std::list<MSHR *> List;
     /** MSHR list iterator. */
@@ -59,20 +76,35 @@ class MSHR {
     /** MSHR list const_iterator. */
     typedef List::const_iterator ConstIterator;
 
-    /** Address of the miss. */
+    /** Pointer to queue containing this MSHR. */
+    MSHRQueue *queue;
+
+    /** Address of the request. */
     Addr addr;
-    /** Adress space id of the miss. */
-    short asid;
+
+    /** Size of the request. */
+    int size;
+
+    /** Data associated with the request (if a write). */
+    uint8_t *writeData;
+
     /** True if the request has been sent to the bus. */
     bool inService;
+
+    /** True if we will be putting the returned block in the cache */
+    bool isCacheFill;
+    /** True if we need to get an exclusive copy of the block. */
+    bool needsExclusive;
+    /** True if the request is uncacheable */
+    bool _isUncacheable;
+
+    /** True if the request that has been sent to the bus is for an
+     * exclusive copy. */
+    bool inServiceForExclusive;
     /** Thread number of the miss. */
-    int threadNum;
-    /** The request that is forwarded to the next level of the hierarchy. */
-    PacketPtr pkt;
+    short threadNum;
     /** The number of currently allocated targets. */
     short ntargets;
-    /** The original requesting command. */
-    MemCmd originalCmd;
     /** Order number of assigned by the miss queue. */
     uint64_t order;
 
@@ -81,6 +113,7 @@ class MSHR {
      * @sa MissQueue, MSHRQueue::readyList
      */
     Iterator readyIter;
+
     /**
      * Pointer to this MSHR on the allocated list.
      * @sa MissQueue, MSHRQueue::allocatedList
@@ -92,6 +125,9 @@ private:
     TargetList targets;
 
 public:
+
+    bool isUncacheable() { return _isUncacheable; }
+
     /**
      * Allocate a miss to this MSHR.
      * @param cmd The requesting command.
@@ -100,14 +136,13 @@ public:
      * @param size The number of bytes to request.
      * @param pkt  The original miss.
      */
-    void allocate(MemCmd cmd, Addr addr, int size,
-                  PacketPtr &pkt);
+    void allocate(Addr addr, int size, PacketPtr pkt, bool isFill);
 
     /**
      * Allocate this MSHR as a buffer for the given request.
      * @param target The memory request to buffer.
      */
-    void allocateAsBuffer(PacketPtr &target);
+    void allocateAsBuffer(PacketPtr target);
 
     /**
      * Mark this MSHR as free.
@@ -118,7 +153,7 @@ public:
      * Add a request to the list of targets.
      * @param target The target.
      */
-    void allocateTarget(PacketPtr &target);
+    void allocateTarget(PacketPtr target, bool cpuSide);
 
     /** A simple constructor. */
     MSHR();
@@ -131,7 +166,7 @@ public:
      */
     int getNumTargets()
     {
-        return(ntargets);
+        return ntargets;
     }
 
     /**
@@ -147,9 +182,9 @@ public:
      * Returns a reference to the first target.
      * @return A pointer to the first target.
      */
-    PacketPtr getTarget()
+    Target *getTarget()
     {
-        return targets.front();
+        return &targets.front();
     }
 
     /**
diff --git a/src/mem/cache/miss/mshr_queue.cc b/src/mem/cache/miss/mshr_queue.cc
index e9aa89bf8..d58594798 100644
--- a/src/mem/cache/miss/mshr_queue.cc
+++ b/src/mem/cache/miss/mshr_queue.cc
@@ -29,22 +29,21 @@
  */
 
 /** @file
- * Definition of the MSHRQueue.
+ * Definition of MSHRQueue class functions.
  */
 
 #include "mem/cache/miss/mshr_queue.hh"
-#include "sim/eventq.hh"
 
 using namespace std;
 
-MSHRQueue::MSHRQueue(int num_mshrs, int reserve)
-    : numMSHRs(num_mshrs + reserve - 1), numReserve(reserve)
+MSHRQueue::MSHRQueue(int num_entries, int reserve)
+    : numEntries(num_entries + reserve - 1), numReserve(reserve)
 {
     allocated = 0;
-    inServiceMSHRs = 0;
-    allocatedTargets = 0;
-    registers = new MSHR[numMSHRs];
-    for (int i = 0; i < numMSHRs; ++i) {
+    inServiceEntries = 0;
+    registers = new MSHR[numEntries];
+    for (int i = 0; i < numEntries; ++i) {
+        registers[i].queue = this;
         freeList.push_back(&registers[i]);
     }
 }
@@ -54,7 +53,7 @@ MSHRQueue::~MSHRQueue()
     delete [] registers;
 }
 
-MSHR*
+MSHR *
 MSHRQueue::findMatch(Addr addr) const
 {
     MSHR::ConstIterator i = allocatedList.begin();
@@ -87,19 +86,19 @@ MSHRQueue::findMatches(Addr addr, vector<MSHR*>& matches) const
 
 }
 
-MSHR*
-MSHRQueue::findPending(PacketPtr &pkt) const
+MSHR *
+MSHRQueue::findPending(Addr addr, int size) const
 {
     MSHR::ConstIterator i = pendingList.begin();
     MSHR::ConstIterator end = pendingList.end();
     for (; i != end; ++i) {
         MSHR *mshr = *i;
-        if (mshr->addr < pkt->getAddr()) {
-            if (mshr->addr + mshr->pkt->getSize() > pkt->getAddr()) {
+        if (mshr->addr < addr) {
+            if (mshr->addr + mshr->size > addr) {
                 return mshr;
             }
         } else {
-            if (pkt->getAddr() + pkt->getSize() > mshr->addr) {
+            if (addr + size > mshr->addr) {
                 return mshr;
             }
         }
@@ -107,21 +106,15 @@ MSHRQueue::findPending(PacketPtr &pkt) const
     return NULL;
 }
 
-MSHR*
-MSHRQueue::allocate(PacketPtr &pkt, int size)
+MSHR *
+MSHRQueue::allocate(Addr addr, int size, PacketPtr &pkt, bool isFill)
 {
-    Addr aligned_addr = pkt->getAddr() & ~((Addr)size - 1);
     assert(!freeList.empty());
     MSHR *mshr = freeList.front();
     assert(mshr->getNumTargets() == 0);
     freeList.pop_front();
 
-    if (!pkt->needsResponse()) {
-        mshr->allocateAsBuffer(pkt);
-    } else {
-        mshr->allocate(pkt->cmd, aligned_addr, size, pkt);
-        allocatedTargets += 1;
-    }
+    mshr->allocate(addr, size, pkt, isFill);
     mshr->allocIter = allocatedList.insert(allocatedList.end(), mshr);
     mshr->readyIter = pendingList.insert(pendingList.end(), mshr);
 
@@ -129,51 +122,21 @@ MSHRQueue::allocate(PacketPtr &pkt, int size)
     return mshr;
 }
 
-MSHR*
-MSHRQueue::allocateFetch(Addr addr, int size, PacketPtr &target)
-{
-    MSHR *mshr = freeList.front();
-    assert(mshr->getNumTargets() == 0);
-    freeList.pop_front();
-    mshr->allocate(MemCmd::ReadReq, addr, size, target);
-    mshr->allocIter = allocatedList.insert(allocatedList.end(), mshr);
-    mshr->readyIter = pendingList.insert(pendingList.end(), mshr);
-
-    allocated += 1;
-    return mshr;
-}
-
-MSHR*
-MSHRQueue::allocateTargetList(Addr addr, int size)
-{
-    MSHR *mshr = freeList.front();
-    assert(mshr->getNumTargets() == 0);
-    freeList.pop_front();
-    PacketPtr dummy;
-    mshr->allocate(MemCmd::ReadReq, addr, size, dummy);
-    mshr->allocIter = allocatedList.insert(allocatedList.end(), mshr);
-    mshr->inService = true;
-    ++inServiceMSHRs;
-    ++allocated;
-    return mshr;
-}
-
 
 void
-MSHRQueue::deallocate(MSHR* mshr)
+MSHRQueue::deallocate(MSHR *mshr)
 {
     deallocateOne(mshr);
 }
 
 MSHR::Iterator
-MSHRQueue::deallocateOne(MSHR* mshr)
+MSHRQueue::deallocateOne(MSHR *mshr)
 {
     MSHR::Iterator retval = allocatedList.erase(mshr->allocIter);
     freeList.push_front(mshr);
     allocated--;
-    allocatedTargets -= mshr->getNumTargets();
     if (mshr->inService) {
-        inServiceMSHRs--;
+        inServiceEntries--;
     } else {
         pendingList.erase(mshr->readyIter);
     }
@@ -192,29 +155,29 @@ MSHRQueue::moveToFront(MSHR *mshr)
 }
 
 void
-MSHRQueue::markInService(MSHR* mshr)
+MSHRQueue::markInService(MSHR *mshr)
 {
     //assert(mshr == pendingList.front());
+#if 0
     if (!mshr->pkt->needsResponse() && !(mshr->pkt->cmd == MemCmd::UpgradeReq)) {
         assert(mshr->getNumTargets() == 0);
         deallocate(mshr);
         return;
     }
+#endif
     mshr->inService = true;
     pendingList.erase(mshr->readyIter);
     //mshr->readyIter = NULL;
-    inServiceMSHRs += 1;
+    inServiceEntries += 1;
     //pendingList.pop_front();
 }
 
 void
-MSHRQueue::markPending(MSHR* mshr, MemCmd cmd)
+MSHRQueue::markPending(MSHR *mshr)
 {
     //assert(mshr->readyIter == NULL);
-    mshr->pkt->cmd = cmd;
-    mshr->pkt->flags &= ~SATISFIED;
     mshr->inService = false;
-    --inServiceMSHRs;
+    --inServiceEntries;
     /**
      * @ todo might want to add rerequests to front of pending list for
      * performance.
@@ -231,11 +194,8 @@ MSHRQueue::squash(int threadNum)
         MSHR *mshr = *i;
         if (mshr->threadNum == threadNum) {
             while (mshr->hasTargets()) {
-                PacketPtr target = mshr->getTarget();
                 mshr->popTarget();
-
                 assert(0/*target->req->getThreadNum()*/ == threadNum);
-                target = NULL;
             }
             assert(!mshr->hasTargets());
             assert(mshr->ntargets==0);
diff --git a/src/mem/cache/miss/mshr_queue.hh b/src/mem/cache/miss/mshr_queue.hh
index 5069db661..182dfd5b2 100644
--- a/src/mem/cache/miss/mshr_queue.hh
+++ b/src/mem/cache/miss/mshr_queue.hh
@@ -32,71 +32,71 @@
  * Declaration of a structure to manage MSHRs.
  */
 
-#ifndef __MSHR_QUEUE_HH__
-#define __MSHR_QUEUE_HH__
+#ifndef __MEM__CACHE__MISS__MSHR_QUEUE_HH__
+#define __MEM__CACHE__MISS__MSHR_QUEUE_HH__
 
 #include <vector>
+
+#include "mem/packet.hh"
 #include "mem/cache/miss/mshr.hh"
 
 /**
  * A Class for maintaining a list of pending and allocated memory requests.
  */
-class MSHRQueue {
+class MSHRQueue
+{
   private:
     /**  MSHR storage. */
-    MSHR* registers;
-    /** Holds pointers to all allocated MSHRs. */
+    MSHR *registers;
+    /** Holds pointers to all allocated entries. */
     MSHR::List allocatedList;
-    /** Holds pointers to MSHRs that haven't been sent to the bus. */
+    /** Holds pointers to entries that haven't been sent to the bus. */
     MSHR::List pendingList;
-    /** Holds non allocated MSHRs. */
+    /** Holds non allocated entries. */
     MSHR::List freeList;
 
     // Parameters
     /**
-     * The total number of MSHRs in this queue. This number is set as the
-     * number of MSHRs requested plus (numReserve - 1). This allows for
-     * the same number of effective MSHRs while still maintaining the reserve.
+     * The total number of entries in this queue. This number is set as the
+     * number of entries requested plus (numReserve - 1). This allows for
+     * the same number of effective entries while still maintaining the reserve.
      */
-    const int numMSHRs;
+    const int numEntries;
 
     /**
-     * The number of MSHRs to hold in reserve. This is needed because copy
-     * operations can allocate upto 4 MSHRs at one time.
+     * The number of entries to hold in reserve. This is needed because copy
+     * operations can allocate up to 4 entries at one time.
      */
     const int numReserve;
 
   public:
-    /** The number of allocated MSHRs. */
+    /** The number of allocated entries. */
     int allocated;
-    /** The number of MSHRs that have been forwarded to the bus. */
-    int inServiceMSHRs;
-    /** The number of targets waiting for response. */
-    int allocatedTargets;
+    /** The number of entries that have been forwarded to the bus. */
+    int inServiceEntries;
 
     /**
-     * Create a queue with a given number of MSHRs.
-     * @param num_mshrs The number of MSHRs in this queue.
-     * @param reserve The minimum number of MSHRs needed to satisfy any access.
+     * Create a queue with a given number of entries.
+     * @param num_entries The number of entries in this queue.
+     * @param reserve The minimum number of entries needed to satisfy
+     * any access.
      */
-    MSHRQueue(int num_mshrs, int reserve = 1);
+    MSHRQueue(int num_entries, int reserve = 1);
 
     /** Destructor */
     ~MSHRQueue();
 
     /**
-     * Find the first MSHR that matches the provide address and asid.
+     * Find the first MSHR that matches the provided address.
      * @param addr The address to find.
-     * @param asid The address space id.
      * @return Pointer to the matching MSHR, null if not found.
      */
-    MSHR* findMatch(Addr addr) const;
+    MSHR *findMatch(Addr addr) const;
 
     /**
-     * Find and return all the matching MSHRs in the provided vector.
+     * Find and return all the matching entries in the provided vector.
      * @param addr The address to find.
-     * @param asid The address space ID.
-     * @param matches The vector to return pointers to the matching MSHRs.
+     * @param matches The vector to return pointers to the matching entries.
      * @return True if any matches are found, false otherwise.
      * @todo Typedef the vector??
      */
@@ -107,7 +107,7 @@ class MSHRQueue {
      * @param pkt The request to find.
      * @return A pointer to the earliest matching MSHR.
      */
-    MSHR* findPending(PacketPtr &pkt) const;
+    MSHR *findPending(Addr addr, int size) const;
 
     /**
      * Allocates a new MSHR for the request and size. This places the request
@@ -116,60 +116,29 @@ class MSHRQueue {
      * @param size The number in bytes to fetch from memory.
      * @return The a pointer to the MSHR allocated.
      *
-     * @pre There are free MSHRs.
+     * @pre There are free entries.
      */
-    MSHR* allocate(PacketPtr &pkt, int size = 0);
-
-    /**
-     * Allocate a read request for the given address, and places the given
-     * target on the target list.
-     * @param addr The address to fetch.
-     * @param asid The address space for the fetch.
-     * @param size The number of bytes to request.
-     * @param target The first target for the request.
-     * @return Pointer to the new MSHR.
-     */
-    MSHR* allocateFetch(Addr addr, int size, PacketPtr &target);
-
-    /**
-     * Allocate a target list for the given address.
-     * @param addr The address to fetch.
-     * @param asid The address space for the fetch.
-     * @param size The number of bytes to request.
-     * @return Pointer to the new MSHR.
-     */
-    MSHR* allocateTargetList(Addr addr, int size);
+    MSHR *allocate(Addr addr, int size, PacketPtr &pkt, bool isFill);
 
     /**
      * Removes the given MSHR from the queue. This places the MSHR on the
      * free list.
      * @param mshr
      */
-    void deallocate(MSHR* mshr);
-
-    /**
-     * Allocates a target to the given MSHR. Used to keep track of the number
-     * of outstanding targets.
-     * @param mshr The MSHR to allocate the target to.
-     * @param pkt The target request.
-     */
-    void allocateTarget(MSHR* mshr, PacketPtr &pkt)
-    {
-        mshr->allocateTarget(pkt);
-        allocatedTargets += 1;
-    }
+    void deallocate(MSHR *mshr);
 
     /**
-     * Remove a MSHR from the queue. Returns an iterator into the allocatedList
-     * for faster squash implementation.
+     * Remove a MSHR from the queue. Returns an iterator into the
+     * allocatedList for faster squash implementation.
      * @param mshr The MSHR to remove.
      * @return An iterator to the next entry in the allocatedList.
      */
-    MSHR::Iterator deallocateOne(MSHR* mshr);
+    MSHR::Iterator deallocateOne(MSHR *mshr);
 
     /**
-     * Moves the MSHR to the front of the pending list if it is not in service.
-     * @param mshr The mshr to move.
+     * Moves the MSHR to the front of the pending list if it is not
+     * in service.
+     * @param mshr The entry to move.
      */
     void moveToFront(MSHR *mshr);
 
@@ -178,14 +147,13 @@ class MSHRQueue {
      * pendingList. Deallocates the MSHR if it does not expect a response.
      * @param mshr The MSHR to mark in service.
      */
-    void markInService(MSHR* mshr);
+    void markInService(MSHR *mshr);
 
     /**
-     * Mark an in service mshr as pending, used to resend a request.
+     * Mark an in service entry as pending, used to resend a request.
      * @param mshr The MSHR to resend.
-     * @param cmd The command to resend.
      */
-    void markPending(MSHR* mshr, MemCmd cmd);
+    void markPending(MSHR *mshr);
 
     /**
      * Squash outstanding requests with the given thread number. If a request
@@ -204,36 +172,25 @@ class MSHRQueue {
     }
 
     /**
-     * Returns true if there are no free MSHRs.
+     * Returns true if there are no free entries.
      * @return True if this queue is full.
      */
     bool isFull() const
     {
-        return (allocated > numMSHRs - numReserve);
+        return (allocated > numEntries - numReserve);
     }
 
     /**
-     * Returns the request at the head of the pendingList.
+     * Returns the MSHR at the head of the pendingList.
      * @return The next request to service.
      */
-    PacketPtr getReq() const
+    MSHR *getNextMSHR() const
     {
         if (pendingList.empty()) {
             return NULL;
         }
-        MSHR* mshr = pendingList.front();
-        return mshr->pkt;
+        return pendingList.front();
     }
-
-    /**
-     * Returns the number of outstanding targets.
-     * @return the number of allocated targets.
-     */
-    int getAllocatedTargets() const
-    {
-        return allocatedTargets;
-    }
-
 };
 
-#endif //__MSHR_QUEUE_HH__
+#endif //__MEM__CACHE__MISS__MSHR_QUEUE_HH__
diff --git a/src/mem/cache/prefetch/base_prefetcher.cc b/src/mem/cache/prefetch/base_prefetcher.cc
index 966f7d005..d03cfe3ae 100644
--- a/src/mem/cache/prefetch/base_prefetcher.cc
+++ b/src/mem/cache/prefetch/base_prefetcher.cc
@@ -241,7 +241,6 @@ BasePrefetcher::handleMiss(PacketPtr &pkt, Tick time)
             }
 
             pf.push_back(prefetch);
-            prefetch->flags |= CACHE_LINE_FILL;
 
             //Make sure to request the bus, with proper delay
             cache->requestMemSideBus(Request_PF, prefetch->time);
diff --git a/src/mem/cache/tags/fa_lru.cc b/src/mem/cache/tags/fa_lru.cc
index 42a1fe34f..607e89a75 100644
--- a/src/mem/cache/tags/fa_lru.cc
+++ b/src/mem/cache/tags/fa_lru.cc
@@ -215,14 +215,13 @@ FALRU::findBlock(Addr addr) const
 }
 
 FALRUBlk*
-FALRU::findReplacement(PacketPtr &pkt, PacketList &writebacks,
-                       BlkList &compress_blocks)
+FALRU::findReplacement(Addr addr, PacketList &writebacks)
 {
     FALRUBlk * blk = tail;
     assert(blk->inCache == 0);
     moveToHead(blk);
     tagHash.erase(blk->tag);
-    tagHash[blkAlign(pkt->getAddr())] = blk;
+    tagHash[blkAlign(addr)] = blk;
     if (blk->isValid()) {
         replacements[0]++;
     } else {
diff --git a/src/mem/cache/tags/fa_lru.hh b/src/mem/cache/tags/fa_lru.hh
index dabbda740..8cbc79813 100644
--- a/src/mem/cache/tags/fa_lru.hh
+++ b/src/mem/cache/tags/fa_lru.hh
@@ -201,11 +201,9 @@ public:
      * Find a replacement block for the address provided.
      * @param pkt The request to a find a replacement candidate for.
      * @param writebacks List for any writebacks to be performed.
-     * @param compress_blocks List of blocks to compress, for adaptive comp.
      * @return The block to place the replacement in.
      */
-    FALRUBlk* findReplacement(PacketPtr &pkt, PacketList & writebacks,
-                              BlkList &compress_blocks);
+    FALRUBlk* findReplacement(Addr addr, PacketList & writebacks);
 
     /**
      * Return the hit latency of this cache.
@@ -248,10 +246,9 @@ public:
      * Generate the tag from the addres. For fully associative this is just the
      * block address.
      * @param addr The address to get the tag from.
-     * @param blk ignored here
      * @return The tag.
      */
-    Addr extractTag(Addr addr, FALRUBlk *blk) const
+    Addr extractTag(Addr addr) const
     {
         return blkAlign(addr);
     }
diff --git a/src/mem/cache/tags/iic.cc b/src/mem/cache/tags/iic.cc
index 9c802d0dc..2f95cdb0f 100644
--- a/src/mem/cache/tags/iic.cc
+++ b/src/mem/cache/tags/iic.cc
@@ -303,11 +303,10 @@ IIC::findBlock(Addr addr) const
 
 
 IICTag*
-IIC::findReplacement(PacketPtr &pkt, PacketList &writebacks,
-                     BlkList &compress_blocks)
+IIC::findReplacement(Addr addr, PacketList &writebacks)
 {
-    DPRINTF(IIC, "Finding Replacement for %x\n", pkt->getAddr());
-    unsigned set = hash(pkt->getAddr());
+    DPRINTF(IIC, "Finding Replacement for %x\n", addr);
+    unsigned set = hash(addr);
     IICTag *tag_ptr;
     unsigned long *tmp_data = new unsigned long[numSub];
 
@@ -332,12 +331,14 @@ IIC::findReplacement(PacketPtr &pkt, PacketList &writebacks,
 
     list<unsigned long> tag_indexes;
     repl->doAdvance(tag_indexes);
+/*
     while (!tag_indexes.empty()) {
         if (!tagStore[tag_indexes.front()].isCompressed()) {
             compress_blocks.push_back(&tagStore[tag_indexes.front()]);
         }
         tag_indexes.pop_front();
     }
+*/
 
     tag_ptr->re = (void*)repl->add(tag_ptr-tagStore);
 
@@ -355,7 +356,7 @@ IIC::freeReplacementBlock(PacketList & writebacks)
 
     DPRINTF(Cache, "Replacing %x in IIC: %s\n",
             regenerateBlkAddr(tag_ptr->tag,0),
-            tag_ptr->isModified() ? "writeback" : "clean");
+            tag_ptr->isDirty() ? "writeback" : "clean");
     /* write back replaced block data */
     if (tag_ptr && (tag_ptr->isValid())) {
         replacements[0]++;
@@ -363,7 +364,7 @@ IIC::freeReplacementBlock(PacketList & writebacks)
         ++sampledRefs;
         tag_ptr->refCount = 0;
 
-        if (tag_ptr->isModified()) {
+        if (tag_ptr->isDirty()) {
 /*	    PacketPtr writeback =
                 buildWritebackReq(regenerateBlkAddr(tag_ptr->tag, 0),
                                   tag_ptr->req->asid, tag_ptr->xc, blkSize,
@@ -618,24 +619,6 @@ IIC::secondaryChain(Addr tag, unsigned long chain_ptr,
     return NULL;
 }
 
-void
-IIC::decompressBlock(unsigned long index)
-{
-    IICTag *tag_ptr = &tagStore[index];
-    if (tag_ptr->isCompressed()) {
-        // decompress the data here.
-    }
-}
-
-void
-IIC::compressBlock(unsigned long index)
-{
-    IICTag *tag_ptr = &tagStore[index];
-    if (!tag_ptr->isCompressed()) {
-        // Compress the data here.
-    }
-}
-
 void
 IIC::invalidateBlk(IIC::BlkType *tag_ptr)
 {
@@ -672,7 +655,6 @@ void
 IIC::writeData(IICTag *blk, uint8_t *write_data, int size,
                PacketList & writebacks)
 {
-    assert(size < blkSize || !blk->isCompressed());
     DPRINTF(IIC, "Writing %d bytes to %x\n", size,
             blk->tag<<tagShift);
     // Find the number of subblocks needed, (round up)
diff --git a/src/mem/cache/tags/iic.hh b/src/mem/cache/tags/iic.hh
index d0663d330..082b3d15e 100644
--- a/src/mem/cache/tags/iic.hh
+++ b/src/mem/cache/tags/iic.hh
@@ -345,17 +345,6 @@ class IIC : public BaseTags
         return hitLatency;
     }
 
-    /**
-     * Generate the tag from the address.
-     * @param addr The address to a get a tag for.
-     * @param blk Ignored here.
-     * @return the tag.
-     */
-    Addr extractTag(Addr addr, IICTag *blk) const
-    {
-        return (addr >> tagShift);
-    }
-
      /**
      * Generate the tag from the address.
      * @param addr The address to a get a tag for.
@@ -422,18 +411,6 @@ class IIC : public BaseTags
         return tmp;
     }
 
-    /**
-     * Decompress a block if it is compressed.
-     * @param index The tag store index for the block to uncompress.
-     */
-    void decompressBlock(unsigned long index);
-
-    /**
-     * Try and compress a block if it is not already compressed.
-     * @param index The tag store index for the block to compress.
-     */
-    void compressBlock(unsigned long index);
-
     /**
      * Invalidate a block.
      * @param blk The block to invalidate.
@@ -462,11 +439,9 @@ class IIC : public BaseTags
      * Find a replacement block for the address provided.
      * @param pkt The request to a find a replacement candidate for.
      * @param writebacks List for any writebacks to be performed.
-     * @param compress_blocks List of blocks to compress, for adaptive comp.
      * @return The block to place the replacement in.
      */
-    IICTag* findReplacement(PacketPtr &pkt, PacketList &writebacks,
-                            BlkList &compress_blocks);
+    IICTag* findReplacement(Addr addr, PacketList &writebacks);
 
     /**
      * Read the data from the internal storage of the given cache block.
diff --git a/src/mem/cache/tags/lru.cc b/src/mem/cache/tags/lru.cc
index 8e8779774..334312aaf 100644
--- a/src/mem/cache/tags/lru.cc
+++ b/src/mem/cache/tags/lru.cc
@@ -194,10 +194,9 @@ LRU::findBlock(Addr addr) const
 }
 
 LRUBlk*
-LRU::findReplacement(PacketPtr &pkt, PacketList &writebacks,
-                     BlkList &compress_blocks)
+LRU::findReplacement(Addr addr, PacketList &writebacks)
 {
-    unsigned set = extractSet(pkt->getAddr());
+    unsigned set = extractSet(addr);
     // grab a replacement candidate
     LRUBlk *blk = sets[set].blks[assoc-1];
     sets[set].moveToHead(blk);
diff --git a/src/mem/cache/tags/lru.hh b/src/mem/cache/tags/lru.hh
index 75272544c..26038d709 100644
--- a/src/mem/cache/tags/lru.hh
+++ b/src/mem/cache/tags/lru.hh
@@ -189,11 +189,9 @@ public:
      * Find a replacement block for the address provided.
      * @param pkt The request to a find a replacement candidate for.
      * @param writebacks List for any writebacks to be performed.
-     * @param compress_blocks List of blocks to compress, for adaptive comp.
      * @return The block to place the replacement in.
      */
-    LRUBlk* findReplacement(PacketPtr &pkt, PacketList &writebacks,
-                            BlkList &compress_blocks);
+    LRUBlk* findReplacement(Addr addr, PacketList &writebacks);
 
     /**
      * Generate the tag from the given address.
@@ -205,17 +203,6 @@ public:
         return (addr >> tagShift);
     }
 
-   /**
-     * Generate the tag from the given address.
-     * @param addr The address to get the tag from.
-     * @param blk Ignored.
-     * @return The tag of the address.
-     */
-    Addr extractTag(Addr addr, LRUBlk *blk) const
-    {
-        return (addr >> tagShift);
-    }
-
     /**
      * Calculate the set index from the address.
      * @param addr The address to get the set from.
diff --git a/src/mem/cache/tags/split.cc b/src/mem/cache/tags/split.cc
index 5ac87eaba..e22ccbb96 100644
--- a/src/mem/cache/tags/split.cc
+++ b/src/mem/cache/tags/split.cc
@@ -298,27 +298,25 @@ Split::findBlock(Addr addr) const
 }
 
 SplitBlk*
-Split::findReplacement(PacketPtr &pkt, PacketList &writebacks,
-                     BlkList &compress_blocks)
+Split::findReplacement(Addr addr, PacketList &writebacks)
 {
     SplitBlk *blk;
 
+    assert(0);
+#if 0
     if (pkt->nic_pkt()) {
         DPRINTF(Split, "finding a replacement for nic_req\n");
         nic_repl++;
         if (lifo && lifo_net)
-            blk = lifo_net->findReplacement(pkt, writebacks,
-                                             compress_blocks);
+            blk = lifo_net->findReplacement(addr, writebacks);
         else if (lru_net)
-            blk = lru_net->findReplacement(pkt, writebacks,
-                                            compress_blocks);
+            blk = lru_net->findReplacement(addr, writebacks);
         // in this case, this is an LRU only cache, it's non partitioned
         else
-            blk = lru->findReplacement(pkt, writebacks, compress_blocks);
+            blk = lru->findReplacement(addr, writebacks);
     } else {
         DPRINTF(Split, "finding replacement for cpu_req\n");
-        blk = lru->findReplacement(pkt, writebacks,
-                                    compress_blocks);
+        blk = lru->findReplacement(addr, writebacks);
         cpu_repl++;
     }
 
@@ -346,6 +344,7 @@ Split::findReplacement(PacketPtr &pkt, PacketList &writebacks,
     // blk attributes for the new blk coming IN
     blk->ts = curTick;
     blk->isNIC = (pkt->nic_pkt()) ? true : false;
+#endif
 
     return blk;
 }
@@ -400,8 +399,13 @@ Split::regenerateBlkAddr(Addr tag, int set) const
 }
 
 Addr
-Split::extractTag(Addr addr, SplitBlk *blk) const
+Split::extractTag(Addr addr) const
 {
+    // need to fix this if we want to use it... old interface of
+    // passing in blk was too weird
+    assert(0);
+    return 0;
+/*
     if (blk->part == 2) {
         if (lifo_net)
             return lifo_net->extractTag(addr);
@@ -411,5 +415,6 @@ Split::extractTag(Addr addr, SplitBlk *blk) const
             panic("this shouldn't happen");
     } else
         return lru->extractTag(addr);
+*/
 }
 
diff --git a/src/mem/cache/tags/split.hh b/src/mem/cache/tags/split.hh
index 840b68940..ab48ce769 100644
--- a/src/mem/cache/tags/split.hh
+++ b/src/mem/cache/tags/split.hh
@@ -212,20 +212,17 @@ class Split : public BaseTags
      * Find a replacement block for the address provided.
      * @param pkt The request to a find a replacement candidate for.
      * @param writebacks List for any writebacks to be performed.
-     * @param compress_blocks List of blocks to compress, for adaptive comp.
      * @return The block to place the replacement in.
      */
-    SplitBlk* findReplacement(PacketPtr &pkt, PacketList &writebacks,
-                            BlkList &compress_blocks);
+    SplitBlk* findReplacement(Addr addr, PacketList &writebacks);
 
 
     /**
      * Generate the tag from the given address.
      * @param addr The address to get the tag from.
-     * @param blk The block to find the partition it's in
      * @return The tag of the address.
      */
-    Addr extractTag(Addr addr, SplitBlk *blk) const;
+    Addr extractTag(Addr addr) const;
 
     /**
      * Calculate the set index from the address.
diff --git a/src/mem/cache/tags/split_lifo.cc b/src/mem/cache/tags/split_lifo.cc
index d71d1a3ef..4ee2473a4 100644
--- a/src/mem/cache/tags/split_lifo.cc
+++ b/src/mem/cache/tags/split_lifo.cc
@@ -266,10 +266,9 @@ SplitLIFO::findBlock(Addr addr) const
 }
 
 SplitBlk*
-SplitLIFO::findReplacement(PacketPtr &pkt, PacketList &writebacks,
-                           BlkList &compress_blocks)
+SplitLIFO::findReplacement(Addr addr, PacketList &writebacks)
 {
-    unsigned set = extractSet(pkt->getAddr());
+    unsigned set = extractSet(addr);
 
     SplitBlk *firstIn = sets[set].firstIn;
     SplitBlk *lastIn = sets[set].lastIn;
@@ -289,7 +288,7 @@ SplitLIFO::findReplacement(PacketPtr &pkt, PacketList &writebacks,
     }
 
     DPRINTF(Split, "just assigned %#x addr into LIFO, replacing %#x status %#x\n",
-            pkt->getAddr(), regenerateBlkAddr(blk->tag, set), blk->status);
+            addr, regenerateBlkAddr(blk->tag, set), blk->status);
     if (blk->isValid()) {
         replacements[0]++;
         totalRefs += blk->refCount;
diff --git a/src/mem/cache/tags/split_lifo.hh b/src/mem/cache/tags/split_lifo.hh
index 0f8adf18d..13ccf7ef4 100644
--- a/src/mem/cache/tags/split_lifo.hh
+++ b/src/mem/cache/tags/split_lifo.hh
@@ -212,11 +212,9 @@ public:
      * Find a replacement block for the address provided.
      * @param pkt The request to a find a replacement candidate for.
      * @param writebacks List for any writebacks to be performed.
-     * @param compress_blocks List of blocks to compress, for adaptive comp.
      * @return The block to place the replacement in.
      */
-    SplitBlk* findReplacement(PacketPtr &pkt, PacketList &writebacks,
-                            BlkList &compress_blocks);
+    SplitBlk* findReplacement(Addr addr, PacketList &writebacks);
 
     /**
      * Generate the tag from the given address.
@@ -228,17 +226,6 @@ public:
         return (addr >> tagShift);
     }
 
-     /**
-     * Generate the tag from the given address.
-     * @param addr The address to get the tag from.
-     * @param blk Ignored
-     * @return The tag of the address.
-     */
-    Addr extractTag(Addr addr, SplitBlk *blk) const
-    {
-        return (addr >> tagShift);
-    }
-
    /**
      * Calculate the set index from the address.
      * @param addr The address to get the set from.
diff --git a/src/mem/cache/tags/split_lru.cc b/src/mem/cache/tags/split_lru.cc
index 7227fb5c1..4d271a92a 100644
--- a/src/mem/cache/tags/split_lru.cc
+++ b/src/mem/cache/tags/split_lru.cc
@@ -213,10 +213,9 @@ SplitLRU::findBlock(Addr addr) const
 }
 
 SplitBlk*
-SplitLRU::findReplacement(PacketPtr &pkt, PacketList &writebacks,
-                     BlkList &compress_blocks)
+SplitLRU::findReplacement(Addr addr, PacketList &writebacks)
 {
-    unsigned set = extractSet(pkt->getAddr());
+    unsigned set = extractSet(addr);
     // grab a replacement candidate
     SplitBlk *blk = sets[set].blks[assoc-1];
     sets[set].moveToHead(blk);
diff --git a/src/mem/cache/tags/split_lru.hh b/src/mem/cache/tags/split_lru.hh
index eb65445ea..a708ef740 100644
--- a/src/mem/cache/tags/split_lru.hh
+++ b/src/mem/cache/tags/split_lru.hh
@@ -195,11 +195,9 @@ public:
      * Find a replacement block for the address provided.
      * @param pkt The request to a find a replacement candidate for.
      * @param writebacks List for any writebacks to be performed.
-     * @param compress_blocks List of blocks to compress, for adaptive comp.
      * @return The block to place the replacement in.
      */
-    SplitBlk* findReplacement(PacketPtr &pkt, PacketList &writebacks,
-                            BlkList &compress_blocks);
+    SplitBlk* findReplacement(Addr addr, PacketList &writebacks);
 
     /**
      * Generate the tag from the given address.
@@ -211,17 +209,6 @@ public:
         return (addr >> tagShift);
     }
 
-    /**
-     * Generate the tag from the given address.
-     * @param addr The address to get the tag from.
-     * @param blk Ignored.
-     * @return The tag of the address.
-     */
-    Addr extractTag(Addr addr, SplitBlk *blk) const
-    {
-        return (addr >> tagShift);
-    }
-
     /**
      * Calculate the set index from the address.
      * @param addr The address to get the set from.
diff --git a/src/mem/packet.cc b/src/mem/packet.cc
index a257e16ab..57c6a6381 100644
--- a/src/mem/packet.cc
+++ b/src/mem/packet.cc
@@ -59,15 +59,15 @@ MemCmd::commandInfo[] =
     /* ReadResp */
     { SET3(IsRead, IsResponse, HasData), InvalidCmd, "ReadResp" },
     /* WriteReq */
-    { SET4(IsWrite, IsRequest, NeedsResponse, HasData),
+    { SET5(IsWrite, NeedsExclusive, IsRequest, NeedsResponse, HasData),
             WriteResp, "WriteReq" },
     /* WriteResp */
-    { SET2(IsWrite, IsResponse), InvalidCmd, "WriteResp" },
+    { SET3(IsWrite, NeedsExclusive, IsResponse), InvalidCmd, "WriteResp" },
     /* Writeback */
-    { SET4(IsWrite, IsRequest, HasData, NeedsResponse),
+    { SET5(IsWrite, NeedsExclusive, IsRequest, HasData, NeedsResponse),
             WritebackAck, "Writeback" },
     /* WritebackAck */
-    { SET2(IsWrite, IsResponse), InvalidCmd, "WritebackAck" },
+    { SET3(IsWrite, NeedsExclusive, IsResponse), InvalidCmd, "WritebackAck" },
     /* SoftPFReq */
     { SET4(IsRead, IsRequest, IsSWPrefetch, NeedsResponse),
             SoftPFResp, "SoftPFReq" },
@@ -80,27 +80,39 @@ MemCmd::commandInfo[] =
     /* HardPFResp */
     { SET4(IsRead, IsResponse, IsHWPrefetch, HasData),
             InvalidCmd, "HardPFResp" },
-    /* InvalidateReq */
-    { SET2(IsInvalidate, IsRequest), InvalidCmd, "InvalidateReq" },
     /* WriteInvalidateReq */
-    { SET5(IsWrite, IsInvalidate, IsRequest, HasData, NeedsResponse),
+    { SET6(IsWrite, NeedsExclusive, IsInvalidate,
+           IsRequest, HasData, NeedsResponse),
             WriteInvalidateResp, "WriteInvalidateReq" },
     /* WriteInvalidateResp */
-    { SET3(IsWrite, IsInvalidate, IsResponse),
+    { SET4(IsWrite, NeedsExclusive, IsInvalidate, IsResponse),
             InvalidCmd, "WriteInvalidateResp" },
     /* UpgradeReq */
     { SET3(IsInvalidate, IsRequest, IsUpgrade), InvalidCmd, "UpgradeReq" },
     /* ReadExReq */
-    { SET4(IsRead, IsInvalidate, IsRequest, NeedsResponse),
+    { SET5(IsRead, NeedsExclusive, IsInvalidate, IsRequest, NeedsResponse),
             ReadExResp, "ReadExReq" },
     /* ReadExResp */
-    { SET4(IsRead, IsInvalidate, IsResponse, HasData),
+    { SET5(IsRead, NeedsExclusive, IsInvalidate, IsResponse, HasData),
             InvalidCmd, "ReadExResp" },
+    /* LoadLockedReq */
+    { SET4(IsRead, IsLocked, IsRequest, NeedsResponse),
+            ReadResp, "LoadLockedReq" },
+    /* LoadLockedResp */
+    { SET4(IsRead, IsLocked, IsResponse, HasData),
+            InvalidCmd, "LoadLockedResp" },
+    /* StoreCondReq */
+    { SET6(IsWrite, NeedsExclusive, IsLocked,
+           IsRequest, NeedsResponse, HasData),
+            StoreCondResp, "StoreCondReq" },
+    /* StoreCondResp */
+    { SET4(IsWrite, NeedsExclusive, IsLocked, IsResponse),
+            InvalidCmd, "StoreCondResp" },
     /* SwapReq -- for Swap ldstub type operations */
-    { SET4(IsReadWrite, IsRequest, HasData, NeedsResponse),
+    { SET6(IsRead, IsWrite, NeedsExclusive, IsRequest, HasData, NeedsResponse),
         SwapResp, "SwapReq" },
     /* SwapResp -- for Swap ldstub type operations */
-    { SET3(IsReadWrite, IsResponse, HasData),
+    { SET5(IsRead, IsWrite, NeedsExclusive, IsResponse, HasData),
         InvalidCmd, "SwapResp" }
 };
 
@@ -171,27 +183,28 @@ fixDelayedResponsePacket(PacketPtr func, PacketPtr timing)
 }
 
 bool
-fixPacket(PacketPtr func, PacketPtr timing)
+Packet::checkFunctional(Addr addr, int size, uint8_t *data)
 {
-    Addr funcStart      = func->getAddr();
-    Addr funcEnd        = func->getAddr() + func->getSize() - 1;
-    Addr timingStart    = timing->getAddr();
-    Addr timingEnd      = timing->getAddr() + timing->getSize() - 1;
+    Addr func_start = getAddr();
+    Addr func_end   = getAddr() + getSize() - 1;
+    Addr val_start  = addr;
+    Addr val_end    = val_start + size - 1;
 
-    assert(!(funcStart > timingEnd || timingStart > funcEnd));
+    if (func_start > val_end || val_start > func_end) {
+        // no intersection
+        return false;
+    }
 
-    // this packet can't solve our problem, continue on
-    if (!timing->hasData())
-        return true;
+    // offset of functional request into supplied value (could be
+    // negative if partial overlap)
+    int offset = func_start - val_start;
 
-    if (func->isRead()) {
-        if (funcStart >= timingStart && funcEnd <= timingEnd) {
-            func->allocate();
-            std::memcpy(func->getPtr<uint8_t>(), timing->getPtr<uint8_t>() +
-                    funcStart - timingStart, func->getSize());
-            func->result = Packet::Success;
-            func->flags |= SATISFIED;
-            return false;
+    if (isRead()) {
+        if (func_start >= val_start && func_end <= val_end) {
+            allocate();
+            std::memcpy(getPtr<uint8_t>(), data + offset, getSize());
+            result = Packet::Success;
+            return true;
         } else {
             // In this case the timing packet only partially satisfies
             // the request, so we would need more information to make
@@ -199,25 +212,21 @@ fixPacket(PacketPtr func, PacketPtr timing)
             // something, so the request could continue and get this
             // bit of possibly newer data along with the older data
             // not written to yet.
-            panic("Timing packet only partially satisfies the functional"
-                    "request. Now what?");
+            panic("Memory value only partially satisfies the functional "
+                  "request. Now what?");
         }
-    } else if (func->isWrite()) {
-        if (funcStart >= timingStart) {
-            std::memcpy(timing->getPtr<uint8_t>() + (funcStart - timingStart),
-                   func->getPtr<uint8_t>(),
-                   (std::min(funcEnd, timingEnd) - funcStart) + 1);
-        } else { // timingStart > funcStart
-            std::memcpy(timing->getPtr<uint8_t>(),
-                   func->getPtr<uint8_t>() + (timingStart - funcStart),
-                   (std::min(funcEnd, timingEnd) - timingStart) + 1);
+    } else if (isWrite()) {
+        if (offset >= 0) {
+            std::memcpy(data + offset, getPtr<uint8_t>(),
+                        (std::min(func_end, val_end) - func_start) + 1);
+        } else { // val_start > func_start
+            std::memcpy(data, getPtr<uint8_t>() - offset,
+                        (std::min(func_end, val_end) - val_start) + 1);
         }
         // we always want to keep going with a write
-        return true;
+        return false;
     } else
-        panic("Don't know how to handle command type %#x\n",
-                func->cmdToIndex());
-
+        panic("Don't know how to handle command %s\n", cmdString());
 }
 
 
@@ -247,8 +256,6 @@ operator<<(std::ostream &o, const Packet &p)
         o << "Read ";
     if (p.isWrite())
         o << "Write ";
-    if (p.isReadWrite())
-        o << "Read/Write ";
     if (p.isInvalidate())
         o << "Invalidate ";
     if (p.isRequest())
diff --git a/src/mem/packet.hh b/src/mem/packet.hh
index e2349e42f..ca186d875 100644
--- a/src/mem/packet.hh
+++ b/src/mem/packet.hh
@@ -54,16 +54,6 @@ typedef Packet *PacketPtr;
 typedef uint8_t* PacketDataPtr;
 typedef std::list<PacketPtr> PacketList;
 
-//Coherence Flags
-#define NACKED_LINE     (1 << 0)
-#define SATISFIED       (1 << 1)
-#define SHARED_LINE     (1 << 2)
-#define CACHE_LINE_FILL (1 << 3)
-#define COMPRESSED      (1 << 4)
-#define NO_ALLOCATE     (1 << 5)
-
-#define EXPRESS_SNOOP   (1 << 7)
-
 class MemCmd
 {
   public:
@@ -82,12 +72,15 @@ class MemCmd
         HardPFReq,
         SoftPFResp,
         HardPFResp,
-        InvalidateReq,
         WriteInvalidateReq,
         WriteInvalidateResp,
         UpgradeReq,
         ReadExReq,
         ReadExResp,
+        LoadLockedReq,
+        LoadLockedResp,
+        StoreCondReq,
+        StoreCondResp,
         SwapReq,
         SwapResp,
         NUM_MEM_CMDS
@@ -97,18 +90,19 @@ class MemCmd
     /** List of command attributes. */
     enum Attribute
     {
-        IsRead,
-        IsWrite,
-        IsPrefetch,
+        IsRead,         //!< Data flows from responder to requester
+        IsWrite,        //!< Data flows from requester to responder
+        IsPrefetch,     //!< Not a demand access
         IsInvalidate,
-        IsRequest,
-        IsResponse,
-        NeedsResponse,
+        NeedsExclusive, //!< Requires exclusive copy to complete in-cache
+        IsRequest,      //!< Issued by requester
+        IsResponse,     //!< Issued by responder
+        NeedsResponse,  //!< Requester needs response from target
         IsSWPrefetch,
         IsHWPrefetch,
         IsUpgrade,
-        HasData,
-        IsReadWrite,
+        IsLocked,       //!< Alpha/MIPS LL or SC access
+        HasData,        //!< There is an associated payload
         NUM_COMMAND_ATTRIBUTES
     };
 
@@ -141,10 +135,12 @@ class MemCmd
     bool isWrite()  const       { return testCmdAttrib(IsWrite); }
     bool isRequest() const      { return testCmdAttrib(IsRequest); }
     bool isResponse() const     { return testCmdAttrib(IsResponse); }
+    bool needsExclusive() const  { return testCmdAttrib(NeedsExclusive); }
     bool needsResponse() const  { return testCmdAttrib(NeedsResponse); }
     bool isInvalidate() const   { return testCmdAttrib(IsInvalidate); }
     bool hasData() const        { return testCmdAttrib(HasData); }
-    bool isReadWrite() const    { return testCmdAttrib(IsReadWrite); }
+    bool isReadWrite() const    { return isRead() && isWrite(); }
+    bool isLocked() const       { return testCmdAttrib(IsLocked); }
 
     const Command responseCommand() const {
         return commandInfo[cmd].response;
@@ -188,9 +184,6 @@ class Packet
 
     typedef MemCmd::Command Command;
 
-    /** Temporary FLAGS field until cache gets working, this should be in coherence/sender state. */
-    uint64_t flags;
-
   private:
    /** A pointer to the data being transfered.  It can be differnt
     *    sizes at each level of the heirarchy so it belongs in the
@@ -235,6 +228,14 @@ class Packet
     /** Is the 'src' field valid? */
     bool srcValid;
 
+    enum SnoopFlag {
+        MemInhibit,
+        Shared,
+        NUM_SNOOP_FLAGS
+    };
+
+    /** Coherence flags asserted while snooping, indexed by SnoopFlag */
+    std::bitset<NUM_SNOOP_FLAGS> snoopFlags;
 
   public:
 
@@ -301,14 +302,17 @@ class Packet
     bool isWrite()  const       { return cmd.isWrite(); }
     bool isRequest() const      { return cmd.isRequest(); }
     bool isResponse() const     { return cmd.isResponse(); }
+    bool needsExclusive() const  { return cmd.needsExclusive(); }
     bool needsResponse() const  { return cmd.needsResponse(); }
     bool isInvalidate() const   { return cmd.isInvalidate(); }
     bool hasData() const        { return cmd.hasData(); }
     bool isReadWrite() const    { return cmd.isReadWrite(); }
+    bool isLocked() const       { return cmd.isLocked(); }
 
-    bool isCacheFill() const    { return (flags & CACHE_LINE_FILL) != 0; }
-    bool isNoAllocate() const   { return (flags & NO_ALLOCATE) != 0; }
-    bool isCompressed() const   { return (flags & COMPRESSED) != 0; }
+    void assertMemInhibit()     { snoopFlags[MemInhibit] = true; }
+    void assertShared()         { snoopFlags[Shared] = true; }
+    bool memInhibitAsserted()   { return snoopFlags[MemInhibit]; }
+    bool sharedAsserted()       { return snoopFlags[Shared]; }
 
     bool nic_pkt() { panic("Unimplemented"); M5_DUMMY_RETURN }
 
@@ -327,6 +331,8 @@ class Packet
     /** Accessor function that returns the source index of the packet. */
     short getSrc() const { assert(srcValid); return src; }
     void setSrc(short _src) { src = _src; srcValid = true; }
+    /** Reset source field, e.g. to retransmit packet on different bus. */
+    void clearSrc() { srcValid = false; }
 
     /** Accessor function that returns the destination index of
         the packet. */
@@ -347,13 +353,12 @@ class Packet
     Packet(Request *_req, MemCmd _cmd, short _dest)
         :  data(NULL), staticData(false), dynamicData(false), arrayData(false),
            addr(_req->paddr), size(_req->size), dest(_dest),
-           addrSizeValid(_req->validPaddr),
-           srcValid(false),
+           addrSizeValid(_req->validPaddr), srcValid(false),
+           snoopFlags(0),
+           time(curTick),
            req(_req), coherence(NULL), senderState(NULL), cmd(_cmd),
            result(Unknown)
     {
-        flags = 0;
-        time = curTick;
     }
 
     /** Alternate constructor if you are trying to create a packet with
@@ -361,14 +366,32 @@ class Packet
      *  this allows for overriding the size/addr of the req.*/
     Packet(Request *_req, MemCmd _cmd, short _dest, int _blkSize)
         :  data(NULL), staticData(false), dynamicData(false), arrayData(false),
-           addr(_req->paddr & ~(_blkSize - 1)), size(_blkSize),
-           dest(_dest),
+           addr(_req->paddr & ~(_blkSize - 1)), size(_blkSize), dest(_dest),
            addrSizeValid(_req->validPaddr), srcValid(false),
+           snoopFlags(0),
+           time(curTick),
            req(_req), coherence(NULL), senderState(NULL), cmd(_cmd),
            result(Unknown)
     {
-        flags = 0;
-        time = curTick;
+    }
+
+    /** Alternate constructor for copying a packet.  Copies origPkt's
+     * fields *except* time (set to curTick) and the data allocation:
+     * the intent is for the copy to treat the data as static so it is
+     * never freed with the copy (caller keeps the original alive).
+     * NOTE(review): as written, data is NULL and staticData is false,
+     * so the data pointer is not actually carried over -- confirm. */
+    Packet(Packet *origPkt)
+        :  data(NULL), staticData(false), dynamicData(false), arrayData(false),
+           addr(origPkt->addr), size(origPkt->size),
+           dest(origPkt->dest),
+           addrSizeValid(origPkt->addrSizeValid), srcValid(origPkt->srcValid),
+           snoopFlags(origPkt->snoopFlags),
+           time(curTick),
+           req(origPkt->req), coherence(origPkt->coherence),
+           senderState(origPkt->senderState), cmd(origPkt->cmd),
+           result(origPkt->result)
+    {
     }
 
     /** Destructor. */
@@ -382,7 +405,7 @@ class Packet
      *   multiple transactions. */
     void reinitFromRequest() {
         assert(req->validPaddr);
-        flags = 0;
+        snoopFlags = 0;
         addr = req->paddr;
         size = req->size;
         time = req->time;
@@ -395,29 +418,40 @@ class Packet
         }
     }
 
-    /** Take a request packet and modify it in place to be suitable
-     *   for returning as a response to that request.  Used for timing
-     *   accesses only.  For atomic and functional accesses, the
-     *   request packet is always implicitly passed back *without*
-     *   modifying the destination fields, so this function
-     *   should not be called. */
-    void makeTimingResponse() {
+    /**
+     * Take a request packet and modify it in place to be suitable for
+     * returning as a response to that request.  The source and
+     * destination fields are *not* modified, as is appropriate for
+     * atomic accesses.
+     */
+    void makeAtomicResponse()
+    {
         assert(needsResponse());
         assert(isRequest());
+        assert(result == Unknown);
         cmd = cmd.responseCommand();
+        result = Success;
+    }
+
+    /**
+     * Perform the additional work required for timing responses above
+     * and beyond atomic responses; i.e., change the destination to
+     * point back to the requester and clear the source field.
+     */
+    void convertAtomicToTimingResponse()
+    {
         dest = src;
         srcValid = false;
     }
 
     /**
      * Take a request packet and modify it in place to be suitable for
-     * returning as a response to that request.
+     * returning as a response to a timing request.
      */
-    void makeAtomicResponse()
+    void makeTimingResponse()
     {
-        assert(needsResponse());
-        assert(isRequest());
-        cmd = cmd.responseCommand();
+        makeAtomicResponse();
+        convertAtomicToTimingResponse();
     }
 
     /**
@@ -493,6 +527,40 @@ class Packet
     template <typename T>
     void set(T v);
 
+    /**
+     * Copy data into the packet from the provided pointer.
+     */
+    void setData(uint8_t *p)
+    {
+        std::memcpy(getPtr<uint8_t>(), p, getSize());
+    }
+
+    /**
+     * Copy data into the packet from the provided block pointer,
+     * which is aligned to the given block size.
+     */
+    void setDataFromBlock(uint8_t *blk_data, int blkSize)
+    {
+        setData(blk_data + getOffset(blkSize));
+    }
+
+    /**
+     * Copy data from the packet to the memory at the provided
+     * pointer.
+     */
+    void writeData(uint8_t *p)
+    {
+        std::memcpy(p, getPtr<uint8_t>(), getSize());
+    }
+
+    /**
+     * Copy data from the packet to its offset in the provided block.
+     */
+    void writeDataToBlock(uint8_t *blk_data, int blkSize)
+    {
+        writeData(blk_data + getOffset(blkSize));
+    }
+
     /**
      * delete the data pointed to in the data pointer. Ok to call to
      * matter how data was allocted.
@@ -504,15 +572,35 @@ class Packet
 
     /** Do the packet modify the same addresses. */
     bool intersect(PacketPtr p);
+
+    /**
+     * Check a functional request against a memory value represented
+     * by a base/size pair and an associated data array.  If the
+     * functional request is a read, it may be satisfied by the memory
+     * value.  If the functional request is a write, it may update the
+     * memory value.
+     */
+    bool checkFunctional(Addr base, int size, uint8_t *data);
+
+    /**
+     * Check a functional request against a memory value stored in
+     * another packet (i.e. an in-transit request or response).
+     */
+    bool checkFunctional(PacketPtr otherPkt) {
+        return (otherPkt->hasData() &&
+                checkFunctional(otherPkt->getAddr(), otherPkt->getSize(),
+                                otherPkt->getPtr<uint8_t>()));
+    }
 };
 
-/** This function given a functional packet and a timing packet either
- * satisfies the timing packet, or updates the timing packet to
- * reflect the updated state in the timing packet. It returns if the
- * functional packet should continue to traverse the memory hierarchy
- * or not.
+
+
+/** Temporary for backwards compatibility.
  */
-bool fixPacket(PacketPtr func, PacketPtr timing);
+inline
+bool fixPacket(PacketPtr func, PacketPtr timing) {
+    return !func->checkFunctional(timing);
+}
 
 /** This function is a wrapper for the fixPacket field that toggles
  * the hasData bit it is used when a response is waiting in the
diff --git a/src/mem/physical.cc b/src/mem/physical.cc
index 9d840fe69..93cba96c4 100644
--- a/src/mem/physical.cc
+++ b/src/mem/physical.cc
@@ -58,8 +58,9 @@ PhysicalMemory::PhysicalMemory(Params *p)
         panic("Memory Size not divisible by page size\n");
 
     int map_flags = MAP_ANON | MAP_PRIVATE;
-    pmemAddr = (uint8_t *)mmap(NULL, params()->addrRange.size(), PROT_READ | PROT_WRITE,
-            map_flags, -1, 0);
+    pmemAddr =
+        (uint8_t *)mmap(NULL, params()->addrRange.size(),
+                        PROT_READ | PROT_WRITE, map_flags, -1, 0);
 
     if (pmemAddr == (void *)MAP_FAILED) {
         perror("mmap");
@@ -121,8 +122,9 @@ PhysicalMemory::calculateLatency(PacketPtr pkt)
 // Add load-locked to tracking list.  Should only be called if the
 // operation is a load and the LOCKED flag is set.
 void
-PhysicalMemory::trackLoadLocked(Request *req)
+PhysicalMemory::trackLoadLocked(PacketPtr pkt)
 {
+    Request *req = pkt->req;
     Addr paddr = LockedAddr::mask(req->getPaddr());
 
     // first we check if we already have a locked addr for this
@@ -151,10 +153,11 @@ PhysicalMemory::trackLoadLocked(Request *req)
 // conflict with locked addresses, and for success/failure of store
 // conditionals.
 bool
-PhysicalMemory::checkLockedAddrList(Request *req)
+PhysicalMemory::checkLockedAddrList(PacketPtr pkt)
 {
+    Request *req = pkt->req;
     Addr paddr = LockedAddr::mask(req->getPaddr());
-    bool isLocked = req->isLocked();
+    bool isLocked = pkt->isLocked();
 
     // Initialize return value.  Non-conditional stores always
     // succeed.  Assume conditional stores will fail until proven
@@ -198,74 +201,50 @@ PhysicalMemory::checkLockedAddrList(Request *req)
     return success;
 }
 
-void
-PhysicalMemory::doFunctionalAccess(PacketPtr pkt)
+
+#if TRACING_ON
+
+#define CASE(A, T)                                                      \
+  case sizeof(T):                                                       \
+    DPRINTF(MemoryAccess, A " of size %i on address 0x%x data 0x%x\n",  \
+            pkt->getSize(), pkt->getAddr(), pkt->get<T>());             \
+  break
+
+
+#define TRACE_PACKET(A)                                                 \
+    do {                                                                \
+        switch (pkt->getSize()) {                                       \
+          CASE(A, uint64_t);                                            \
+          CASE(A, uint32_t);                                            \
+          CASE(A, uint16_t);                                            \
+          CASE(A, uint8_t);                                             \
+          default:                                                      \
+            DPRINTF(MemoryAccess, A " of size %i on address 0x%x\n",    \
+                    pkt->getSize(), pkt->getAddr());                    \
+        }                                                               \
+    } while (0)
+
+#else
+
+#define TRACE_PACKET(A)
+
+#endif
+
+Tick
+PhysicalMemory::doAtomicAccess(PacketPtr pkt)
 {
     assert(pkt->getAddr() >= start() &&
            pkt->getAddr() + pkt->getSize() <= start() + size());
 
-    if (pkt->isRead()) {
-        if (pkt->req->isLocked()) {
-            trackLoadLocked(pkt->req);
-        }
-        memcpy(pkt->getPtr<uint8_t>(), pmemAddr + pkt->getAddr() - start(),
-               pkt->getSize());
-#if TRACING_ON
-        switch (pkt->getSize()) {
-          case sizeof(uint64_t):
-            DPRINTF(MemoryAccess, "Read of size %i on address 0x%x data 0x%x\n",
-                    pkt->getSize(), pkt->getAddr(),pkt->get<uint64_t>());
-            break;
-          case sizeof(uint32_t):
-            DPRINTF(MemoryAccess, "Read of size %i on address 0x%x data 0x%x\n",
-                    pkt->getSize(), pkt->getAddr(),pkt->get<uint32_t>());
-            break;
-          case sizeof(uint16_t):
-            DPRINTF(MemoryAccess, "Read of size %i on address 0x%x data 0x%x\n",
-                    pkt->getSize(), pkt->getAddr(),pkt->get<uint16_t>());
-            break;
-          case sizeof(uint8_t):
-            DPRINTF(MemoryAccess, "Read of size %i on address 0x%x data 0x%x\n",
-                    pkt->getSize(), pkt->getAddr(),pkt->get<uint8_t>());
-            break;
-          default:
-            DPRINTF(MemoryAccess, "Read of size %i on address 0x%x\n",
-                    pkt->getSize(), pkt->getAddr());
-        }
-#endif
+    if (pkt->memInhibitAsserted()) {
+        DPRINTF(MemoryAccess, "mem inhibited on 0x%x: not responding\n",
+                pkt->getAddr());
+        return 0;
     }
-    else if (pkt->isWrite()) {
-        if (writeOK(pkt->req)) {
-                memcpy(pmemAddr + pkt->getAddr() - start(), pkt->getPtr<uint8_t>(),
-                        pkt->getSize());
-#if TRACING_ON
-            switch (pkt->getSize()) {
-              case sizeof(uint64_t):
-                DPRINTF(MemoryAccess, "Write of size %i on address 0x%x data 0x%x\n",
-                        pkt->getSize(), pkt->getAddr(),pkt->get<uint64_t>());
-                break;
-              case sizeof(uint32_t):
-                DPRINTF(MemoryAccess, "Write of size %i on address 0x%x data 0x%x\n",
-                        pkt->getSize(), pkt->getAddr(),pkt->get<uint32_t>());
-                break;
-              case sizeof(uint16_t):
-                DPRINTF(MemoryAccess, "Write of size %i on address 0x%x data 0x%x\n",
-                        pkt->getSize(), pkt->getAddr(),pkt->get<uint16_t>());
-                break;
-              case sizeof(uint8_t):
-                DPRINTF(MemoryAccess, "Write of size %i on address 0x%x data 0x%x\n",
-                        pkt->getSize(), pkt->getAddr(),pkt->get<uint8_t>());
-                break;
-              default:
-                DPRINTF(MemoryAccess, "Write of size %i on address 0x%x\n",
-                        pkt->getSize(), pkt->getAddr());
-            }
-#endif
-        }
-    } else if (pkt->isInvalidate()) {
-        //upgrade or invalidate
-        pkt->flags |= SATISFIED;
-    } else if (pkt->isReadWrite()) {
+
+    uint8_t *hostAddr = pmemAddr + pkt->getAddr() - start();
+
+    if (pkt->cmd == MemCmd::SwapReq) {
         IntReg overwrite_val;
         bool overwrite_mem;
         uint64_t condition_val64;
@@ -277,66 +256,76 @@ PhysicalMemory::doFunctionalAccess(PacketPtr pkt)
         // keep a copy of our possible write value, and copy what is at the
         // memory address into the packet
         std::memcpy(&overwrite_val, pkt->getPtr<uint8_t>(), pkt->getSize());
-        std::memcpy(pkt->getPtr<uint8_t>(), pmemAddr + pkt->getAddr() - start(),
-               pkt->getSize());
+        std::memcpy(pkt->getPtr<uint8_t>(), hostAddr, pkt->getSize());
 
         if (pkt->req->isCondSwap()) {
             if (pkt->getSize() == sizeof(uint64_t)) {
                 condition_val64 = pkt->req->getExtraData();
-                overwrite_mem = !std::memcmp(&condition_val64, pmemAddr +
-                        pkt->getAddr() - start(), sizeof(uint64_t));
+                overwrite_mem = !std::memcmp(&condition_val64, hostAddr,
+                                             sizeof(uint64_t));
             } else if (pkt->getSize() == sizeof(uint32_t)) {
                 condition_val32 = (uint32_t)pkt->req->getExtraData();
-                overwrite_mem = !std::memcmp(&condition_val32, pmemAddr +
-                        pkt->getAddr() - start(), sizeof(uint32_t));
+                overwrite_mem = !std::memcmp(&condition_val32, hostAddr,
+                                             sizeof(uint32_t));
             } else
                 panic("Invalid size for conditional read/write\n");
         }
 
         if (overwrite_mem)
-            std::memcpy(pmemAddr + pkt->getAddr() - start(),
-               &overwrite_val, pkt->getSize());
+            std::memcpy(hostAddr, &overwrite_val, pkt->getSize());
 
-#if TRACING_ON
-        switch (pkt->getSize()) {
-          case sizeof(uint64_t):
-            DPRINTF(MemoryAccess, "Read/Write of size %i on address 0x%x old data 0x%x\n",
-                    pkt->getSize(), pkt->getAddr(),pkt->get<uint64_t>());
-            DPRINTF(MemoryAccess, "New Data 0x%x %s conditional (0x%x) and %s \n",
-                    overwrite_mem, pkt->req->isCondSwap() ? "was" : "wasn't",
-                    condition_val64, overwrite_mem ? "happened" : "didn't happen");
-            break;
-          case sizeof(uint32_t):
-            DPRINTF(MemoryAccess, "Read/Write of size %i on address 0x%x old data 0x%x\n",
-                    pkt->getSize(), pkt->getAddr(),pkt->get<uint32_t>());
-            DPRINTF(MemoryAccess, "New Data 0x%x %s conditional (0x%x) and %s \n",
-                    overwrite_mem, pkt->req->isCondSwap() ? "was" : "wasn't",
-                    condition_val32, overwrite_mem ? "happened" : "didn't happen");
-            break;
-          case sizeof(uint16_t):
-            DPRINTF(MemoryAccess, "Read/Write of size %i on address 0x%x old data 0x%x\n",
-                    pkt->getSize(), pkt->getAddr(),pkt->get<uint16_t>());
-            DPRINTF(MemoryAccess, "New Data 0x%x wasn't conditional and happned\n",
-                    overwrite_mem);
-            break;
-          case sizeof(uint8_t):
-            DPRINTF(MemoryAccess, "Read/Write of size %i on address 0x%x old data 0x%x\n",
-                    pkt->getSize(), pkt->getAddr(),pkt->get<uint8_t>());
-            DPRINTF(MemoryAccess, "New Data 0x%x wasn't conditional and happned\n",
-                    overwrite_mem);
-            break;
-          default:
-            DPRINTF(MemoryAccess, "Read/Write of size %i on address 0x%x\n",
-                    pkt->getSize(), pkt->getAddr());
+        TRACE_PACKET("Read/Write");
+    } else if (pkt->isRead()) {
+        assert(!pkt->isWrite());
+        if (pkt->isLocked()) {
+            trackLoadLocked(pkt);
+        }
+        memcpy(pkt->getPtr<uint8_t>(), hostAddr, pkt->getSize());
+        TRACE_PACKET("Read");
+    } else if (pkt->isWrite()) {
+        if (writeOK(pkt)) {
+            memcpy(hostAddr, pkt->getPtr<uint8_t>(), pkt->getSize());
+            TRACE_PACKET("Write");
+        }
+    } else if (pkt->isInvalidate()) {
+        //upgrade or invalidate
+        if (pkt->needsResponse()) {
+            pkt->makeAtomicResponse();
         }
-#endif
     } else {
         panic("unimplemented");
     }
 
+    if (pkt->needsResponse()) {
+        pkt->makeAtomicResponse();
+    }
+    return calculateLatency(pkt);
+}
+
+
+void
+PhysicalMemory::doFunctionalAccess(PacketPtr pkt)
+{
+    assert(pkt->getAddr() >= start() &&
+           pkt->getAddr() + pkt->getSize() <= start() + size());
+
+    uint8_t *hostAddr = pmemAddr + pkt->getAddr() - start();
+
+    if (pkt->cmd == MemCmd::ReadReq) {
+        memcpy(pkt->getPtr<uint8_t>(), hostAddr, pkt->getSize());
+        TRACE_PACKET("Read");
+    } else if (pkt->cmd == MemCmd::WriteReq) {
+        memcpy(hostAddr, pkt->getPtr<uint8_t>(), pkt->getSize());
+        TRACE_PACKET("Write");
+    } else {
+        panic("PhysicalMemory: unimplemented functional command %s",
+              pkt->cmdString());
+    }
+
     pkt->result = Packet::Success;
 }
 
+
 Port *
 PhysicalMemory::getPort(const std::string &if_name, int idx)
 {
@@ -407,8 +396,7 @@ PhysicalMemory::MemoryPort::deviceBlockSize()
 Tick
 PhysicalMemory::MemoryPort::recvAtomic(PacketPtr pkt)
 {
-    memory->doFunctionalAccess(pkt);
-    return memory->calculateLatency(pkt);
+    return memory->doAtomicAccess(pkt);
 }
 
 void
diff --git a/src/mem/physical.hh b/src/mem/physical.hh
index b9af5d334..8b13d32c1 100644
--- a/src/mem/physical.hh
+++ b/src/mem/physical.hh
@@ -112,12 +112,12 @@ class PhysicalMemory : public MemObject
     // inline a quick check for an empty locked addr list (hopefully
     // the common case), and do the full list search (if necessary) in
     // this out-of-line function
-    bool checkLockedAddrList(Request *req);
+    bool checkLockedAddrList(PacketPtr pkt);
 
     // Record the address of a load-locked operation so that we can
     // clear the execution context's lock flag if a matching store is
     // performed
-    void trackLoadLocked(Request *req);
+    void trackLoadLocked(PacketPtr pkt);
 
     // Compare a store address with any locked addresses so we can
     // clear the lock flag appropriately.  Return value set to 'false'
@@ -126,17 +126,18 @@ class PhysicalMemory : public MemObject
     // requesting execution context), 'true' otherwise.  Note that
     // this method must be called on *all* stores since even
     // non-conditional stores must clear any matching lock addresses.
-    bool writeOK(Request *req) {
+    bool writeOK(PacketPtr pkt) {
+        Request *req = pkt->req;
         if (lockedAddrList.empty()) {
             // no locked addrs: nothing to check, store_conditional fails
-            bool isLocked = req->isLocked();
+            bool isLocked = pkt->isLocked();
             if (isLocked) {
                 req->setExtraData(0);
             }
             return !isLocked; // only do write if not an sc
         } else {
             // iterate over list...
-            return checkLockedAddrList(req);
+            return checkLockedAddrList(pkt);
         }
     }
 
@@ -175,6 +176,7 @@ class PhysicalMemory : public MemObject
     unsigned int drain(Event *de);
 
   protected:
+    Tick doAtomicAccess(PacketPtr pkt);
     void doFunctionalAccess(PacketPtr pkt);
     virtual Tick calculateLatency(PacketPtr pkt);
     void recvStatusChange(Port::Status status);
diff --git a/src/mem/tport.cc b/src/mem/tport.cc
index ed4c0c172..2644a504c 100644
--- a/src/mem/tport.cc
+++ b/src/mem/tport.cc
@@ -67,14 +67,17 @@ SimpleTimingPort::recvTiming(PacketPtr pkt)
     // code to hanldle nacks here, but I'm pretty sure it didn't work
     // correctly with the drain code, so that would need to be fixed
     // if we ever added it back.
-    assert(pkt->result != Packet::Nacked);
+    assert(pkt->isRequest());
+    assert(pkt->result == Packet::Unknown);
+    bool needsResponse = pkt->needsResponse();
     Tick latency = recvAtomic(pkt);
     // turn packet around to go back to requester if response expected
-    if (pkt->needsResponse()) {
-        pkt->makeTimingResponse();
+    if (needsResponse) {
+        // recvAtomic() should already have turned packet into atomic response
+        assert(pkt->isResponse());
+        pkt->convertAtomicToTimingResponse();
         schedSendTiming(pkt, curTick + latency);
-    }
-    else if (pkt->cmd != MemCmd::UpgradeReq) {
+    } else {
         delete pkt->req;
         delete pkt;
     }
-- 
cgit v1.2.3


From 83af0fdcf57175adf8077c51e9ba872dd2c04b76 Mon Sep 17 00:00:00 2001
From: Steve Reinhardt <stever@eecs.umich.edu>
Date: Thu, 21 Jun 2007 11:59:17 -0700
Subject: Getting closer...

configs/example/memtest.py:
    Add progress interval option.
src/base/traceflags.py:
    Add MemTest flag.
src/cpu/memtest/memtest.cc:
    Clean up tracing.
src/cpu/memtest/memtest.hh:
    Get rid of unused code.

--HG--
extra : convert_revision : 92bd8241a6c90bfb6d908e5a5132cbdb500cbb87
---
 configs/example/memtest.py                    |   7 +-
 src/base/traceflags.py                        |   1 +
 src/cpu/memtest/memtest.cc                    | 140 ++-----
 src/cpu/memtest/memtest.hh                    |  10 -
 src/mem/cache/base_cache.cc                   |   6 +-
 src/mem/cache/base_cache.hh                   | 109 ++++--
 src/mem/cache/cache.hh                        |  18 +-
 src/mem/cache/cache_impl.hh                   | 544 ++++++++++++--------------
 src/mem/cache/coherence/coherence_protocol.cc |   3 +-
 src/mem/cache/miss/mshr.cc                    |   4 +-
 src/mem/cache/miss/mshr.hh                    |   2 +-
 src/mem/cache/miss/mshr_queue.cc              |   9 +-
 src/mem/cache/miss/mshr_queue.hh              |   7 +-
 src/mem/cache/prefetch/base_prefetcher.cc     |   8 +-
 src/mem/packet.cc                             |  12 +-
 src/mem/packet.hh                             |   3 +-
 16 files changed, 410 insertions(+), 473 deletions(-)

diff --git a/configs/example/memtest.py b/configs/example/memtest.py
index 9027a9866..0bc12e7bd 100644
--- a/configs/example/memtest.py
+++ b/configs/example/memtest.py
@@ -60,6 +60,11 @@ parser.add_option("-u", "--uncacheable", type="int", default=0,
                   help="Target percentage of uncacheable accesses "
                   "[default: %default]")
 
+parser.add_option("--progress", type="int", default=1000,
+                  metavar="NLOADS",
+                  help="Progress message interval "
+                  "[default: %default]")
+
 (options, args) = parser.parse_args()
 
 if args:
@@ -112,7 +117,7 @@ if options.numtesters > block_size:
 cpus = [ MemTest(atomic=options.atomic, max_loads=options.maxloads,
                  percent_functional=options.functional,
                  percent_uncacheable=options.uncacheable,
-                 progress_interval=1000)
+                 progress_interval=options.progress)
          for i in xrange(options.numtesters) ]
 
 # system simulated
diff --git a/src/base/traceflags.py b/src/base/traceflags.py
index 6b241c410..f4cf7dfd7 100644
--- a/src/base/traceflags.py
+++ b/src/base/traceflags.py
@@ -128,6 +128,7 @@ baseFlags = [
     'Mbox',
     'MemDepUnit',
     'MemoryAccess',
+    'MemTest',
     'O3CPU',
     'OzoneCPU',
     'OzoneLSQ',
diff --git a/src/cpu/memtest/memtest.cc b/src/cpu/memtest/memtest.cc
index 5d89f1b82..6e8c5d0bf 100644
--- a/src/cpu/memtest/memtest.cc
+++ b/src/cpu/memtest/memtest.cc
@@ -191,29 +191,25 @@ MemTest::init()
     // memory should be 0; no need to initialize them.
 }
 
-static void
-printData(ostream &os, uint8_t *data, int nbytes)
-{
-    os << hex << setfill('0');
-    // assume little-endian: print bytes from highest address to lowest
-    for (uint8_t *dp = data + nbytes - 1; dp >= data; --dp) {
-        os << setw(2) << (unsigned)*dp;
-    }
-    os << dec;
-}
 
 void
 MemTest::completeRequest(PacketPtr pkt)
 {
+    Request *req = pkt->req;
+
+    DPRINTF(MemTest, "completing %s at address %x (blk %x)\n",
+            pkt->isWrite() ? "write" : "read",
+            req->getPaddr(), blockAddr(req->getPaddr()));
+
     MemTestSenderState *state =
         dynamic_cast<MemTestSenderState *>(pkt->senderState);
 
     uint8_t *data = state->data;
     uint8_t *pkt_data = pkt->getPtr<uint8_t>();
-    Request *req = pkt->req;
 
     //Remove the address from the list of outstanding
-    std::set<unsigned>::iterator removeAddr = outstandingAddrs.find(req->getPaddr());
+    std::set<unsigned>::iterator removeAddr =
+        outstandingAddrs.find(req->getPaddr());
     assert(removeAddr != outstandingAddrs.end());
     outstandingAddrs.erase(removeAddr);
 
@@ -237,39 +233,17 @@ MemTest::completeRequest(PacketPtr pkt)
         }
 
         if (numReads >= maxLoads)
-            exitSimLoop("Maximum number of loads reached!");
+            exitSimLoop("maximum number of loads reached");
         break;
 
       case MemCmd::WriteResp:
         numWritesStat++;
         break;
-/*
-      case Copy:
-        //Also remove dest from outstanding list
-        removeAddr = outstandingAddrs.find(req->dest);
-        assert(removeAddr != outstandingAddrs.end());
-        outstandingAddrs.erase(removeAddr);
-        numCopiesStat++;
-        break;
-*/
+
       default:
         panic("invalid command %s (%d)", pkt->cmdString(), pkt->cmd.toInt());
     }
 
-    if (blockAddr(req->getPaddr()) == traceBlockAddr) {
-        cerr << name() << ": completed "
-             << (pkt->isWrite() ? "write" : "read")
-             << " access of "
-             << dec << pkt->getSize() << " bytes at address 0x"
-             << hex << req->getPaddr()
-             << " (0x" << hex << blockAddr(req->getPaddr()) << ")"
-             << ", value = 0x";
-        printData(cerr, pkt_data, pkt->getSize());
-        cerr << " @ cycle " << dec << curTick;
-
-        cerr << endl;
-    }
-
     noResponseCycles = 0;
     delete state;
     delete [] data;
@@ -325,7 +299,7 @@ MemTest::tick()
     //mem tester
     //We can eliminate the lower bits of the offset, and then use the id
     //to offset within the blks
-    offset &= ~63; //Not the low order bits
+    offset = blockAddr(offset);
     offset += id;
     access_size = 0;
 
@@ -351,29 +325,23 @@ MemTest::tick()
     if (cmd < percentReads) {
         // read
 
-        //For now we only allow one outstanding request per addreess per tester
-        //This means we assume CPU does write forwarding to reads that alias something
-        //in the cpu store buffer.
+        // For now we only allow one outstanding request per address
+        // per tester.  This means we assume CPU does write forwarding
+        // to reads that alias something in the cpu store buffer.
         if (outstandingAddrs.find(paddr) != outstandingAddrs.end()) {
             delete [] result;
             delete req;
             return;
         }
-        else outstandingAddrs.insert(paddr);
+
+        outstandingAddrs.insert(paddr);
 
         // ***** NOTE FOR RON: I'm not sure how to access checkMem. - Kevin
         funcPort.readBlob(req->getPaddr(), result, req->getSize());
 
-        if (blockAddr(paddr) == traceBlockAddr) {
-            cerr << name()
-                 << ": initiating read "
-                 << ((probe) ? "probe of " : "access of ")
-                 << dec << req->getSize() << " bytes from addr 0x"
-                 << hex << paddr
-                 << " (0x" << hex << blockAddr(paddr) << ")"
-                 << " at cycle "
-                 << dec << curTick << endl;
-        }
+        DPRINTF(MemTest,
+                "initiating read at address %x (blk %x) expecting %x\n",
+                req->getPaddr(), blockAddr(req->getPaddr()), *result);
 
         PacketPtr pkt = new Packet(req, MemCmd::ReadReq, Packet::Broadcast);
         pkt->dataDynamicArray(new uint8_t[req->getSize()]);
@@ -385,36 +353,25 @@ MemTest::tick()
             pkt->makeAtomicResponse();
             completeRequest(pkt);
         } else {
-//	    req->completionEvent = new MemCompleteEvent(req, result, this);
             sendPkt(pkt);
         }
     } else {
         // write
 
-        //For now we only allow one outstanding request per addreess per tester
-        //This means we assume CPU does write forwarding to reads that alias something
-        //in the cpu store buffer.
+        // For now we only allow one outstanding request per address
+        // per tester.  This means we assume CPU does write forwarding
+        // to reads that alias something in the cpu store buffer.
         if (outstandingAddrs.find(paddr) != outstandingAddrs.end()) {
             delete [] result;
             delete req;
             return;
         }
 
-        else outstandingAddrs.insert(paddr);
+        outstandingAddrs.insert(paddr);
+
+        DPRINTF(MemTest, "initiating write at address %x (blk %x) value %x\n",
+                req->getPaddr(), blockAddr(req->getPaddr()), data & 0xff);
 
-/*
-        if (blockAddr(req->getPaddr()) == traceBlockAddr) {
-            cerr << name() << ": initiating write "
-                 << ((probe)?"probe of ":"access of ")
-                 << dec << req->getSize() << " bytes (value = 0x";
-            printData(cerr, data_pkt->getPtr(), req->getSize());
-            cerr << ") to addr 0x"
-                 << hex << req->getPaddr()
-                 << " (0x" << hex << blockAddr(req->getPaddr()) << ")"
-                 << " at cycle "
-                 << dec << curTick << endl;
-        }
-*/
         PacketPtr pkt = new Packet(req, MemCmd::WriteReq, Packet::Broadcast);
         uint8_t *pkt_data = new uint8_t[req->getSize()];
         pkt->dataDynamicArray(pkt_data);
@@ -429,54 +386,9 @@ MemTest::tick()
             pkt->makeAtomicResponse();
             completeRequest(pkt);
         } else {
-//	    req->completionEvent = new MemCompleteEvent(req, NULL, this);
             sendPkt(pkt);
         }
     }
-/*    else {
-        // copy
-        unsigned source_align = random() % 100;
-        unsigned dest_align = random() % 100;
-        unsigned offset2 = random() % size;
-
-        Addr source = ((base) ? baseAddr1 : baseAddr2) + offset;
-        Addr dest = ((base) ? baseAddr2 : baseAddr1) + offset2;
-        if (outstandingAddrs.find(source) != outstandingAddrs.end()) return;
-        else outstandingAddrs.insert(source);
-        if (outstandingAddrs.find(dest) != outstandingAddrs.end()) return;
-        else outstandingAddrs.insert(dest);
-
-        if (source_align >= percentSourceUnaligned) {
-            source = blockAddr(source);
-        }
-        if (dest_align >= percentDestUnaligned) {
-            dest = blockAddr(dest);
-        }
-        req->cmd = Copy;
-        req->flags &= ~UNCACHEABLE;
-        req->paddr = source;
-        req->dest = dest;
-        delete [] req->data;
-        req->data = new uint8_t[blockSize];
-        req->size = blockSize;
-        if (source == traceBlockAddr || dest == traceBlockAddr) {
-            cerr << name()
-                 << ": initiating copy of "
-                 << dec << req->size << " bytes from addr 0x"
-                 << hex << source
-                 << " (0x" << hex << blockAddr(source) << ")"
-                 << " to addr 0x"
-                 << hex << dest
-                 << " (0x" << hex << blockAddr(dest) << ")"
-                 << " at cycle "
-                 << dec << curTick << endl;
-        }*
-        cacheInterface->access(req);
-        uint8_t result[blockSize];
-        checkMem->access(Read, source, &result, blockSize);
-        checkMem->access(Write, dest, &result, blockSize);
-    }
-*/
 }
 
 void
diff --git a/src/cpu/memtest/memtest.hh b/src/cpu/memtest/memtest.hh
index 565fafb77..f4713709a 100644
--- a/src/cpu/memtest/memtest.hh
+++ b/src/cpu/memtest/memtest.hh
@@ -35,8 +35,6 @@
 #include <set>
 
 #include "base/statistics.hh"
-//#include "mem/functional/functional.hh"
-//#include "mem/mem_interface.hh"
 #include "sim/eventq.hh"
 #include "sim/sim_exit.hh"
 #include "sim/sim_object.hh"
@@ -50,9 +48,6 @@ class MemTest : public MemObject
   public:
 
     MemTest(const std::string &name,
-//	    MemInterface *_cache_interface,
-//	    PhysicalMemory *main_mem,
-//	    PhysicalMemory *check_mem,
             unsigned _memorySize,
             unsigned _percentReads,
             unsigned _percentFunctional,
@@ -136,12 +131,7 @@ class MemTest : public MemObject
         uint8_t *data;
     };
 
-//    Request *dataReq;
     PacketPtr retryPkt;
-//    MemInterface *cacheInterface;
-//    PhysicalMemory *mainMem;
-//    PhysicalMemory *checkMem;
-//    SimpleThread *thread;
 
     bool accessRetry;
 
diff --git a/src/mem/cache/base_cache.cc b/src/mem/cache/base_cache.cc
index c7006550b..8b476e100 100644
--- a/src/mem/cache/base_cache.cc
+++ b/src/mem/cache/base_cache.cc
@@ -50,8 +50,9 @@ BaseCache::CachePort::CachePort(const std::string &_name, BaseCache *_cache)
 
 BaseCache::BaseCache(const std::string &name, Params &params)
     : MemObject(name),
-      mshrQueue(params.numMSHRs, 4),
-      writeBuffer(params.numWriteBuffers, params.numMSHRs+1000),
+      mshrQueue(params.numMSHRs, 4, MSHRQueue_MSHRs),
+      writeBuffer(params.numWriteBuffers, params.numMSHRs+1000,
+                  MSHRQueue_WriteBuffer),
       blkSize(params.blkSize),
       numTarget(params.numTargets),
       blocked(0),
@@ -128,6 +129,7 @@ BaseCache::init()
     cpuSidePort->sendStatusChange(Port::RangeChange);
 }
 
+
 void
 BaseCache::regStats()
 {
diff --git a/src/mem/cache/base_cache.hh b/src/mem/cache/base_cache.hh
index 5969b4b3f..10fd3289c 100644
--- a/src/mem/cache/base_cache.hh
+++ b/src/mem/cache/base_cache.hh
@@ -54,41 +54,49 @@
 #include "sim/eventq.hh"
 #include "sim/sim_exit.hh"
 
-/**
- * Reasons for Caches to be Blocked.
- */
-enum BlockedCause{
-    Blocked_NoMSHRs,
-    Blocked_NoTargets,
-    Blocked_NoWBBuffers,
-    Blocked_Coherence,
-    NUM_BLOCKED_CAUSES
-};
-
-/**
- * Reasons for cache to request a bus.
- */
-enum RequestCause{
-    Request_MSHR,
-    Request_WB,
-    Request_Coherence,
-    Request_PF
-};
-
 class MSHR;
 /**
  * A basic cache interface. Implements some common functions for speed.
  */
 class BaseCache : public MemObject
 {
+    /**
+     * Indexes to enumerate the MSHR queues.
+     */
+    enum MSHRQueueIndex {
+        MSHRQueue_MSHRs,
+        MSHRQueue_WriteBuffer
+    };
+
+    /**
+     * Reasons for caches to be blocked.
+     */
+    enum BlockedCause {
+        Blocked_NoMSHRs = MSHRQueue_MSHRs,
+        Blocked_NoWBBuffers = MSHRQueue_WriteBuffer,
+        Blocked_NoTargets,
+        NUM_BLOCKED_CAUSES
+    };
+
+  public:
+    /**
+     * Reasons for cache to request a bus.
+     */
+    enum RequestCause {
+        Request_MSHR = MSHRQueue_MSHRs,
+        Request_WB = MSHRQueue_WriteBuffer,
+        Request_PF,
+        NUM_REQUEST_CAUSES
+    };
+
+  private:
+
     class CachePort : public SimpleTimingPort
     {
       public:
         BaseCache *cache;
 
       protected:
-        Event *responseEvent;
-
         CachePort(const std::string &_name, BaseCache *_cache);
 
         virtual void recvStatusChange(Status status);
@@ -154,6 +162,56 @@ class BaseCache : public MemObject
     /** Write/writeback buffer */
     MSHRQueue writeBuffer;
 
+    /**
+     * Allocate an entry in the given queue, stamp it with the next
+     * order value, and update blocking / bus-request state.
+     * @param mq The queue to allocate from.
+     * @param addr Address passed to the queue's allocate().
+     * @param size Size passed to the queue's allocate().
+     * @param pkt Packet passed to the queue's allocate().
+     * @param time Time passed to requestMemSideBus().
+     * @param requestBus If true, request the memory-side bus.
+     * @return The entry returned by the queue's allocate().
+     */
+    MSHR *allocateBufferInternal(MSHRQueue *mq, Addr addr, int size,
+                                 PacketPtr pkt, Tick time, bool requestBus)
+    {
+        MSHR *mshr = mq->allocate(addr, size, pkt);
+        // Catch a failed allocation before the first dereference.
+        assert(mshr != NULL);
+        mshr->order = order++;
+
+        // mq->index is cast to both cause enums below.
+        if (mq->isFull()) {
+            setBlocked((BlockedCause)mq->index);
+        }
+
+        if (requestBus) {
+            requestMemSideBus((RequestCause)mq->index, time);
+        }
+
+        return mshr;
+    }
+
+    /**
+     * Mark an entry as in service in the queue it belongs to.  Drops
+     * the bus request when that queue has nothing pending, and clears
+     * the blocked cause if the queue went from full to not full.
+     * @param mshr The entry to mark in service.
+     */
+    void markInServiceInternal(MSHR *mshr)
+    {
+        MSHRQueue *mq = mshr->queue;
+        bool wasFull = mq->isFull();
+        mq->markInService(mshr);
+        if (!mq->havePending()) {
+            deassertMemSideBusRequest((RequestCause)mq->index);
+        }
+        if (wasFull && !mq->isFull()) {
+            clearBlocked((BlockedCause)mq->index);
+        }
+    }
+
     /** Block size of this cache */
     const int blkSize;
 
@@ -382,6 +420,31 @@ class BaseCache : public MemObject
     Addr blockAlign(Addr addr) const { return (addr & ~(blkSize - 1)); }
 
 
+    MSHR *allocateMissBuffer(PacketPtr pkt, Tick time, bool requestBus)
+    {
+        return allocateBufferInternal(&mshrQueue,
+                                      blockAlign(pkt->getAddr()), blkSize,
+                                      pkt, time, requestBus);
+    }
+
+    MSHR *allocateBuffer(PacketPtr pkt, Tick time, bool requestBus)
+    {
+        MSHRQueue *mq = NULL;
+
+        if (pkt->isWrite() && !pkt->isRead()) {
+            /**
+             * @todo Add write merging here.
+             */
+            mq = &writeBuffer;
+        } else {
+            mq = &mshrQueue;
+        }
+
+        return allocateBufferInternal(mq, pkt->getAddr(), pkt->getSize(),
+                                      pkt, time, requestBus);
+    }
+
+
     /**
      * Returns true if the cache is blocked for accesses.
      */
diff --git a/src/mem/cache/cache.hh b/src/mem/cache/cache.hh
index 16d15cf86..06fce1a71 100644
--- a/src/mem/cache/cache.hh
+++ b/src/mem/cache/cache.hh
@@ -179,7 +179,7 @@ class Cache : public BaseCache
      * @return Pointer to the cache block touched by the request. NULL if it
      * was a miss.
      */
-    bool access(PacketPtr pkt, BlkType *blk, int & lat);
+    bool access(PacketPtr pkt, BlkType *&blk, int &lat);
 
     /**
      *Handle doing the Compare and Swap function for SPARC.
@@ -201,7 +201,7 @@ class Cache : public BaseCache
 
     bool satisfyCpuSideRequest(PacketPtr pkt, BlkType *blk);
     bool satisfyTarget(MSHR::Target *target, BlkType *blk);
-    void satisfyMSHR(MSHR *mshr, PacketPtr pkt, BlkType *blk);
+    bool satisfyMSHR(MSHR *mshr, PacketPtr pkt, BlkType *blk);
 
     void doTimingSupplyResponse(PacketPtr req_pkt, uint8_t *blk_data);
 
@@ -310,15 +310,16 @@ class Cache : public BaseCache
      * @param isFill Whether to fetch & allocate a block
      *               or just forward the request.
      */
-    MSHR *allocateBuffer(PacketPtr pkt, Tick time, bool isFill,
-                         bool requestBus);
+    MSHR *allocateBuffer(PacketPtr pkt, Tick time, bool requestBus);
 
     /**
      * Selects a outstanding request to service.
      * @return The request to service, NULL if none found.
      */
+    PacketPtr getBusPacket(PacketPtr cpu_pkt, BlkType *blk,
+                           bool needsExclusive);
     MSHR *getNextMSHR();
-    PacketPtr getPacket();
+    PacketPtr getTimingPacket();
 
     /**
      * Marks a request as in service (sent on the bus). This can have side
@@ -328,13 +329,6 @@ class Cache : public BaseCache
      */
     void markInService(MSHR *mshr);
 
-    /**
-     * Collect statistics and free resources of a satisfied request.
-     * @param pkt The request that has been satisfied.
-     * @param time The time when the request is satisfied.
-     */
-    void handleResponse(PacketPtr pkt, Tick time);
-
     /**
      * Perform the given writeback request.
      * @param pkt The writeback request.
diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh
index 0f66e613c..81fcb4158 100644
--- a/src/mem/cache/cache_impl.hh
+++ b/src/mem/cache/cache_impl.hh
@@ -152,40 +152,21 @@ Cache<TagStore,Coherence>::cmpAndSwap(BlkType *blk, PacketPtr pkt)
 template<class TagStore, class Coherence>
 MSHR *
 Cache<TagStore,Coherence>::allocateBuffer(PacketPtr pkt, Tick time,
-                                          bool isFill, bool requestBus)
+                                          bool requestBus)
 {
-    int  size = isFill ? blkSize : pkt->getSize();
-    Addr addr = isFill ? tags->blkAlign(pkt->getAddr()) : pkt->getAddr();
+    MSHRQueue *mq = NULL;
 
-    MSHR *mshr = NULL;
-
-    if (pkt->isWrite()) {
+    if (pkt->isWrite() && !pkt->isRead()) {
         /**
          * @todo Add write merging here.
          */
-        mshr = writeBuffer.allocate(addr, size, pkt, isFill);
-        mshr->order = order++;
-
-        if (writeBuffer.isFull()) {
-            setBlocked(Blocked_NoWBBuffers);
-        }
-
-        if (requestBus) {
-            requestMemSideBus(Request_WB, time);
-        }
+        mq = &writeBuffer;
     } else {
-        mshr = mshrQueue.allocate(addr, size, pkt, isFill);
-        mshr->order = order++;
-        if (mshrQueue.isFull()) {
-            setBlocked(Blocked_NoMSHRs);
-        }
-        if (requestBus) {
-            requestMemSideBus(Request_MSHR, time);
-        }
+        mq = &mshrQueue;
     }
 
-    assert(mshr != NULL);
-    return mshr;
+    return allocateBufferInternal(mq, pkt->getAddr(), pkt->getSize(),
+                                  pkt, time, requestBus);
 }
 
 
@@ -193,33 +174,7 @@ template<class TagStore, class Coherence>
 void
 Cache<TagStore,Coherence>::markInService(MSHR *mshr)
 {
-    bool unblock = false;
-    BlockedCause cause = NUM_BLOCKED_CAUSES;
-
-    /**
-     * @todo Should include MSHRQueue pointer in MSHR to select the correct
-     * one.
-     */
-    if (mshr->queue == &writeBuffer) {
-        // Forwarding a write/ writeback, don't need to change
-        // the command
-        unblock = writeBuffer.isFull();
-        writeBuffer.markInService(mshr);
-        if (!writeBuffer.havePending()){
-            deassertMemSideBusRequest(Request_WB);
-        }
-        if (unblock) {
-            // Do we really unblock?
-            unblock = !writeBuffer.isFull();
-            cause = Blocked_NoWBBuffers;
-        }
-    } else {
-        assert(mshr->queue == &mshrQueue);
-        unblock = mshrQueue.isFull();
-        mshrQueue.markInService(mshr);
-        if (!mshrQueue.havePending()){
-            deassertMemSideBusRequest(Request_MSHR);
-        }
+    markInServiceInternal(mshr);
 #if 0
         if (mshr->originalCmd == MemCmd::HardPFReq) {
             DPRINTF(HWPrefetch, "%s:Marking a HW_PF in service\n",
@@ -231,14 +186,6 @@ Cache<TagStore,Coherence>::markInService(MSHR *mshr)
             }
         }
 #endif
-        if (unblock) {
-            unblock = !mshrQueue.isFull();
-            cause = Blocked_NoMSHRs;
-        }
-    }
-    if (unblock) {
-        clearBlocked(cause);
-    }
 }
 
 
@@ -275,9 +222,16 @@ Cache<TagStore,Coherence>::squash(int threadNum)
 
 template<class TagStore, class Coherence>
 bool
-Cache<TagStore,Coherence>::access(PacketPtr pkt, BlkType *blk, int &lat)
+Cache<TagStore,Coherence>::access(PacketPtr pkt, BlkType *&blk, int &lat)
 {
+    if (pkt->req->isUncacheable())  {
+        blk = NULL;
+        lat = hitLatency;
+        return false;
+    }
+
     bool satisfied = false;  // assume the worst
+    blk = tags->findBlock(pkt->getAddr(), lat);
 
     if (prefetchAccess) {
         //We are determining prefetches on access stream, call prefetcher
@@ -307,6 +261,8 @@ Cache<TagStore,Coherence>::access(PacketPtr pkt, BlkType *blk, int &lat)
             hits[pkt->cmdToIndex()][0/*pkt->req->getThreadNum()*/]++;
             satisfied = true;
 
+            // Check RMW operations first since both isRead() and
+            // isWrite() will be true for them
             if (pkt->cmd == MemCmd::SwapReq) {
                 cmpAndSwap(blk, pkt);
             } else if (pkt->isWrite()) {
@@ -314,12 +270,16 @@ Cache<TagStore,Coherence>::access(PacketPtr pkt, BlkType *blk, int &lat)
                     blk->status |= BlkDirty;
                     pkt->writeDataToBlock(blk->data, blkSize);
                 }
-            } else {
-                assert(pkt->isRead());
+            } else if (pkt->isRead()) {
                 if (pkt->isLocked()) {
                     blk->trackLoadLocked(pkt);
                 }
                 pkt->setDataFromBlock(blk->data, blkSize);
+            } else {
+                // Not a read or write... must be an upgrade.  it's OK
+                // to just ack those as long as we have an exclusive
+                // copy at this level.
+                assert(pkt->cmd == MemCmd::UpgradeReq);
             }
         } else {
             // permission violation... nothing to do here, leave unsatisfied
@@ -351,19 +311,24 @@ Cache<TagStore,Coherence>::timingAccess(PacketPtr pkt)
     // we charge hitLatency for doing just about anything here
     Tick time =  curTick + hitLatency;
 
+    if (pkt->memInhibitAsserted()) {
+        DPRINTF(Cache, "mem inhibited on 0x%x: not responding\n",
+                pkt->getAddr());
+        assert(!pkt->req->isUncacheable());
+        return true;
+    }
+
     if (pkt->req->isUncacheable()) {
-        allocateBuffer(pkt, time, false, true);
+        allocateBuffer(pkt, time, true);
         assert(pkt->needsResponse()); // else we should delete it here??
         return true;
     }
 
     PacketList writebacks;
     int lat = hitLatency;
-    BlkType *blk = tags->findBlock(pkt->getAddr(), lat);
     bool satisfied = false;
 
     Addr blk_addr = pkt->getAddr() & ~(Addr(blkSize-1));
-
     MSHR *mshr = mshrQueue.findMatch(blk_addr);
 
     if (!mshr) {
@@ -373,6 +338,7 @@ Cache<TagStore,Coherence>::timingAccess(PacketPtr pkt)
         // cache block... a more aggressive system could detect the
         // overlap (if any) and forward data out of the MSHRs, but we
         // don't do that yet)
+        BlkType *blk = NULL;
         satisfied = access(pkt, blk, lat);
     }
 
@@ -401,7 +367,7 @@ Cache<TagStore,Coherence>::timingAccess(PacketPtr pkt)
     // copy writebacks to write buffer
     while (!writebacks.empty()) {
         PacketPtr wbPkt = writebacks.front();
-        allocateBuffer(wbPkt, time, false, true);
+        allocateBuffer(wbPkt, time, true);
         writebacks.pop_front();
     }
 
@@ -435,7 +401,7 @@ Cache<TagStore,Coherence>::timingAccess(PacketPtr pkt)
             // always mark as cache fill for now... if we implement
             // no-write-allocate or bypass accesses this will have to
             // be changed.
-            allocateBuffer(pkt, time, true, true);
+            allocateMissBuffer(pkt, time, true);
         }
     }
 
@@ -449,54 +415,109 @@ Cache<TagStore,Coherence>::timingAccess(PacketPtr pkt)
 }
 
 
+template<class TagStore, class Coherence>
+PacketPtr
+Cache<TagStore,Coherence>::getBusPacket(PacketPtr cpu_pkt, BlkType *blk,
+                                        bool needsExclusive)
+{
+    bool blkValid = blk && blk->isValid();
+
+    if (cpu_pkt->req->isUncacheable()) {
+        assert(blk == NULL);
+        return NULL;
+    }
+
+    if (!blkValid &&
+        (cpu_pkt->cmd == MemCmd::Writeback ||
+         cpu_pkt->cmd == MemCmd::UpgradeReq)) {
+        // For now, writebacks (and upgrades) from upper-level caches
+        // that completely miss in the cache just go through. If we
+        // had "fast write" support (where we could write the whole
+        // block w/o fetching new data) we might want to allocate on
+        // writeback misses instead.
+        return NULL;
+    }
+
+    MemCmd cmd;
+    const bool useUpgrades = true;
+    if (blkValid && useUpgrades) {
+        // only reason to be here is that blk is shared
+        // (read-only) and we need exclusive
+        assert(needsExclusive && !blk->isWritable());
+        cmd = MemCmd::UpgradeReq;
+    } else {
+        // block is invalid
+        cmd = needsExclusive ? MemCmd::ReadExReq : MemCmd::ReadReq;
+    }
+    PacketPtr pkt = new Packet(cpu_pkt->req, cmd, Packet::Broadcast, blkSize);
+
+    pkt->allocate();
+    return pkt;
+}
+
+
 template<class TagStore, class Coherence>
 Tick
 Cache<TagStore,Coherence>::atomicAccess(PacketPtr pkt)
 {
+    int lat = hitLatency;
+
+    if (pkt->memInhibitAsserted()) {
+        DPRINTF(Cache, "mem inhibited on 0x%x: not responding\n",
+                pkt->getAddr());
+        assert(!pkt->req->isUncacheable());
+        return lat;
+    }
+
     // should assert here that there are no outstanding MSHRs or
     // writebacks... that would mean that someone used an atomic
     // access in timing mode
 
-    if (pkt->req->isUncacheable()) {
-        // Uncacheables just go through
-        return memSidePort->sendAtomic(pkt);
-    }
-
-    PacketList writebacks;
-    int lat = hitLatency;
-    BlkType *blk = tags->findBlock(pkt->getAddr(), lat);
-    bool satisfied = access(pkt, blk, lat);
+    BlkType *blk = NULL;
 
-    if (!satisfied) {
+    if (!access(pkt, blk, lat)) {
         // MISS
-        CacheBlk::State old_state = (blk) ? blk->status : 0;
-        MemCmd cmd = coherence->getBusCmd(pkt->cmd, old_state);
-        Packet busPkt = Packet(pkt->req, cmd, Packet::Broadcast, blkSize);
-        busPkt.allocate();
+        PacketPtr busPkt = getBusPacket(pkt, blk, pkt->needsExclusive());
 
-        DPRINTF(Cache, "Sending a atomic %s for %x\n",
-                busPkt.cmdString(), busPkt.getAddr());
+        bool isCacheFill = (busPkt != NULL);
 
-        lat += memSidePort->sendAtomic(&busPkt);
+        if (busPkt == NULL) {
+            // just forwarding the same request to the next level
+            // no local cache operation involved
+            busPkt = pkt;
+        }
 
-        DPRINTF(Cache, "Receive response: %s for addr %x in state %i\n",
-                busPkt.cmdString(), busPkt.getAddr(), old_state);
+        DPRINTF(Cache, "Sending an atomic %s for %x\n",
+                busPkt->cmdString(), busPkt->getAddr());
 
-        blk = handleFill(&busPkt, blk, writebacks);
-        bool status = satisfyCpuSideRequest(pkt, blk);
-        assert(status);
-    }
+#if TRACING_ON
+        CacheBlk::State old_state = blk ? blk->status : 0;
+#endif
 
-    // We now have the block one way or another (hit or completed miss)
+        lat += memSidePort->sendAtomic(busPkt);
 
-    // Handle writebacks if needed
-    while (!writebacks.empty()){
-        PacketPtr wbPkt = writebacks.front();
-        memSidePort->sendAtomic(wbPkt);
-        writebacks.pop_front();
-        delete wbPkt;
+        DPRINTF(Cache, "Receive response: %s for addr %x in state %i\n",
+                busPkt->cmdString(), busPkt->getAddr(), old_state);
+
+        if (isCacheFill) {
+            PacketList writebacks;
+            blk = handleFill(busPkt, blk, writebacks);
+            bool status = satisfyCpuSideRequest(pkt, blk);
+            assert(status);
+            delete busPkt;
+
+            // Handle writebacks if needed
+            while (!writebacks.empty()){
+                PacketPtr wbPkt = writebacks.front();
+                memSidePort->sendAtomic(wbPkt);
+                writebacks.pop_front();
+                delete wbPkt;
+            }
+        }
     }
 
+    // We now have the block one way or another (hit or completed miss)
+
     if (pkt->needsResponse()) {
         pkt->makeAtomicResponse();
         pkt->result = Packet::Success;
@@ -553,98 +574,94 @@ Cache<TagStore,Coherence>::functionalAccess(PacketPtr pkt,
 //
 /////////////////////////////////////////////////////
 
+
 template<class TagStore, class Coherence>
-void
-Cache<TagStore,Coherence>::handleResponse(PacketPtr pkt, Tick time)
+bool
+Cache<TagStore,Coherence>::satisfyCpuSideRequest(PacketPtr pkt, BlkType *blk)
 {
-    MSHR *mshr = dynamic_cast<MSHR*>(pkt->senderState);
-#ifndef NDEBUG
-    int num_targets = mshr->getNumTargets();
-#endif
-
-    bool unblock = false;
-    bool unblock_target = false;
-    BlockedCause cause = NUM_BLOCKED_CAUSES;
-
-    if (mshr->isCacheFill) {
-#if 0
-        mshr_miss_latency[mshr->originalCmd.toInt()][0/*pkt->req->getThreadNum()*/] +=
-            curTick - pkt->time;
-#endif
-        // targets were handled in the cache tags
-        if (mshr == noTargetMSHR) {
-            // we always clear at least one target
-            unblock_target = true;
-            cause = Blocked_NoTargets;
-            noTargetMSHR = NULL;
-        }
+    if (blk && (pkt->needsExclusive() ? blk->isWritable() : blk->isValid())) {
+        assert(pkt->isWrite() || pkt->isReadWrite() || pkt->isRead());
+        assert(pkt->getOffset(blkSize) + pkt->getSize() <= blkSize);
 
-        if (mshr->hasTargets()) {
-            // Didn't satisfy all the targets, need to resend
-            mshrQueue.markPending(mshr);
-            mshr->order = order++;
-            requestMemSideBus(Request_MSHR, time);
-        }
-        else {
-            unblock = mshrQueue.isFull();
-            mshrQueue.deallocate(mshr);
-            if (unblock) {
-                unblock = !mshrQueue.isFull();
-                cause = Blocked_NoMSHRs;
+        // Check RMW first: per the note in access(), isWrite() holds too
+        if (pkt->isReadWrite()) {
+            cmpAndSwap(blk, pkt);
+        } else if (pkt->isWrite()) {
+            if (blk->checkWrite(pkt)) {
+                blk->status |= BlkDirty;
+                pkt->writeDataToBlock(blk->data, blkSize);
             }
+        } else {
+            if (pkt->isLocked()) {
+                blk->trackLoadLocked(pkt);
+            }
+            pkt->setDataFromBlock(blk->data, blkSize);
         }
+        return true;
     } else {
-        if (pkt->req->isUncacheable()) {
-            mshr_uncacheable_lat[pkt->cmd.toInt()][0/*pkt->req->getThreadNum()*/] +=
-                curTick - pkt->time;
-        }
-        if (mshr->hasTargets() && pkt->req->isUncacheable()) {
-            // Should only have 1 target if we had any
-            assert(num_targets == 1);
-            MSHR::Target *target = mshr->getTarget();
-            assert(target->cpuSide);
-            mshr->popTarget();
-            if (pkt->isRead()) {
-                target->pkt->setData(pkt->getPtr<uint8_t>());
-            }
-            cpuSidePort->respond(target->pkt, time);
-            assert(!mshr->hasTargets());
+        return false;
+    }
+}
+
+
+template<class TagStore, class Coherence>
+bool
+Cache<TagStore,Coherence>::satisfyTarget(MSHR::Target *target, BlkType *blk)
+{
+    assert(target != NULL);
+    assert(target->isCpuSide());
+    return satisfyCpuSideRequest(target->pkt, blk);
+}
+
+template<class TagStore, class Coherence>
+bool
+Cache<TagStore,Coherence>::satisfyMSHR(MSHR *mshr, PacketPtr pkt,
+                                       BlkType *blk)
+{
+    // Respond to MSHR targets, if any.  Returns false (MSHR marked
+    // pending again) as soon as a target cannot be satisfied.
+    // First offset for critical word first calculations
+    int initial_offset = 0;
+
+    if (mshr->hasTargets()) {
+        initial_offset = mshr->getTarget()->pkt->getOffset(blkSize);
+    }
+
+    while (mshr->hasTargets()) {
+        MSHR::Target *target = mshr->getTarget();
+
+        if (!satisfyTarget(target, blk)) {
+            // Invalid access, need to do another request
+            // can occur if block is invalidated, or not correct
+            // permissions
+            MSHRQueue *mq = mshr->queue;
+            mq->markPending(mshr);
+            mshr->order = order++;
+            requestMemSideBus((RequestCause)mq->index, pkt->finishTime);
+            return false;
         }
-        else if (mshr->hasTargets()) {
-            //Must be a no_allocate with possibly more than one target
-            assert(!mshr->isCacheFill);
-            while (mshr->hasTargets()) {
-                MSHR::Target *target = mshr->getTarget();
-                assert(target->isCpuSide());
-                mshr->popTarget();
-                if (pkt->isRead()) {
-                    target->pkt->setData(pkt->getPtr<uint8_t>());
-                }
-                cpuSidePort->respond(target->pkt, time);
-            }
+
+
+        // How many bytes past the first request is this one
+        int transfer_offset = target->pkt->getOffset(blkSize) - initial_offset;
+        if (transfer_offset < 0) {
+            transfer_offset += blkSize;
         }
 
-        if (pkt->isWrite()) {
-            // If the wrtie buffer is full, we might unblock now
-            unblock = writeBuffer.isFull();
-            writeBuffer.deallocate(mshr);
-            if (unblock) {
-                // Did we really unblock?
-                unblock = !writeBuffer.isFull();
-                cause = Blocked_NoWBBuffers;
-            }
-        } else {
-            unblock = mshrQueue.isFull();
-            mshrQueue.deallocate(mshr);
-            if (unblock) {
-                unblock = !mshrQueue.isFull();
-                cause = Blocked_NoMSHRs;
-            }
+        // If critical word (no offset) return first word time.  The
+        // parentheses matter: '+' binds tighter than '?:'.
+        Tick completion_time = tags->getHitLatency() +
+            (transfer_offset ? pkt->finishTime : pkt->firstWordTime);
+        if (!target->pkt->req->isUncacheable()) {
+            missLatency[target->pkt->cmdToIndex()][0/*pkt->req->getThreadNum()*/] +=
+                completion_time - target->time;
         }
+        target->pkt->makeTimingResponse();
+        cpuSidePort->respond(target->pkt, completion_time);
+        mshr->popTarget();
     }
-    if (unblock || unblock_target) {
-        clearBlocked(cause);
-    }
+
+    return true;
 }
 
 
@@ -665,21 +682,60 @@ Cache<TagStore,Coherence>::handleResponse(PacketPtr pkt)
     assert(pkt->result == Packet::Success);
     DPRINTF(Cache, "Handling reponse to %x\n", pkt->getAddr());
 
+    MSHRQueue *mq = mshr->queue;
+    bool wasFull = mq->isFull();
+
+    if (mshr == noTargetMSHR) {
+        // we always clear at least one target
+        clearBlocked(Blocked_NoTargets);
+        noTargetMSHR = NULL;
+    }
+
+    // Can we deallocate MSHR when done?
+    bool deallocate = false;
+
     if (mshr->isCacheFill) {
+#if 0
+        mshr_miss_latency[mshr->originalCmd.toInt()][0/*pkt->req->getThreadNum()*/] +=
+            curTick - pkt->time;
+#endif
         DPRINTF(Cache, "Block for addr %x being updated in Cache\n",
                 pkt->getAddr());
         BlkType *blk = tags->findBlock(pkt->getAddr());
         PacketList writebacks;
         blk = handleFill(pkt, blk, writebacks);
-        satisfyMSHR(mshr, pkt, blk);
+        deallocate = satisfyMSHR(mshr, pkt, blk);
         // copy writebacks to write buffer
         while (!writebacks.empty()) {
             PacketPtr wbPkt = writebacks.front();
-            allocateBuffer(wbPkt, time, false, true);
+            allocateBuffer(wbPkt, time, true);
             writebacks.pop_front();
         }
+    } else {
+        if (pkt->req->isUncacheable()) {
+            mshr_uncacheable_lat[pkt->cmd.toInt()][0/*pkt->req->getThreadNum()*/] +=
+                curTick - pkt->time;
+        }
+
+        while (mshr->hasTargets()) {
+            MSHR::Target *target = mshr->getTarget();
+            assert(target->isCpuSide());
+            mshr->popTarget();
+            if (pkt->isRead()) {
+                target->pkt->setData(pkt->getPtr<uint8_t>());
+            }
+            cpuSidePort->respond(target->pkt, time);
+        }
+        assert(!mshr->hasTargets());
+        deallocate = true;
+    }
+
+    if (deallocate) {
+        mq->deallocate(mshr);
+        if (wasFull && !mq->isFull()) {
+            clearBlocked((BlockedCause)mq->index);
+        }
     }
-    handleResponse(pkt, time);
 }
 
 
@@ -717,6 +773,8 @@ Cache<TagStore,Coherence>::handleFill(PacketPtr pkt, BlkType *blk,
     Addr addr = pkt->getAddr();
 
     if (blk == NULL) {
+        // better have read new data
+        assert(pkt->isRead());
 
         // need to do a replacement
         blk = tags->findReplacement(addr, writebacks);
@@ -733,7 +791,6 @@ Cache<TagStore,Coherence>::handleFill(PacketPtr pkt, BlkType *blk,
 
         blk->tag = tags->extractTag(addr);
         blk->status = coherence->getNewState(pkt);
-        assert(pkt->isRead());
     } else {
         // existing block... probably an upgrade
         assert(blk->tag == tags->extractTag(addr));
@@ -759,90 +816,6 @@ Cache<TagStore,Coherence>::handleFill(PacketPtr pkt, BlkType *blk,
 }
 
 
-template<class TagStore, class Coherence>
-bool
-Cache<TagStore,Coherence>::satisfyCpuSideRequest(PacketPtr pkt, BlkType *blk)
-{
-    if (blk && (pkt->needsExclusive() ? blk->isWritable() : blk->isValid())) {
-        assert(pkt->isWrite() || pkt->isReadWrite() || pkt->isRead());
-        assert(pkt->getOffset(blkSize) + pkt->getSize() <= blkSize);
-
-        if (pkt->isWrite()) {
-            if (blk->checkWrite(pkt)) {
-                blk->status |= BlkDirty;
-                pkt->writeDataToBlock(blk->data, blkSize);
-            }
-        } else if (pkt->isReadWrite()) {
-            cmpAndSwap(blk, pkt);
-        } else {
-            if (pkt->isLocked()) {
-                blk->trackLoadLocked(pkt);
-            }
-            pkt->setDataFromBlock(blk->data, blkSize);
-        }
-
-        return true;
-    } else {
-        return false;
-    }
-}
-
-
-template<class TagStore, class Coherence>
-bool
-Cache<TagStore,Coherence>::satisfyTarget(MSHR::Target *target, BlkType *blk)
-{
-    assert(target != NULL);
-    assert(target->isCpuSide());
-    return satisfyCpuSideRequest(target->pkt, blk);
-}
-
-template<class TagStore, class Coherence>
-void
-Cache<TagStore,Coherence>::satisfyMSHR(MSHR *mshr, PacketPtr pkt,
-                                       BlkType *blk)
-{
-    // respond to MSHR targets, if any
-
-    // First offset for critical word first calculations
-    int initial_offset = 0;
-
-    if (mshr->hasTargets()) {
-        initial_offset = mshr->getTarget()->pkt->getOffset(blkSize);
-    }
-
-    while (mshr->hasTargets()) {
-        MSHR::Target *target = mshr->getTarget();
-
-        if (!satisfyTarget(target, blk)) {
-            // Invalid access, need to do another request
-            // can occur if block is invalidated, or not correct
-            // permissions
-            break;
-        }
-
-
-        // How many bytes pass the first request is this one
-        int transfer_offset = target->pkt->getOffset(blkSize) - initial_offset;
-        if (transfer_offset < 0) {
-            transfer_offset += blkSize;
-        }
-
-        // If critical word (no offset) return first word time
-        Tick completion_time = tags->getHitLatency() +
-            transfer_offset ? pkt->finishTime : pkt->firstWordTime;
-
-        if (!target->pkt->req->isUncacheable()) {
-            missLatency[target->pkt->cmdToIndex()][0/*pkt->req->getThreadNum()*/] +=
-                completion_time - target->time;
-        }
-        target->pkt->makeTimingResponse();
-        cpuSidePort->respond(target->pkt, completion_time);
-        mshr->popTarget();
-    }
-}
-
-
 /////////////////////////////////////////////////////
 //
 // Snoop path: requests coming in from the memory side
@@ -1052,7 +1025,7 @@ Cache<TagStore,Coherence>::getNextMSHR()
             // (hwpf_mshr_misses)
             mshr_misses[pkt->cmdToIndex()][0/*pkt->req->getThreadNum()*/]++;
             // Don't request bus, since we already have it
-            return allocateBuffer(pkt, curTick, true, false);
+            return allocateMissBuffer(pkt, curTick, false);
         }
     }
 
@@ -1062,7 +1035,7 @@ Cache<TagStore,Coherence>::getNextMSHR()
 
 template<class TagStore, class Coherence>
 PacketPtr
-Cache<TagStore,Coherence>::getPacket()
+Cache<TagStore,Coherence>::getTimingPacket()
 {
     MSHR *mshr = getNextMSHR();
 
@@ -1073,30 +1046,21 @@ Cache<TagStore,Coherence>::getPacket()
     BlkType *blk = tags->findBlock(mshr->addr);
 
     // use request from 1st target
-    MSHR::Target *tgt1 = mshr->getTarget();
-    PacketPtr tgt1_pkt = tgt1->pkt;
-    PacketPtr pkt;
+    PacketPtr tgt_pkt = mshr->getTarget()->pkt;
+    PacketPtr pkt = getBusPacket(tgt_pkt, blk, mshr->needsExclusive);
 
-    if (mshr->isCacheFill) {
-        MemCmd cmd;
-        if (blk && blk->isValid()) {
-            // only reason to be here is that blk is shared
-            // (read-only) and we need exclusive
-            assert(mshr->needsExclusive && !blk->isWritable());
-            cmd = MemCmd::UpgradeReq;
-        } else {
-            // block is invalid
-            cmd = mshr->needsExclusive ? MemCmd::ReadExReq : MemCmd::ReadReq;
+    mshr->isCacheFill = (pkt != NULL);
+
+    if (pkt == NULL) {
+        // make copy of current packet to forward
+        pkt = new Packet(tgt_pkt);
+        pkt->allocate();
+        if (pkt->isWrite()) {
+            pkt->setData(tgt_pkt->getPtr<uint8_t>());
         }
-        pkt = new Packet(tgt1_pkt->req, cmd, Packet::Broadcast);
-    } else {
-        assert(blk == NULL);
-        assert(mshr->getNumTargets() == 1);
-        pkt = new Packet(tgt1_pkt->req, tgt1_pkt->cmd, Packet::Broadcast);
     }
 
     pkt->senderState = mshr;
-    pkt->allocate();
     return pkt;
 }
 
@@ -1243,7 +1207,7 @@ Cache<TagStore,Coherence>::MemSidePort::sendPacket()
         waitingOnRetry = !success;
     } else {
         // check for non-response packets (requests & writebacks)
-        PacketPtr pkt = myCache()->getPacket();
+        PacketPtr pkt = myCache()->getTimingPacket();
         MSHR *mshr = dynamic_cast<MSHR*>(pkt->senderState);
 
         bool success = sendTiming(pkt);
diff --git a/src/mem/cache/coherence/coherence_protocol.cc b/src/mem/cache/coherence/coherence_protocol.cc
index 3fd17c8c7..47d2b469f 100644
--- a/src/mem/cache/coherence/coherence_protocol.cc
+++ b/src/mem/cache/coherence/coherence_protocol.cc
@@ -259,7 +259,7 @@ CoherenceProtocol::CoherenceProtocol(const string &name,
     MC::Command writeToSharedCmd =
         doUpgrades ? MC::UpgradeReq : MC::ReadExReq;
     MC::Command writeToSharedResp =
-        doUpgrades ? MC::UpgradeReq : MC::ReadExResp;
+        doUpgrades ? MC::UpgradeResp : MC::ReadExResp;
 
     // Note that all transitions by default cause a panic.
     // Override the valid transitions with the appropriate actions here.
@@ -272,6 +272,7 @@ CoherenceProtocol::CoherenceProtocol(const string &name,
     tt[Invalid][MC::WriteReq].onRequest(MC::ReadExReq);
     tt[Invalid][MC::ReadExReq].onRequest(MC::ReadExReq);
     tt[Invalid][MC::SwapReq].onRequest(MC::ReadExReq);
+    tt[Invalid][MC::UpgradeReq].onRequest(MC::UpgradeReq);
     tt[Shared][MC::WriteReq].onRequest(writeToSharedCmd);
     tt[Shared][MC::ReadExReq].onRequest(MC::ReadExReq);
     tt[Shared][MC::SwapReq].onRequest(writeToSharedCmd);
diff --git a/src/mem/cache/miss/mshr.cc b/src/mem/cache/miss/mshr.cc
index 218d42339..1f2c05a6e 100644
--- a/src/mem/cache/miss/mshr.cc
+++ b/src/mem/cache/miss/mshr.cc
@@ -54,12 +54,12 @@ MSHR::MSHR()
 }
 
 void
-MSHR::allocate(Addr _addr, int _size, PacketPtr target, bool cacheFill)
+MSHR::allocate(Addr _addr, int _size, PacketPtr target)
 {
     addr = _addr;
     size = _size;
     assert(target);
-    isCacheFill = cacheFill;
+    isCacheFill = false;
     needsExclusive = target->needsExclusive();
     _isUncacheable = target->req->isUncacheable();
     inService = false;
diff --git a/src/mem/cache/miss/mshr.hh b/src/mem/cache/miss/mshr.hh
index b38b69c52..47f6a819b 100644
--- a/src/mem/cache/miss/mshr.hh
+++ b/src/mem/cache/miss/mshr.hh
@@ -136,7 +136,7 @@ public:
      * @param size The number of bytes to request.
      * @param pkt  The original miss.
      */
-    void allocate(Addr addr, int size, PacketPtr pkt, bool isFill);
+    void allocate(Addr addr, int size, PacketPtr pkt);
 
     /**
      * Allocate this MSHR as a buffer for the given request.
diff --git a/src/mem/cache/miss/mshr_queue.cc b/src/mem/cache/miss/mshr_queue.cc
index d58594798..6b030a865 100644
--- a/src/mem/cache/miss/mshr_queue.cc
+++ b/src/mem/cache/miss/mshr_queue.cc
@@ -36,8 +36,9 @@
 
 using namespace std;
 
-MSHRQueue::MSHRQueue(int num_entries, int reserve)
-    : numEntries(num_entries + reserve - 1), numReserve(reserve)
+MSHRQueue::MSHRQueue(int num_entries, int reserve, int _index)
+    : numEntries(num_entries + reserve - 1), numReserve(reserve),
+      index(_index)
 {
     allocated = 0;
     inServiceEntries = 0;
@@ -107,14 +108,14 @@ MSHRQueue::findPending(Addr addr, int size) const
 }
 
 MSHR *
-MSHRQueue::allocate(Addr addr, int size, PacketPtr &pkt, bool isFill)
+MSHRQueue::allocate(Addr addr, int size, PacketPtr &pkt)
 {
     assert(!freeList.empty());
     MSHR *mshr = freeList.front();
     assert(mshr->getNumTargets() == 0);
     freeList.pop_front();
 
-    mshr->allocate(addr, size, pkt, isFill);
+    mshr->allocate(addr, size, pkt);
     mshr->allocIter = allocatedList.insert(allocatedList.end(), mshr);
     mshr->readyIter = pendingList.insert(pendingList.end(), mshr);
 
diff --git a/src/mem/cache/miss/mshr_queue.hh b/src/mem/cache/miss/mshr_queue.hh
index 182dfd5b2..806aa9c64 100644
--- a/src/mem/cache/miss/mshr_queue.hh
+++ b/src/mem/cache/miss/mshr_queue.hh
@@ -74,6 +74,9 @@ class MSHRQueue
     int allocated;
     /** The number of entries that have been forwarded to the bus. */
     int inServiceEntries;
+    /** The index of this queue within the cache (MSHR queue vs. write
+     * buffer). */
+    const int index;
 
     /**
      * Create a queue with a given number of entries.
@@ -81,7 +84,7 @@ class MSHRQueue
      * @param reserve The minimum number of entries needed to satisfy
      * any access.
      */
-    MSHRQueue(int num_entries, int reserve = 1);
+    MSHRQueue(int num_entries, int reserve, int index);
 
     /** Destructor */
     ~MSHRQueue();
@@ -118,7 +121,7 @@ class MSHRQueue
      *
      * @pre There are free entries.
      */
-    MSHR *allocate(Addr addr, int size, PacketPtr &pkt, bool isFill);
+    MSHR *allocate(Addr addr, int size, PacketPtr &pkt);
 
     /**
      * Removes the given MSHR from the queue. This places the MSHR on the
diff --git a/src/mem/cache/prefetch/base_prefetcher.cc b/src/mem/cache/prefetch/base_prefetcher.cc
index d03cfe3ae..378363665 100644
--- a/src/mem/cache/prefetch/base_prefetcher.cc
+++ b/src/mem/cache/prefetch/base_prefetcher.cc
@@ -141,7 +141,7 @@ BasePrefetcher::getPacket()
             keepTrying = cache->inCache(pkt->getAddr());
         }
         if (pf.empty()) {
-            cache->deassertMemSideBusRequest(Request_PF);
+            cache->deassertMemSideBusRequest(BaseCache::Request_PF);
             if (keepTrying) return NULL; //None left, all were in cache
         }
     } while (keepTrying);
@@ -165,7 +165,7 @@ BasePrefetcher::handleMiss(PacketPtr &pkt, Tick time)
             pfRemovedMSHR++;
             pf.erase(iter);
             if (pf.empty())
-                cache->deassertMemSideBusRequest(Request_PF);
+                cache->deassertMemSideBusRequest(BaseCache::Request_PF);
         }
 
         //Remove anything in queue with delay older than time
@@ -182,7 +182,7 @@ BasePrefetcher::handleMiss(PacketPtr &pkt, Tick time)
                 iter--;
             }
             if (pf.empty())
-                cache->deassertMemSideBusRequest(Request_PF);
+                cache->deassertMemSideBusRequest(BaseCache::Request_PF);
         }
 
 
@@ -243,7 +243,7 @@ BasePrefetcher::handleMiss(PacketPtr &pkt, Tick time)
             pf.push_back(prefetch);
 
             //Make sure to request the bus, with proper delay
-            cache->requestMemSideBus(Request_PF, prefetch->time);
+            cache->requestMemSideBus(BaseCache::Request_PF, prefetch->time);
 
             //Increment through the list
             addr++;
diff --git a/src/mem/packet.cc b/src/mem/packet.cc
index 57c6a6381..cd0ed8a2e 100644
--- a/src/mem/packet.cc
+++ b/src/mem/packet.cc
@@ -64,10 +64,8 @@ MemCmd::commandInfo[] =
     /* WriteResp */
     { SET3(IsWrite, NeedsExclusive, IsResponse), InvalidCmd, "WriteResp" },
     /* Writeback */
-    { SET5(IsWrite, NeedsExclusive, IsRequest, HasData, NeedsResponse),
-            WritebackAck, "Writeback" },
-    /* WritebackAck */
-    { SET3(IsWrite, NeedsExclusive, IsResponse), InvalidCmd, "WritebackAck" },
+    { SET4(IsWrite, NeedsExclusive, IsRequest, HasData),
+            InvalidCmd, "Writeback" },
     /* SoftPFReq */
     { SET4(IsRead, IsRequest, IsSWPrefetch, NeedsResponse),
             SoftPFResp, "SoftPFReq" },
@@ -88,7 +86,11 @@ MemCmd::commandInfo[] =
     { SET4(IsWrite, NeedsExclusive, IsInvalidate, IsResponse),
             InvalidCmd, "WriteInvalidateResp" },
     /* UpgradeReq */
-    { SET3(IsInvalidate, IsRequest, IsUpgrade), InvalidCmd, "UpgradeReq" },
+    { SET4(IsInvalidate, NeedsExclusive, IsRequest, NeedsResponse),
+            UpgradeResp, "UpgradeReq" },
+    /* UpgradeResp */
+    { SET3(IsInvalidate, NeedsExclusive, IsResponse),
+            InvalidCmd, "UpgradeResp" },
     /* ReadExReq */
     { SET5(IsRead, NeedsExclusive, IsInvalidate, IsRequest, NeedsResponse),
             ReadExResp, "ReadExReq" },
diff --git a/src/mem/packet.hh b/src/mem/packet.hh
index ca186d875..6291b7c1d 100644
--- a/src/mem/packet.hh
+++ b/src/mem/packet.hh
@@ -67,7 +67,6 @@ class MemCmd
         WriteReq,
         WriteResp,
         Writeback,
-        WritebackAck,
         SoftPFReq,
         HardPFReq,
         SoftPFResp,
@@ -75,6 +74,7 @@ class MemCmd
         WriteInvalidateReq,
         WriteInvalidateResp,
         UpgradeReq,
+        UpgradeResp,
         ReadExReq,
         ReadExResp,
         LoadLockedReq,
@@ -100,7 +100,6 @@ class MemCmd
         NeedsResponse,  //!< Requester needs response from target
         IsSWPrefetch,
         IsHWPrefetch,
-        IsUpgrade,
         IsLocked,       //!< Alpha/MIPS LL or SC access
         HasData,        //!< There is an associated payload
         NUM_COMMAND_ATTRIBUTES
-- 
cgit v1.2.3


From bdd5fd20fb19eb52ef812cd284094e5513646e36 Mon Sep 17 00:00:00 2001
From: Steve Reinhardt <stever@eecs.umich.edu>
Date: Fri, 22 Jun 2007 09:24:07 -0700
Subject: Fixes to hitLatency, blocking, buffer allocation. Single-cpu timing
 mode seems to work now.

--HG--
extra : convert_revision : 720f6172df18a1c941e5bd0e8fdfbd686c13c7ad
---
 src/mem/cache/base_cache.cc      |  1 +
 src/mem/cache/base_cache.hh      | 31 ++++++++++------------
 src/mem/cache/cache.hh           | 26 -------------------
 src/mem/cache/cache_impl.hh      | 56 +++++++++++++++++-----------------------
 src/mem/cache/miss/mshr.hh       | 24 +++++++----------
 src/mem/cache/miss/mshr_queue.cc | 10 +++----
 6 files changed, 53 insertions(+), 95 deletions(-)

diff --git a/src/mem/cache/base_cache.cc b/src/mem/cache/base_cache.cc
index 8b476e100..1f5182574 100644
--- a/src/mem/cache/base_cache.cc
+++ b/src/mem/cache/base_cache.cc
@@ -54,6 +54,7 @@ BaseCache::BaseCache(const std::string &name, Params &params)
       writeBuffer(params.numWriteBuffers, params.numMSHRs+1000,
                   MSHRQueue_WriteBuffer),
       blkSize(params.blkSize),
+      hitLatency(params.hitLatency),
       numTarget(params.numTargets),
       blocked(0),
       noTargetMSHR(NULL),
diff --git a/src/mem/cache/base_cache.hh b/src/mem/cache/base_cache.hh
index 10fd3289c..27134b2ad 100644
--- a/src/mem/cache/base_cache.hh
+++ b/src/mem/cache/base_cache.hh
@@ -195,6 +195,11 @@ class BaseCache : public MemObject
     /** Block size of this cache */
     const int blkSize;
 
+    /**
+     * The latency of a hit in this device.
+     */
+    int hitLatency;
+
     /** The number of targets for each MSHR. */
     const int numTarget;
 
@@ -464,15 +469,10 @@ class BaseCache : public MemObject
         if (blocked == 0) {
             blocked_causes[cause]++;
             blockedCycle = curTick;
+            cpuSidePort->setBlocked();
         }
-        int old_state = blocked;
-        if (!(blocked & flag)) {
-            //Wasn't already blocked for this cause
-            blocked |= flag;
-            DPRINTF(Cache,"Blocking for cause %s\n", cause);
-            if (!old_state)
-                cpuSidePort->setBlocked();
-        }
+        blocked |= flag;
+        DPRINTF(Cache,"Blocking for cause %d, mask=%d\n", cause, blocked);
     }
 
     /**
@@ -485,16 +485,11 @@ class BaseCache : public MemObject
     void clearBlocked(BlockedCause cause)
     {
         uint8_t flag = 1 << cause;
-        DPRINTF(Cache,"Unblocking for cause %s, causes left=%i\n",
-                cause, blocked);
-        if (blocked & flag)
-        {
-            blocked &= ~flag;
-            if (!isBlocked()) {
-                blocked_cycles[cause] += curTick - blockedCycle;
-                DPRINTF(Cache,"Unblocking from all causes\n");
-                cpuSidePort->clearBlocked();
-            }
+        blocked &= ~flag;
+        DPRINTF(Cache,"Unblocking for cause %d, mask=%d\n", cause, blocked);
+        if (blocked == 0) {
+            blocked_cycles[cause] += curTick - blockedCycle;
+            cpuSidePort->clearBlocked();
         }
     }
 
diff --git a/src/mem/cache/cache.hh b/src/mem/cache/cache.hh
index 06fce1a71..a93b761ec 100644
--- a/src/mem/cache/cache.hh
+++ b/src/mem/cache/cache.hh
@@ -136,23 +136,6 @@ class Cache : public BaseCache
     /** Prefetcher */
     BasePrefetcher *prefetcher;
 
-    /**
-     * The clock ratio of the outgoing bus.
-     * Used for calculating critical word first.
-     */
-    int busRatio;
-
-     /**
-      * The bus width in bytes of the outgoing bus.
-      * Used for calculating critical word first.
-      */
-    int busWidth;
-
-    /**
-     * The latency of a hit in this device.
-     */
-    int hitLatency;
-
     /**
      * Can this cache should allocate a block on a line-sized write miss.
      */
@@ -303,15 +286,6 @@ class Cache : public BaseCache
      */
     void squash(int threadNum);
 
-    /**
-     * Allocate a new MSHR or write buffer to handle a miss.
-     * @param pkt The access that missed.
-     * @param time The time to continue processing the miss.
-     * @param isFill Whether to fetch & allocate a block
-     *               or just forward the request.
-     */
-    MSHR *allocateBuffer(PacketPtr pkt, Tick time, bool requestBus);
-
     /**
      * Selects a outstanding request to service.
      * @return The request to service, NULL if none found.
diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh
index 81fcb4158..0649b5061 100644
--- a/src/mem/cache/cache_impl.hh
+++ b/src/mem/cache/cache_impl.hh
@@ -149,27 +149,6 @@ Cache<TagStore,Coherence>::cmpAndSwap(BlkType *blk, PacketPtr pkt)
 /////////////////////////////////////////////////////
 
 
-template<class TagStore, class Coherence>
-MSHR *
-Cache<TagStore,Coherence>::allocateBuffer(PacketPtr pkt, Tick time,
-                                          bool requestBus)
-{
-    MSHRQueue *mq = NULL;
-
-    if (pkt->isWrite() && !pkt->isRead()) {
-        /**
-         * @todo Add write merging here.
-         */
-        mq = &writeBuffer;
-    } else {
-        mq = &mshrQueue;
-    }
-
-    return allocateBufferInternal(mq, pkt->getAddr(), pkt->getSize(),
-                                  pkt, time, requestBus);
-}
-
-
 template<class TagStore, class Coherence>
 void
 Cache<TagStore,Coherence>::markInService(MSHR *mshr)
@@ -438,6 +417,8 @@ Cache<TagStore,Coherence>::getBusPacket(PacketPtr cpu_pkt, BlkType *blk,
         return NULL;
     }
 
+    assert(cpu_pkt->needsResponse());
+
     MemCmd cmd;
     const bool useUpgrades = true;
     if (blkValid && useUpgrades) {
@@ -1043,23 +1024,34 @@ Cache<TagStore,Coherence>::getTimingPacket()
         return NULL;
     }
 
-    BlkType *blk = tags->findBlock(mshr->addr);
-
     // use request from 1st target
     PacketPtr tgt_pkt = mshr->getTarget()->pkt;
-    PacketPtr pkt = getBusPacket(tgt_pkt, blk, mshr->needsExclusive);
+    PacketPtr pkt = NULL;
 
-    mshr->isCacheFill = (pkt != NULL);
-
-    if (pkt == NULL) {
-        // make copy of current packet to forward
-        pkt = new Packet(tgt_pkt);
-        pkt->allocate();
-        if (pkt->isWrite()) {
-            pkt->setData(tgt_pkt->getPtr<uint8_t>());
+    if (mshr->isSimpleForward()) {
+        // no response expected, just forward packet as it is
+        assert(tags->findBlock(mshr->addr) == NULL);
+        pkt = tgt_pkt;
+    } else {
+        BlkType *blk = tags->findBlock(mshr->addr);
+        pkt = getBusPacket(tgt_pkt, blk, mshr->needsExclusive);
+
+        mshr->isCacheFill = (pkt != NULL);
+
+        if (pkt == NULL) {
+            // not a cache block request, but a response is expected
+            assert(!mshr->isSimpleForward());
+            // make copy of current packet to forward, keep current
+            // copy for response handling
+            pkt = new Packet(tgt_pkt);
+            pkt->allocate();
+            if (pkt->isWrite()) {
+                pkt->setData(tgt_pkt->getPtr<uint8_t>());
+            }
         }
     }
 
+    assert(pkt != NULL);
     pkt->senderState = mshr;
     return pkt;
 }
diff --git a/src/mem/cache/miss/mshr.hh b/src/mem/cache/miss/mshr.hh
index 47f6a819b..195438e46 100644
--- a/src/mem/cache/miss/mshr.hh
+++ b/src/mem/cache/miss/mshr.hh
@@ -164,28 +164,19 @@ public:
      * Returns the current number of allocated targets.
      * @return The current number of allocated targets.
      */
-    int getNumTargets()
-    {
-        return ntargets;
-    }
+    int getNumTargets() { return ntargets; }
 
     /**
      * Returns a pointer to the target list.
      * @return a pointer to the target list.
      */
-    TargetList* getTargetList()
-    {
-        return &targets;
-    }
+    TargetList* getTargetList() { return &targets; }
 
     /**
      * Returns a reference to the first target.
      * @return A pointer to the first target.
      */
-    Target *getTarget()
-    {
-        return &targets.front();
-    }
+    Target *getTarget() { return &targets.front(); }
 
     /**
      * Pop first target.
@@ -200,9 +191,14 @@ public:
      * Returns true if there are targets left.
      * @return true if there are targets
      */
-    bool hasTargets()
+    bool hasTargets() { return !targets.empty(); }
+
+    bool isSimpleForward()
     {
-        return !targets.empty();
+        if (getNumTargets() != 1)
+            return false;
+        Target *tgt = getTarget();
+        return tgt->isCpuSide() && !tgt->pkt->needsResponse();
     }
 
     /**
diff --git a/src/mem/cache/miss/mshr_queue.cc b/src/mem/cache/miss/mshr_queue.cc
index 6b030a865..3407e2588 100644
--- a/src/mem/cache/miss/mshr_queue.cc
+++ b/src/mem/cache/miss/mshr_queue.cc
@@ -158,14 +158,14 @@ MSHRQueue::moveToFront(MSHR *mshr)
 void
 MSHRQueue::markInService(MSHR *mshr)
 {
-    //assert(mshr == pendingList.front());
-#if 0
-    if (!mshr->pkt->needsResponse() && !(mshr->pkt->cmd == MemCmd::UpgradeReq)) {
-        assert(mshr->getNumTargets() == 0);
+    if (mshr->isSimpleForward()) {
+        // we just forwarded the request packet & don't expect a
+        // response, so get rid of it
+        assert(mshr->getNumTargets() == 1);
+        mshr->popTarget();
         deallocate(mshr);
         return;
     }
-#endif
     mshr->inService = true;
     pendingList.erase(mshr->readyIter);
     //mshr->readyIter = NULL;
-- 
cgit v1.2.3


From 57ff2604e59647c6afe988767186f13c80c1aa16 Mon Sep 17 00:00:00 2001
From: Steve Reinhardt <stever@eecs.umich.edu>
Date: Sat, 23 Jun 2007 13:24:33 -0700
Subject: Minor fix plus new assertion to catch similar bugs.

src/cpu/memtest/memtest.cc:
    Need to set packet source field so that response from cache
    doesn't run into assertion failure when copying source to dest.
src/mem/packet.hh:
    Copy source field when copying packets.
    Assert that source is valid before copying it to dest
    when turning packets around.

--HG--
extra : convert_revision : 09e3cfda424aa89fe170e21e955b295746832bf8
---
 src/cpu/memtest/memtest.cc | 2 ++
 src/mem/packet.hh          | 4 ++--
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/cpu/memtest/memtest.cc b/src/cpu/memtest/memtest.cc
index 6e8c5d0bf..019b4328c 100644
--- a/src/cpu/memtest/memtest.cc
+++ b/src/cpu/memtest/memtest.cc
@@ -344,6 +344,7 @@ MemTest::tick()
                 req->getPaddr(), blockAddr(req->getPaddr()), *result);
 
         PacketPtr pkt = new Packet(req, MemCmd::ReadReq, Packet::Broadcast);
+        pkt->setSrc(0);
         pkt->dataDynamicArray(new uint8_t[req->getSize()]);
         MemTestSenderState *state = new MemTestSenderState(result);
         pkt->senderState = state;
@@ -373,6 +374,7 @@ MemTest::tick()
                 req->getPaddr(), blockAddr(req->getPaddr()), data & 0xff);
 
         PacketPtr pkt = new Packet(req, MemCmd::WriteReq, Packet::Broadcast);
+        pkt->setSrc(0);
         uint8_t *pkt_data = new uint8_t[req->getSize()];
         pkt->dataDynamicArray(pkt_data);
         memcpy(pkt_data, &data, req->getSize());
diff --git a/src/mem/packet.hh b/src/mem/packet.hh
index 80da045ef..fc1c283ed 100644
--- a/src/mem/packet.hh
+++ b/src/mem/packet.hh
@@ -384,7 +384,7 @@ class Packet : public FastAlloc
     Packet(Packet *origPkt)
         :  data(NULL), staticData(false), dynamicData(false), arrayData(false),
            addr(origPkt->addr), size(origPkt->size),
-           dest(origPkt->dest),
+           src(origPkt->src), dest(origPkt->dest),
            addrSizeValid(origPkt->addrSizeValid), srcValid(origPkt->srcValid),
            snoopFlags(origPkt->snoopFlags),
            time(curTick),
@@ -440,7 +440,7 @@ class Packet : public FastAlloc
      */
     void convertAtomicToTimingResponse()
     {
-        dest = src;
+        dest = getSrc();
         srcValid = false;
     }
 
-- 
cgit v1.2.3


From 47bce8ef7875420b2e26ebd834ed0d4146b65d5b Mon Sep 17 00:00:00 2001
From: Steve Reinhardt <stever@eecs.umich.edu>
Date: Sun, 24 Jun 2007 17:32:31 -0700
Subject: Better handling of deferred targets.

--HG--
extra : convert_revision : 0fbc28c32c1eeb3dd672df14c1d53bd516f81d0f
---
 src/mem/cache/base_cache.cc |   3 +-
 src/mem/cache/base_cache.hh |   2 -
 src/mem/cache/cache.hh      |   3 +-
 src/mem/cache/cache_impl.hh | 127 ++++++++++++++++++++++----------------------
 src/mem/cache/miss/mshr.cc  |  90 ++++++++++++++++++++++---------
 src/mem/cache/miss/mshr.hh  |  17 +++---
 6 files changed, 141 insertions(+), 101 deletions(-)

diff --git a/src/mem/cache/base_cache.cc b/src/mem/cache/base_cache.cc
index 1f5182574..ac577f5a2 100644
--- a/src/mem/cache/base_cache.cc
+++ b/src/mem/cache/base_cache.cc
@@ -42,8 +42,7 @@ using namespace std;
 
 BaseCache::CachePort::CachePort(const std::string &_name, BaseCache *_cache)
     : SimpleTimingPort(_name, _cache), cache(_cache), otherPort(NULL),
-      blocked(false), waitingOnRetry(false), mustSendRetry(false),
-      requestCauses(0)
+      blocked(false), mustSendRetry(false), requestCauses(0)
 {
 }
 
diff --git a/src/mem/cache/base_cache.hh b/src/mem/cache/base_cache.hh
index 27134b2ad..b35fc0811 100644
--- a/src/mem/cache/base_cache.hh
+++ b/src/mem/cache/base_cache.hh
@@ -118,8 +118,6 @@ class BaseCache : public MemObject
 
         bool blocked;
 
-        bool waitingOnRetry;
-
         bool mustSendRetry;
 
         /**
diff --git a/src/mem/cache/cache.hh b/src/mem/cache/cache.hh
index a93b761ec..2a95dc53c 100644
--- a/src/mem/cache/cache.hh
+++ b/src/mem/cache/cache.hh
@@ -182,8 +182,7 @@ class Cache : public BaseCache
     BlkType *handleFill(PacketPtr pkt, BlkType *blk,
                         PacketList &writebacks);
 
-    bool satisfyCpuSideRequest(PacketPtr pkt, BlkType *blk);
-    bool satisfyTarget(MSHR::Target *target, BlkType *blk);
+    void satisfyCpuSideRequest(PacketPtr pkt, BlkType *blk);
     bool satisfyMSHR(MSHR *mshr, PacketPtr pkt, BlkType *blk);
 
     void doTimingSupplyResponse(PacketPtr req_pkt, uint8_t *blk_data);
diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh
index 0649b5061..b4d334249 100644
--- a/src/mem/cache/cache_impl.hh
+++ b/src/mem/cache/cache_impl.hh
@@ -368,7 +368,7 @@ Cache<TagStore,Coherence>::timingAccess(PacketPtr pkt)
             if (mshr->threadNum != 0/*pkt->req->getThreadNum()*/) {
                 mshr->threadNum = -1;
             }
-            mshr->allocateTarget(pkt, true);
+            mshr->allocateTarget(pkt);
             if (mshr->getNumTargets() == numTarget) {
                 noTargetMSHR = mshr;
                 setBlocked(Blocked_NoTargets);
@@ -483,8 +483,7 @@ Cache<TagStore,Coherence>::atomicAccess(PacketPtr pkt)
         if (isCacheFill) {
             PacketList writebacks;
             blk = handleFill(busPkt, blk, writebacks);
-            bool status = satisfyCpuSideRequest(pkt, blk);
-            assert(status);
+            satisfyCpuSideRequest(pkt, blk);
             delete busPkt;
 
             // Handle writebacks if needed
@@ -538,12 +537,14 @@ Cache<TagStore,Coherence>::functionalAccess(PacketPtr pkt,
 
     // There can be many matching outstanding writes.
     std::vector<MSHR*> writes;
-    writeBuffer.findMatches(blk_addr, writes);
+    assert(!writeBuffer.findMatches(blk_addr, writes));
+/*  Need to change this to iterate through targets in mshr??
     for (int i = 0; i < writes.size(); ++i) {
         MSHR *mshr = writes[i];
         if (pkt->checkFunctional(mshr->addr, mshr->size, mshr->writeData))
             return;
     }
+*/
 
     otherSidePort->checkAndSendFunctional(pkt);
 }
@@ -557,43 +558,30 @@ Cache<TagStore,Coherence>::functionalAccess(PacketPtr pkt,
 
 
 template<class TagStore, class Coherence>
-bool
+void
 Cache<TagStore,Coherence>::satisfyCpuSideRequest(PacketPtr pkt, BlkType *blk)
 {
-    if (blk && (pkt->needsExclusive() ? blk->isWritable() : blk->isValid())) {
-        assert(pkt->isWrite() || pkt->isReadWrite() || pkt->isRead());
-        assert(pkt->getOffset(blkSize) + pkt->getSize() <= blkSize);
-
-        if (pkt->isWrite()) {
-            if (blk->checkWrite(pkt)) {
-                blk->status |= BlkDirty;
-                pkt->writeDataToBlock(blk->data, blkSize);
-            }
-        } else if (pkt->isReadWrite()) {
-            cmpAndSwap(blk, pkt);
-        } else {
-            if (pkt->isLocked()) {
-                blk->trackLoadLocked(pkt);
-            }
-            pkt->setDataFromBlock(blk->data, blkSize);
+    assert(blk);
+    assert(pkt->needsExclusive() ? blk->isWritable() : blk->isValid());
+    assert(pkt->isWrite() || pkt->isReadWrite() || pkt->isRead());
+    assert(pkt->getOffset(blkSize) + pkt->getSize() <= blkSize);
+
+    if (pkt->isWrite()) {
+        if (blk->checkWrite(pkt)) {
+            blk->status |= BlkDirty;
+            pkt->writeDataToBlock(blk->data, blkSize);
         }
-
-        return true;
+    } else if (pkt->isReadWrite()) {
+        cmpAndSwap(blk, pkt);
     } else {
-        return false;
+        if (pkt->isLocked()) {
+            blk->trackLoadLocked(pkt);
+        }
+        pkt->setDataFromBlock(blk->data, blkSize);
     }
 }
 
 
-template<class TagStore, class Coherence>
-bool
-Cache<TagStore,Coherence>::satisfyTarget(MSHR::Target *target, BlkType *blk)
-{
-    assert(target != NULL);
-    assert(target->isCpuSide());
-    return satisfyCpuSideRequest(target->pkt, blk);
-}
-
 template<class TagStore, class Coherence>
 bool
 Cache<TagStore,Coherence>::satisfyMSHR(MSHR *mshr, PacketPtr pkt,
@@ -611,37 +599,42 @@ Cache<TagStore,Coherence>::satisfyMSHR(MSHR *mshr, PacketPtr pkt,
     while (mshr->hasTargets()) {
         MSHR::Target *target = mshr->getTarget();
 
-        if (!satisfyTarget(target, blk)) {
-            // Invalid access, need to do another request
-            // can occur if block is invalidated, or not correct
-            // permissions
-            MSHRQueue *mq = mshr->queue;
-            mq->markPending(mshr);
-            mshr->order = order++;
-            requestMemSideBus((RequestCause)mq->index, pkt->finishTime);
-            return false;
-        }
+        if (target->isCpuSide()) {
+            satisfyCpuSideRequest(target->pkt, blk);
+            // How many bytes past the first request this one is
+            int transfer_offset =
+                target->pkt->getOffset(blkSize) - initial_offset;
+            if (transfer_offset < 0) {
+                transfer_offset += blkSize;
+            }
 
+            // If critical word (no offset) return first word time
+            Tick completion_time = tags->getHitLatency() +
+                transfer_offset ? pkt->finishTime : pkt->firstWordTime;
 
-        // How many bytes pass the first request is this one
-        int transfer_offset = target->pkt->getOffset(blkSize) - initial_offset;
-        if (transfer_offset < 0) {
-            transfer_offset += blkSize;
+            if (!target->pkt->req->isUncacheable()) {
+                missLatency[target->pkt->cmdToIndex()][0/*pkt->req->getThreadNum()*/] +=
+                    completion_time - target->time;
+            }
+            target->pkt->makeTimingResponse();
+            cpuSidePort->respond(target->pkt, completion_time);
+        } else {
+            // response to snoop request
+            DPRINTF(Cache, "processing deferred snoop...\n");
+            handleSnoop(target->pkt, blk, true);
         }
 
-        // If critical word (no offset) return first word time
-        Tick completion_time = tags->getHitLatency() +
-            transfer_offset ? pkt->finishTime : pkt->firstWordTime;
-
-        if (!target->pkt->req->isUncacheable()) {
-            missLatency[target->pkt->cmdToIndex()][0/*pkt->req->getThreadNum()*/] +=
-                completion_time - target->time;
-        }
-        target->pkt->makeTimingResponse();
-        cpuSidePort->respond(target->pkt, completion_time);
         mshr->popTarget();
     }
 
+    if (mshr->promoteDeferredTargets()) {
+        MSHRQueue *mq = mshr->queue;
+        mq->markPending(mshr);
+        mshr->order = order++;
+        requestMemSideBus((RequestCause)mq->index, pkt->finishTime);
+        return false;
+    }
+
     return true;
 }
 
@@ -653,6 +646,7 @@ Cache<TagStore,Coherence>::handleResponse(PacketPtr pkt)
     Tick time = curTick + hitLatency;
     MSHR *mshr = dynamic_cast<MSHR*>(pkt->senderState);
     assert(mshr);
+
     if (pkt->result == Packet::Nacked) {
         //pkt->reinitFromRequest();
         warn("NACKs from devices not connected to the same bus "
@@ -661,7 +655,7 @@ Cache<TagStore,Coherence>::handleResponse(PacketPtr pkt)
     }
     assert(pkt->result != Packet::BadAddress);
     assert(pkt->result == Packet::Success);
-    DPRINTF(Cache, "Handling reponse to %x\n", pkt->getAddr());
+    DPRINTF(Cache, "Handling response to %x\n", pkt->getAddr());
 
     MSHRQueue *mq = mshr->queue;
     bool wasFull = mq->isFull();
@@ -883,7 +877,12 @@ Cache<TagStore,Coherence>::snoopTiming(PacketPtr pkt)
     MSHR *mshr = mshrQueue.findMatch(blk_addr);
     // better not be snooping a request that conflicts with something
     // we have outstanding...
-    assert(!mshr || !mshr->inService);
+    if (mshr && mshr->inService) {
+        assert(mshr->getNumTargets() < numTarget); //handle later
+        mshr->allocateSnoopTarget(pkt);
+        assert(mshr->getNumTargets() < numTarget); //handle later
+        return;
+    }
 
     //We also need to check the writeback buffers and handle those
     std::vector<MSHR *> writebacks;
@@ -895,6 +894,9 @@ Cache<TagStore,Coherence>::snoopTiming(PacketPtr pkt)
         for (int i=0; i<writebacks.size(); i++) {
             mshr = writebacks[i];
             assert(!mshr->isUncacheable());
+            assert(mshr->getNumTargets() == 1);
+            PacketPtr wb_pkt = mshr->getTarget()->pkt;
+            assert(wb_pkt->cmd == MemCmd::Writeback);
 
             if (pkt->isRead()) {
                 pkt->assertMemInhibit();
@@ -906,7 +908,7 @@ Cache<TagStore,Coherence>::snoopTiming(PacketPtr pkt)
                     // the packet's invalidate flag is set...
                     assert(pkt->isInvalidate());
                 }
-                doTimingSupplyResponse(pkt, mshr->writeData);
+                doTimingSupplyResponse(pkt, wb_pkt->getPtr<uint8_t>());
             }
 
             if (pkt->isInvalidate()) {
@@ -1208,7 +1210,7 @@ Cache<TagStore,Coherence>::MemSidePort::sendPacket()
 
         waitingOnRetry = !success;
         if (waitingOnRetry) {
-            DPRINTF(CachePort, "%s now waiting on a retry\n", name());
+            DPRINTF(CachePort, "now waiting on a retry\n");
         } else {
             myCache()->markInService(mshr);
         }
@@ -1220,8 +1222,7 @@ Cache<TagStore,Coherence>::MemSidePort::sendPacket()
     if (!waitingOnRetry) {
         if (isBusRequested()) {
             // more requests/writebacks: rerequest ASAP
-            DPRINTF(CachePort, "%s still more MSHR requests to send\n",
-                    name());
+            DPRINTF(CachePort, "still more MSHR requests to send\n");
             sendEvent->schedule(curTick+1);
         } else if (!transmitList.empty()) {
             // deferred packets: rerequest bus, but possibly not until later
diff --git a/src/mem/cache/miss/mshr.cc b/src/mem/cache/miss/mshr.cc
index 1f2c05a6e..24ff3b33c 100644
--- a/src/mem/cache/miss/mshr.cc
+++ b/src/mem/cache/miss/mshr.cc
@@ -68,12 +68,16 @@ MSHR::allocate(Addr _addr, int _size, PacketPtr target)
     // Don't know of a case where we would allocate a new MSHR for a
     // snoop (mem0-side request), so set cpuSide to true here.
     targets.push_back(Target(target, true));
+    assert(deferredTargets.empty());
+    deferredNeedsExclusive = false;
+    pendingInvalidate = false;
 }
 
 void
 MSHR::deallocate()
 {
     assert(targets.empty());
+    assert(deferredTargets.empty());
     assert(ntargets == 0);
     inService = false;
     //allocIter = NULL;
@@ -84,41 +88,77 @@ MSHR::deallocate()
  * Adds a target to an MSHR
  */
 void
-MSHR::allocateTarget(PacketPtr target, bool cpuSide)
+MSHR::allocateTarget(PacketPtr target)
 {
-    //If we append an invalidate and we issued a read to the bus,
-    //but now have some pending writes, we need to move
-    //the invalidate to before the first non-read
-    if (inService && !inServiceForExclusive && needsExclusive
-        && !cpuSide && target->isInvalidate()) {
-        std::list<Target> temp;
-
-        while (!targets.empty()) {
-            if (targets.front().pkt->needsExclusive()) break;
-            //Place on top of temp stack
-            temp.push_front(targets.front());
-            //Remove from targets
-            targets.pop_front();
+    if (inService) {
+        if (!deferredTargets.empty() || pendingInvalidate ||
+            (!needsExclusive && target->needsExclusive())) {
+            // need to put on deferred list
+            deferredTargets.push_back(Target(target, true));
+            if (target->needsExclusive()) {
+                deferredNeedsExclusive = true;
+            }
+        } else {
+            // still OK to append to outstanding request
+            targets.push_back(Target(target, true));
+        }
+    } else {
+        if (target->needsExclusive()) {
+            needsExclusive = true;
         }
 
-        //Now that we have all the reads off until first non-read, we can
-        //place the invalidate on
-        targets.push_front(Target(target, cpuSide));
+        targets.push_back(Target(target, true));
+    }
 
-        //Now we pop off the temp_stack and put them back
-        while (!temp.empty()) {
-            targets.push_front(temp.front());
-            temp.pop_front();
-        }
+    ++ntargets;
+}
+
+void
+MSHR::allocateSnoopTarget(PacketPtr target)
+{
+    assert(inService); // don't bother to call otherwise
+
+    if (pendingInvalidate) {
+        // a prior snoop has already appended an invalidation, so
+        // logically we don't have the block anymore...
+        return;
     }
-    else {
-        targets.push_back(Target(target, cpuSide));
+
+    if (needsExclusive) {
+        // We're awaiting an exclusive copy, so ownership is pending.
+        // It's up to us to respond once the data arrives.
+        target->assertMemInhibit();
+    } else if (target->needsExclusive()) {
+        // This transaction will take away our pending copy
+        pendingInvalidate = true;
+    } else {
+        // If we're not going to supply data or perform an
+        // invalidation, we don't need to save this.
+        return;
     }
 
+    targets.push_back(Target(target, false));
     ++ntargets;
+}
+
+
+bool
+MSHR::promoteDeferredTargets()
+{
+    if (deferredTargets.empty()) {
+        return false;
+    }
+
+    assert(targets.empty());
+    targets = deferredTargets;
+    deferredTargets.clear();
     assert(targets.size() == ntargets);
 
-    needsExclusive = needsExclusive || target->needsExclusive();
+    needsExclusive = deferredNeedsExclusive;
+    pendingInvalidate = false;
+    deferredNeedsExclusive = false;
+
+    return true;
 }
 
 
diff --git a/src/mem/cache/miss/mshr.hh b/src/mem/cache/miss/mshr.hh
index 195438e46..f4e090a12 100644
--- a/src/mem/cache/miss/mshr.hh
+++ b/src/mem/cache/miss/mshr.hh
@@ -85,9 +85,6 @@ class MSHR : public Packet::SenderState
     /** Size of the request. */
     int size;
 
-    /** Data associated with the request (if a write). */
-    uint8_t *writeData;
-
     /** True if the request has been sent to the bus. */
     bool inService;
 
@@ -95,12 +92,13 @@ class MSHR : public Packet::SenderState
     bool isCacheFill;
     /** True if we need to get an exclusive copy of the block. */
     bool needsExclusive;
+
     /** True if the request is uncacheable */
     bool _isUncacheable;
 
-    /** True if the request that has been sent to the bus is for en
-     * exclusive copy. */
-    bool inServiceForExclusive;
+    bool deferredNeedsExclusive;
+    bool pendingInvalidate;
+
     /** Thread number of the miss. */
     short threadNum;
     /** The number of currently allocated targets. */
@@ -124,6 +122,8 @@ private:
     /** List of all requests that match the address */
     TargetList targets;
 
+    TargetList deferredTargets;
+
 public:
 
     bool isUncacheable() { return _isUncacheable; }
@@ -153,7 +153,8 @@ public:
      * Add a request to the list of targets.
      * @param target The target.
      */
-    void allocateTarget(PacketPtr target, bool cpuSide);
+    void allocateTarget(PacketPtr target);
+    void allocateSnoopTarget(PacketPtr target);
 
     /** A simple constructor. */
     MSHR();
@@ -201,6 +202,8 @@ public:
         return tgt->isCpuSide() && !tgt->pkt->needsResponse();
     }
 
+    bool promoteDeferredTargets();
+
     /**
      * Prints the contents of this MSHR to stderr.
      */
-- 
cgit v1.2.3


From 529f12a531c331e4bdcf595a3aaf65ee5ef6b72d Mon Sep 17 00:00:00 2001
From: Steve Reinhardt <stever@eecs.umich.edu>
Date: Mon, 25 Jun 2007 06:47:05 -0700
Subject: Get rid of requestCauses.  Use timestamped queue to make sure we
 don't re-request bus prematurely.  Use callback to avoid calling sendRetry()
 recursively within recvTiming.

--HG--
extra : convert_revision : a907a2781b4b00aa8eb1ea7147afc81d6b424140
---
 src/mem/cache/base_cache.cc      |  6 ++++--
 src/mem/cache/base_cache.hh      | 42 ++++++++++++-------------------------
 src/mem/cache/cache_impl.hh      | 28 ++++++++++++-------------
 src/mem/cache/miss/mshr.cc       | 24 +++++++++++++--------
 src/mem/cache/miss/mshr.hh       | 26 +++++++++++------------
 src/mem/cache/miss/mshr_queue.cc | 45 +++++++++++++++++++++++++++++-----------
 src/mem/cache/miss/mshr_queue.hh | 23 +++++++++++++-------
 src/mem/tport.cc                 | 22 +++++++++++---------
 src/mem/tport.hh                 | 18 ++++++++++++++++
 9 files changed, 137 insertions(+), 97 deletions(-)

diff --git a/src/mem/cache/base_cache.cc b/src/mem/cache/base_cache.cc
index ac577f5a2..5062d6e87 100644
--- a/src/mem/cache/base_cache.cc
+++ b/src/mem/cache/base_cache.cc
@@ -42,7 +42,7 @@ using namespace std;
 
 BaseCache::CachePort::CachePort(const std::string &_name, BaseCache *_cache)
     : SimpleTimingPort(_name, _cache), cache(_cache), otherPort(NULL),
-      blocked(false), mustSendRetry(false), requestCauses(0)
+      blocked(false), mustSendRetry(false)
 {
 }
 
@@ -116,7 +116,9 @@ BaseCache::CachePort::clearBlocked()
     {
         DPRINTF(Cache, "Cache Sending Retry\n");
         mustSendRetry = false;
-        sendRetry();
+        SendRetryEvent *ev = new SendRetryEvent(this, true);
+        // @TODO: need to find a better time (next bus cycle?)
+        ev->schedule(curTick + 1);
     }
 }
 
diff --git a/src/mem/cache/base_cache.hh b/src/mem/cache/base_cache.hh
index b35fc0811..09484a14a 100644
--- a/src/mem/cache/base_cache.hh
+++ b/src/mem/cache/base_cache.hh
@@ -41,6 +41,7 @@
 #include <vector>
 #include <string>
 #include <list>
+#include <algorithm>
 #include <inttypes.h>
 
 #include "base/misc.hh"
@@ -105,6 +106,9 @@ class BaseCache : public MemObject
 
         bool recvRetryCommon();
 
+        typedef EventWrapper<Port, &Port::sendRetry>
+            SendRetryEvent;
+
       public:
         void setOtherPort(CachePort *_otherPort) { otherPort = _otherPort; }
 
@@ -120,27 +124,12 @@ class BaseCache : public MemObject
 
         bool mustSendRetry;
 
-        /**
-         * Bit vector for the outstanding requests for the master interface.
-         */
-        uint8_t requestCauses;
-
-        bool isBusRequested() { return requestCauses != 0; }
-
         void requestBus(RequestCause cause, Tick time)
         {
             DPRINTF(Cache, "Asserting bus request for cause %d\n", cause);
-            if (!isBusRequested() && !waitingOnRetry) {
-                assert(!sendEvent->scheduled());
-                sendEvent->schedule(time);
+            if (!waitingOnRetry) {
+                schedSendEvent(time);
             }
-            requestCauses |= (1 << cause);
-        }
-
-        void deassertBusRequest(RequestCause cause)
-        {
-            DPRINTF(Cache, "Deasserting bus request for cause %d\n", cause);
-            requestCauses &= ~(1 << cause);
         }
 
         void respond(PacketPtr pkt, Tick time) {
@@ -163,8 +152,7 @@ class BaseCache : public MemObject
     MSHR *allocateBufferInternal(MSHRQueue *mq, Addr addr, int size,
                                  PacketPtr pkt, Tick time, bool requestBus)
     {
-        MSHR *mshr = mq->allocate(addr, size, pkt);
-        mshr->order = order++;
+        MSHR *mshr = mq->allocate(addr, size, pkt, time, order++);
 
         if (mq->isFull()) {
             setBlocked((BlockedCause)mq->index);
@@ -182,9 +170,6 @@ class BaseCache : public MemObject
         MSHRQueue *mq = mshr->queue;
         bool wasFull = mq->isFull();
         mq->markInService(mshr);
-        if (!mq->havePending()) {
-            deassertMemSideBusRequest((RequestCause)mq->index);
-        }
         if (wasFull && !mq->isFull()) {
             clearBlocked((BlockedCause)mq->index);
         }
@@ -491,13 +476,10 @@ class BaseCache : public MemObject
         }
     }
 
-    /**
-     * True if the memory-side bus should be requested.
-     * @return True if there are outstanding requests for the master bus.
-     */
-    bool isMemSideBusRequested()
+    Tick nextMSHRReadyTick()
     {
-        return memSidePort->isBusRequested();
+        return std::min(mshrQueue.nextMSHRReadyTick(),
+                        writeBuffer.nextMSHRReadyTick());
     }
 
     /**
@@ -516,7 +498,9 @@ class BaseCache : public MemObject
      */
     void deassertMemSideBusRequest(RequestCause cause)
     {
-        memSidePort->deassertBusRequest(cause);
+        // obsolete!!
+        assert(false);
+        // memSidePort->deassertBusRequest(cause);
         // checkDrain();
     }
 
diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh
index b4d334249..7610b5a41 100644
--- a/src/mem/cache/cache_impl.hh
+++ b/src/mem/cache/cache_impl.hh
@@ -185,9 +185,6 @@ Cache<TagStore,Coherence>::squash(int threadNum)
         cause = Blocked_NoMSHRs;
     }
     mshrQueue.squash(threadNum);
-    if (!mshrQueue.havePending()) {
-        deassertMemSideBusRequest(Request_MSHR);
-    }
     if (unblock && !mshrQueue.isFull()) {
         clearBlocked(cause);
     }
@@ -368,11 +365,14 @@ Cache<TagStore,Coherence>::timingAccess(PacketPtr pkt)
             if (mshr->threadNum != 0/*pkt->req->getThreadNum()*/) {
                 mshr->threadNum = -1;
             }
-            mshr->allocateTarget(pkt);
+            mshr->allocateTarget(pkt, time, order++);
             if (mshr->getNumTargets() == numTarget) {
                 noTargetMSHR = mshr;
                 setBlocked(Blocked_NoTargets);
-                mshrQueue.moveToFront(mshr);
+                // need to be careful with this... if this mshr isn't
+                // ready yet (i.e. time > curTick), we don't want to
+                // move it ahead of mshrs that are ready
+                // mshrQueue.moveToFront(mshr);
             }
         } else {
             // no MSHR
@@ -630,7 +630,6 @@ Cache<TagStore,Coherence>::satisfyMSHR(MSHR *mshr, PacketPtr pkt,
     if (mshr->promoteDeferredTargets()) {
         MSHRQueue *mq = mshr->queue;
         mq->markPending(mshr);
-        mshr->order = order++;
         requestMemSideBus((RequestCause)mq->index, pkt->finishTime);
         return false;
     }
@@ -879,7 +878,7 @@ Cache<TagStore,Coherence>::snoopTiming(PacketPtr pkt)
     // we have outstanding...
     if (mshr && mshr->inService) {
         assert(mshr->getNumTargets() < numTarget); //handle later
-        mshr->allocateSnoopTarget(pkt);
+        mshr->allocateSnoopTarget(pkt, curTick, order++);
         assert(mshr->getNumTargets() < numTarget); //handle later
         return;
     }
@@ -1202,6 +1201,7 @@ Cache<TagStore,Coherence>::MemSidePort::sendPacket()
     } else {
         // check for non-response packets (requests & writebacks)
         PacketPtr pkt = myCache()->getTimingPacket();
+        assert(pkt != NULL);
         MSHR *mshr = dynamic_cast<MSHR*>(pkt->senderState);
 
         bool success = sendTiming(pkt);
@@ -1220,14 +1220,12 @@ Cache<TagStore,Coherence>::MemSidePort::sendPacket()
     // tried to send packet... if it was successful (no retry), see if
     // we need to rerequest bus or not
     if (!waitingOnRetry) {
-        if (isBusRequested()) {
-            // more requests/writebacks: rerequest ASAP
-            DPRINTF(CachePort, "still more MSHR requests to send\n");
-            sendEvent->schedule(curTick+1);
-        } else if (!transmitList.empty()) {
-            // deferred packets: rerequest bus, but possibly not until later
-            Tick time = transmitList.front().tick;
-            sendEvent->schedule(time <= curTick ? curTick+1 : time);
+        Tick nextReady = std::min(deferredPacketReadyTick(),
+                                  myCache()->nextMSHRReadyTick());
+        // @TODO: need to factor in prefetch requests here somehow
+        if (nextReady != MaxTick) {
+            DPRINTF(CachePort, "more packets to send @ %d\n", nextReady);
+            sendEvent->schedule(std::max(nextReady, curTick + 1));
         } else {
             // no more to send right now: if we're draining, we may be done
             if (drainEvent) {
diff --git a/src/mem/cache/miss/mshr.cc b/src/mem/cache/miss/mshr.cc
index 24ff3b33c..8fa11ab2e 100644
--- a/src/mem/cache/miss/mshr.cc
+++ b/src/mem/cache/miss/mshr.cc
@@ -37,6 +37,7 @@
 #include <assert.h>
 #include <string>
 #include <vector>
+#include <algorithm>
 
 #include "mem/cache/miss/mshr.hh"
 #include "sim/core.hh" // for curTick
@@ -54,10 +55,13 @@ MSHR::MSHR()
 }
 
 void
-MSHR::allocate(Addr _addr, int _size, PacketPtr target)
+MSHR::allocate(Addr _addr, int _size, PacketPtr target,
+               Tick when, Counter _order)
 {
     addr = _addr;
     size = _size;
+    readyTick = when;
+    order = _order;
     assert(target);
     isCacheFill = false;
     needsExclusive = target->needsExclusive();
@@ -66,8 +70,8 @@ MSHR::allocate(Addr _addr, int _size, PacketPtr target)
     threadNum = 0;
     ntargets = 1;
     // Don't know of a case where we would allocate a new MSHR for a
-    // snoop (mem0-side request), so set cpuSide to true here.
-    targets.push_back(Target(target, true));
+    // snoop (mem-side request), so set cpuSide to true here.
+    targets.push_back(Target(target, when, _order, true));
     assert(deferredTargets.empty());
     deferredNeedsExclusive = false;
     pendingInvalidate = false;
@@ -88,33 +92,33 @@ MSHR::deallocate()
  * Adds a target to an MSHR
  */
 void
-MSHR::allocateTarget(PacketPtr target)
+MSHR::allocateTarget(PacketPtr target, Tick when, Counter _order)
 {
     if (inService) {
         if (!deferredTargets.empty() || pendingInvalidate ||
             (!needsExclusive && target->needsExclusive())) {
             // need to put on deferred list
-            deferredTargets.push_back(Target(target, true));
+            deferredTargets.push_back(Target(target, when, _order, true));
             if (target->needsExclusive()) {
                 deferredNeedsExclusive = true;
             }
         } else {
             // still OK to append to outstanding request
-            targets.push_back(Target(target, true));
+            targets.push_back(Target(target, when, _order, true));
         }
     } else {
         if (target->needsExclusive()) {
             needsExclusive = true;
         }
 
-        targets.push_back(Target(target, true));
+        targets.push_back(Target(target, when, _order, true));
     }
 
     ++ntargets;
 }
 
 void
-MSHR::allocateSnoopTarget(PacketPtr target)
+MSHR::allocateSnoopTarget(PacketPtr target, Tick when, Counter _order)
 {
     assert(inService); // don't bother to call otherwise
 
@@ -137,7 +141,7 @@ MSHR::allocateSnoopTarget(PacketPtr target)
         return;
     }
 
-    targets.push_back(Target(target, false));
+    targets.push_back(Target(target, when, _order, false));
     ++ntargets;
 }
 
@@ -157,6 +161,8 @@ MSHR::promoteDeferredTargets()
     needsExclusive = deferredNeedsExclusive;
     pendingInvalidate = false;
     deferredNeedsExclusive = false;
+    order = targets.front().order;
+    readyTick = std::max(curTick, targets.front().time);
 
     return true;
 }
diff --git a/src/mem/cache/miss/mshr.hh b/src/mem/cache/miss/mshr.hh
index f4e090a12..92288cf52 100644
--- a/src/mem/cache/miss/mshr.hh
+++ b/src/mem/cache/miss/mshr.hh
@@ -55,13 +55,14 @@ class MSHR : public Packet::SenderState
     class Target {
       public:
         Tick time;      //!< Time when request was received (for stats)
+        Counter order;  //!< Global order (for memory consistency mgmt)
         PacketPtr pkt;  //!< Pending request packet.
         bool cpuSide;   //!< Did request come from cpu side or mem side?
 
         bool isCpuSide() { return cpuSide; }
 
-        Target(PacketPtr _pkt, bool _cpuSide, Tick _time = curTick)
-            : time(_time), pkt(_pkt), cpuSide(_cpuSide)
+        Target(PacketPtr _pkt, Tick _time, Counter _order, bool _cpuSide)
+            : time(_time), order(_order), pkt(_pkt), cpuSide(_cpuSide)
         {}
     };
 
@@ -79,6 +80,12 @@ class MSHR : public Packet::SenderState
     /** Pointer to queue containing this MSHR. */
     MSHRQueue *queue;
 
+    /** Cycle when ready to issue */
+    Tick readyTick;
+
+    /** Order number assigned by the miss queue. */
+    Counter order;
+
     /** Address of the request. */
     Addr addr;
 
@@ -103,8 +110,6 @@ class MSHR : public Packet::SenderState
     short threadNum;
     /** The number of currently allocated targets. */
     short ntargets;
-    /** Order number of assigned by the miss queue. */
-    uint64_t order;
 
     /**
      * Pointer to this MSHR on the ready list.
@@ -136,13 +141,8 @@ public:
      * @param size The number of bytes to request.
      * @param pkt  The original miss.
      */
-    void allocate(Addr addr, int size, PacketPtr pkt);
-
-    /**
-     * Allocate this MSHR as a buffer for the given request.
-     * @param target The memory request to buffer.
-     */
-    void allocateAsBuffer(PacketPtr target);
+    void allocate(Addr addr, int size, PacketPtr pkt,
+                  Tick when, Counter _order);
 
     /**
      * Mark this MSHR as free.
@@ -153,8 +153,8 @@ public:
      * Add a request to the list of targets.
      * @param target The target.
      */
-    void allocateTarget(PacketPtr target);
-    void allocateSnoopTarget(PacketPtr target);
+    void allocateTarget(PacketPtr target, Tick when, Counter order);
+    void allocateSnoopTarget(PacketPtr target, Tick when, Counter order);
 
     /** A simple constructor. */
     MSHR();
diff --git a/src/mem/cache/miss/mshr_queue.cc b/src/mem/cache/miss/mshr_queue.cc
index 3407e2588..18184bd20 100644
--- a/src/mem/cache/miss/mshr_queue.cc
+++ b/src/mem/cache/miss/mshr_queue.cc
@@ -90,8 +90,8 @@ MSHRQueue::findMatches(Addr addr, vector<MSHR*>& matches) const
 MSHR *
 MSHRQueue::findPending(Addr addr, int size) const
 {
-    MSHR::ConstIterator i = pendingList.begin();
-    MSHR::ConstIterator end = pendingList.end();
+    MSHR::ConstIterator i = readyList.begin();
+    MSHR::ConstIterator end = readyList.end();
     for (; i != end; ++i) {
         MSHR *mshr = *i;
         if (mshr->addr < addr) {
@@ -107,17 +107,37 @@ MSHRQueue::findPending(Addr addr, int size) const
     return NULL;
 }
 
+
+MSHR::Iterator
+MSHRQueue::addToReadyList(MSHR *mshr)
+{
+    if (readyList.empty() || readyList.back()->readyTick <= mshr->readyTick) {
+        return readyList.insert(readyList.end(), mshr);
+    }
+
+    MSHR::Iterator i = readyList.begin();
+    MSHR::Iterator end = readyList.end();
+    for (; i != end; ++i) {
+        if ((*i)->readyTick > mshr->readyTick) {
+            return readyList.insert(i, mshr);
+        }
+    }
+    assert(false);
+}
+
+
 MSHR *
-MSHRQueue::allocate(Addr addr, int size, PacketPtr &pkt)
+MSHRQueue::allocate(Addr addr, int size, PacketPtr &pkt,
+                    Tick when, Counter order)
 {
     assert(!freeList.empty());
     MSHR *mshr = freeList.front();
     assert(mshr->getNumTargets() == 0);
     freeList.pop_front();
 
-    mshr->allocate(addr, size, pkt);
+    mshr->allocate(addr, size, pkt, when, order);
     mshr->allocIter = allocatedList.insert(allocatedList.end(), mshr);
-    mshr->readyIter = pendingList.insert(pendingList.end(), mshr);
+    mshr->readyIter = addToReadyList(mshr);
 
     allocated += 1;
     return mshr;
@@ -139,7 +159,7 @@ MSHRQueue::deallocateOne(MSHR *mshr)
     if (mshr->inService) {
         inServiceEntries--;
     } else {
-        pendingList.erase(mshr->readyIter);
+        readyList.erase(mshr->readyIter);
     }
     mshr->deallocate();
     return retval;
@@ -150,14 +170,15 @@ MSHRQueue::moveToFront(MSHR *mshr)
 {
     if (!mshr->inService) {
         assert(mshr == *(mshr->readyIter));
-        pendingList.erase(mshr->readyIter);
-        mshr->readyIter = pendingList.insert(pendingList.begin(), mshr);
+        readyList.erase(mshr->readyIter);
+        mshr->readyIter = readyList.insert(readyList.begin(), mshr);
     }
 }
 
 void
 MSHRQueue::markInService(MSHR *mshr)
 {
+    assert(!mshr->inService);
     if (mshr->isSimpleForward()) {
         // we just forwarded the request packet & don't expect a
         // response, so get rid of it
@@ -167,23 +188,23 @@ MSHRQueue::markInService(MSHR *mshr)
         return;
     }
     mshr->inService = true;
-    pendingList.erase(mshr->readyIter);
+    readyList.erase(mshr->readyIter);
     //mshr->readyIter = NULL;
     inServiceEntries += 1;
-    //pendingList.pop_front();
+    //readyList.pop_front();
 }
 
 void
 MSHRQueue::markPending(MSHR *mshr)
 {
-    //assert(mshr->readyIter == NULL);
+    assert(mshr->inService);
     mshr->inService = false;
     --inServiceEntries;
     /**
      * @ todo might want to add rerequests to front of pending list for
      * performance.
      */
-    mshr->readyIter = pendingList.insert(pendingList.end(), mshr);
+    mshr->readyIter = addToReadyList(mshr);
 }
 
 void
diff --git a/src/mem/cache/miss/mshr_queue.hh b/src/mem/cache/miss/mshr_queue.hh
index 806aa9c64..fd61dec8b 100644
--- a/src/mem/cache/miss/mshr_queue.hh
+++ b/src/mem/cache/miss/mshr_queue.hh
@@ -51,7 +51,7 @@ class MSHRQueue
     /** Holds pointers to all allocated entries. */
     MSHR::List allocatedList;
     /** Holds pointers to entries that haven't been sent to the bus. */
-    MSHR::List pendingList;
+    MSHR::List readyList;
     /** Holds non allocated entries. */
     MSHR::List freeList;
 
@@ -69,6 +69,9 @@ class MSHRQueue
      */
     const int numReserve;
 
+    MSHR::Iterator addToReadyList(MSHR *mshr);
+
+
   public:
     /** The number of allocated entries. */
     int allocated;
@@ -121,7 +124,8 @@ class MSHRQueue
      *
      * @pre There are free entries.
      */
-    MSHR *allocate(Addr addr, int size, PacketPtr &pkt);
+    MSHR *allocate(Addr addr, int size, PacketPtr &pkt,
+                   Tick when, Counter order);
 
     /**
      * Removes the given MSHR from the queue. This places the MSHR on the
@@ -147,7 +151,7 @@ class MSHRQueue
 
     /**
      * Mark the given MSHR as in service. This removes the MSHR from the
-     * pendingList. Deallocates the MSHR if it does not expect a response.
+     * readyList. Deallocates the MSHR if it does not expect a response.
      * @param mshr The MSHR to mark in service.
      */
     void markInService(MSHR *mshr);
@@ -171,7 +175,7 @@ class MSHRQueue
      */
     bool havePending() const
     {
-        return !pendingList.empty();
+        return !readyList.empty();
     }
 
     /**
@@ -184,15 +188,20 @@ class MSHRQueue
     }
 
     /**
-     * Returns the MSHR at the head of the pendingList.
+     * Returns the MSHR at the head of the readyList.
      * @return The next request to service.
      */
     MSHR *getNextMSHR() const
     {
-        if (pendingList.empty()) {
+        if (readyList.empty() || readyList.front()->readyTick > curTick) {
             return NULL;
         }
-        return pendingList.front();
+        return readyList.front();
+    }
+
+    Tick nextMSHRReadyTick() const
+    {
+        return readyList.empty() ? MaxTick : readyList.front()->readyTick;
     }
 };
 
diff --git a/src/mem/tport.cc b/src/mem/tport.cc
index 2644a504c..0a2127490 100644
--- a/src/mem/tport.cc
+++ b/src/mem/tport.cc
@@ -91,28 +91,30 @@ SimpleTimingPort::schedSendTiming(PacketPtr pkt, Tick when)
     assert(when > curTick);
 
     // Nothing is on the list: add it and schedule an event
-    if (transmitList.empty()) {
-        assert(!sendEvent->scheduled());
-        sendEvent->schedule(when);
-        transmitList.push_back(DeferredPacket(when, pkt));
+    if (transmitList.empty() || when < transmitList.front().tick) {
+        transmitList.push_front(DeferredPacket(when, pkt));
+        schedSendEvent(when);
         return;
     }
 
-    // something is on the list and this belongs at the end
+    // list is non-empty and this is not the head, so event should
+    // already be scheduled
+    assert(waitingOnRetry ||
+           (sendEvent->scheduled() && sendEvent->when() <= when));
+
+    // list is non-empty & this belongs at the end
     if (when >= transmitList.back().tick) {
         transmitList.push_back(DeferredPacket(when, pkt));
         return;
     }
-    // Something is on the list and this belongs somewhere else
+
+    // this belongs in the middle somewhere
     DeferredPacketIterator i = transmitList.begin();
+    i++; // already checked for insertion at front
     DeferredPacketIterator end = transmitList.end();
 
     for (; i != end; ++i) {
         if (when < i->tick) {
-            if (i == transmitList.begin()) {
-                //Inserting at begining, reschedule
-                sendEvent->reschedule(when);
-            }
             transmitList.insert(i, DeferredPacket(when, pkt));
             return;
         }
diff --git a/src/mem/tport.hh b/src/mem/tport.hh
index ea0f05ed1..bfed29f34 100644
--- a/src/mem/tport.hh
+++ b/src/mem/tport.hh
@@ -105,6 +105,24 @@ class SimpleTimingPort : public Port
     bool deferredPacketReady()
     { return !transmitList.empty() && transmitList.front().tick <= curTick; }
 
+    Tick deferredPacketReadyTick()
+    { return transmitList.empty() ? MaxTick : transmitList.front().tick; }
+
+    void schedSendEvent(Tick when)
+    {
+        if (waitingOnRetry) {
+            assert(!sendEvent->scheduled());
+            return;
+        }
+
+        if (!sendEvent->scheduled()) {
+            sendEvent->schedule(when);
+        } else if (sendEvent->when() > when) {
+            sendEvent->reschedule(when);
+        }
+    }
+
+
     /** Schedule a sendTiming() event to be called in the future.
      * @param pkt packet to send
      * @param absolute time (in ticks) to send packet
-- 
cgit v1.2.3


From f697e959a17646500bca7c12e6bb7b30e047f1bb Mon Sep 17 00:00:00 2001
From: Steve Reinhardt <stever@eecs.umich.edu>
Date: Mon, 25 Jun 2007 22:23:29 -0700
Subject: Couple minor bug fixes...

src/mem/cache/cache_impl.hh:
    Handle grants with no packet.
src/mem/cache/miss/mshr.cc:
    Fix MSHR snoop hit handling.

--HG--
extra : convert_revision : f365283afddaa07cb9e050b2981ad6a898c14451
---
 src/mem/cache/cache_impl.hh | 27 ++++++++++++++++-----------
 src/mem/cache/miss/mshr.cc  | 10 ++++++----
 2 files changed, 22 insertions(+), 15 deletions(-)

diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh
index 7610b5a41..48efc5ca3 100644
--- a/src/mem/cache/cache_impl.hh
+++ b/src/mem/cache/cache_impl.hh
@@ -912,7 +912,6 @@ Cache<TagStore,Coherence>::snoopTiming(PacketPtr pkt)
 
             if (pkt->isInvalidate()) {
                 // Invalidation trumps our writeback... discard here
-                assert(0);
                 markInService(mshr);
             }
             return;
@@ -1201,18 +1200,24 @@ Cache<TagStore,Coherence>::MemSidePort::sendPacket()
     } else {
         // check for non-response packets (requests & writebacks)
         PacketPtr pkt = myCache()->getTimingPacket();
-        assert(pkt != NULL);
-        MSHR *mshr = dynamic_cast<MSHR*>(pkt->senderState);
+        if (pkt == NULL) {
+            // can happen if e.g. we attempt a writeback and fail, but
+            // before the retry, the writeback is eliminated because
+            // we snoop another cache's ReadEx.
+            waitingOnRetry = false;
+        } else {
+            MSHR *mshr = dynamic_cast<MSHR*>(pkt->senderState);
 
-        bool success = sendTiming(pkt);
-        DPRINTF(Cache, "Address %x was %s in sending the timing request\n",
-                pkt->getAddr(), success ? "successful" : "unsuccessful");
+            bool success = sendTiming(pkt);
+            DPRINTF(Cache, "Address %x was %s in sending the timing request\n",
+                    pkt->getAddr(), success ? "successful" : "unsuccessful");
 
-        waitingOnRetry = !success;
-        if (waitingOnRetry) {
-            DPRINTF(CachePort, "now waiting on a retry\n");
-        } else {
-            myCache()->markInService(mshr);
+            waitingOnRetry = !success;
+            if (waitingOnRetry) {
+                DPRINTF(CachePort, "now waiting on a retry\n");
+            } else {
+                myCache()->markInService(mshr);
+            }
         }
     }
 
diff --git a/src/mem/cache/miss/mshr.cc b/src/mem/cache/miss/mshr.cc
index 8fa11ab2e..fc8d2175e 100644
--- a/src/mem/cache/miss/mshr.cc
+++ b/src/mem/cache/miss/mshr.cc
@@ -132,13 +132,15 @@ MSHR::allocateSnoopTarget(PacketPtr target, Tick when, Counter _order)
         // We're awaiting an exclusive copy, so ownership is pending.
         // It's up to us to respond once the data arrives.
         target->assertMemInhibit();
-    } else if (target->needsExclusive()) {
+    }
+
+    if (target->needsExclusive()) {
         // This transaction will take away our pending copy
         pendingInvalidate = true;
     } else {
-        // If we're not going to supply data or perform an
-        // invalidation, we don't need to save this.
-        return;
+        // We'll keep our pending copy, but we can't let the other guy
+        // think he's getting it exclusive
+        target->assertShared();
     }
 
     targets.push_back(Target(target, when, _order, false));
-- 
cgit v1.2.3


From 7dacbcf49262605a75e461149ec7bd7a00fca7b7 Mon Sep 17 00:00:00 2001
From: Steve Reinhardt <stever@eecs.umich.edu>
Date: Tue, 26 Jun 2007 14:53:15 -0700
Subject: Handle replacement of block with pending upgrade.

src/mem/cache/tags/lru.cc:
    Add some replacement DPRINTFs

--HG--
extra : convert_revision : 7993ec24d6af7e7774d04ce36f20e3f43f887fd9
---
 src/mem/cache/cache_impl.hh | 27 ++++++++++++++++++-----
 src/mem/cache/miss/mshr.cc  | 53 +++++++++++++++++++++++++++++++++++++++++++++
 src/mem/cache/miss/mshr.hh  | 11 ++++++++++
 src/mem/cache/tags/lru.cc   |  4 ++++
 4 files changed, 89 insertions(+), 6 deletions(-)

diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh
index 48efc5ca3..d01adde78 100644
--- a/src/mem/cache/cache_impl.hh
+++ b/src/mem/cache/cache_impl.hh
@@ -300,7 +300,6 @@ Cache<TagStore,Coherence>::timingAccess(PacketPtr pkt)
         return true;
     }
 
-    PacketList writebacks;
     int lat = hitLatency;
     bool satisfied = false;
 
@@ -319,6 +318,8 @@ Cache<TagStore,Coherence>::timingAccess(PacketPtr pkt)
     }
 
 #if 0
+    PacketList writebacks;
+
     // If this is a block size write/hint (WH64) allocate the block here
     // if the coherence protocol allows it.
     /** @todo make the fast write alloc (wh64) work with coherence. */
@@ -338,7 +339,6 @@ Cache<TagStore,Coherence>::timingAccess(PacketPtr pkt)
             ++fastWrites;
         }
     }
-#endif
 
     // copy writebacks to write buffer
     while (!writebacks.empty()) {
@@ -346,6 +346,7 @@ Cache<TagStore,Coherence>::timingAccess(PacketPtr pkt)
         allocateBuffer(wbPkt, time, true);
         writebacks.pop_front();
     }
+#endif
 
     bool needsResponse = pkt->needsResponse();
 
@@ -676,6 +677,15 @@ Cache<TagStore,Coherence>::handleResponse(PacketPtr pkt)
         DPRINTF(Cache, "Block for addr %x being updated in Cache\n",
                 pkt->getAddr());
         BlkType *blk = tags->findBlock(pkt->getAddr());
+
+        if (blk == NULL && pkt->cmd == MemCmd::UpgradeResp) {
+            if (!mshr->handleReplacedPendingUpgrade(pkt)) {
+                mq->markPending(mshr);
+                requestMemSideBus((RequestCause)mq->index, pkt->finishTime);
+                return;
+            }
+        }
+
         PacketList writebacks;
         blk = handleFill(pkt, blk, writebacks);
         deallocate = satisfyMSHR(mshr, pkt, blk);
@@ -747,15 +757,20 @@ Cache<TagStore,Coherence>::handleFill(PacketPtr pkt, BlkType *blk,
     Addr addr = pkt->getAddr();
 
     if (blk == NULL) {
-        // better have read new data
+        // better have read new data...
         assert(pkt->isRead());
 
         // need to do a replacement
         blk = tags->findReplacement(addr, writebacks);
         if (blk->isValid()) {
+            Addr repl_addr = tags->regenerateBlkAddr(blk->tag, blk->set);
+            MSHR *repl_mshr = mshrQueue.findMatch(repl_addr);
+            if (repl_mshr) {
+                repl_mshr->handleReplacement(blk, blkSize);
+            }
+
             DPRINTF(Cache, "replacement: replacing %x with %x: %s\n",
-                    tags->regenerateBlkAddr(blk->tag, blk->set), addr,
-                    blk->isDirty() ? "writeback" : "clean");
+                    repl_addr, addr, blk->isDirty() ? "writeback" : "clean");
 
             if (blk->isDirty()) {
                 // Save writeback packet for handling by caller
@@ -992,7 +1007,7 @@ Cache<TagStore,Coherence>::getNextMSHR()
             return conflict_mshr;
         }
 
-        // No conclifts; issue read
+        // No conflicts; issue read
         return miss_mshr;
     }
 
diff --git a/src/mem/cache/miss/mshr.cc b/src/mem/cache/miss/mshr.cc
index fc8d2175e..ca5e38601 100644
--- a/src/mem/cache/miss/mshr.cc
+++ b/src/mem/cache/miss/mshr.cc
@@ -75,6 +75,8 @@ MSHR::allocate(Addr _addr, int _size, PacketPtr target,
     assert(deferredTargets.empty());
     deferredNeedsExclusive = false;
     pendingInvalidate = false;
+    replacedPendingUpgrade = false;
+    data = NULL;
 }
 
 void
@@ -170,6 +172,57 @@ MSHR::promoteDeferredTargets()
 }
 
 
+void
+MSHR::handleReplacement(CacheBlk *blk, int blkSize)
+{
+    // must be an outstanding upgrade request on block we're about to
+    // replace...
+    assert(!blk->isWritable());
+    assert(needsExclusive);
+    replacedPendingUpgrade = true;
+
+    // if it's dirty, just remember what happened and allow the
+    // writeback to continue.  we'll reissue a ReadEx later whether
+    // the upgrade succeeds or not
+    if (blk->isDirty()) {
+        replacedPendingUpgradeDirty = true;
+        return;
+    }
+
+    // if not dirty, we need to save it off as it will be only valid
+    // copy in system if upgrade is successful (and may need to be
+    // written back then, as the current owner if any will be
+    // invalidating its block)
+    replacedPendingUpgradeDirty = false;
+    data = new uint8_t[blkSize];
+    std::memcpy(data, blk->data, blkSize);
+}
+
+
+bool
+MSHR::handleReplacedPendingUpgrade(Packet *pkt)
+{
+    // @TODO: if upgrade is nacked and replacedPendingUpgradeDirty is true, then we need to writeback the data (or rel
+    assert(pkt->cmd == MemCmd::UpgradeResp);
+    assert(replacedPendingUpgrade);
+    replacedPendingUpgrade = false; // reset
+    if (replacedPendingUpgradeDirty) {
+        // we wrote back the previous copy; just reissue as a ReadEx
+        return false;
+    }
+
+    // previous copy was not dirty, but we are now owner...  fake out
+    // cache by taking saved data and converting UpgradeResp to
+    // ReadExResp
+    assert(data);
+    pkt->cmd = MemCmd::ReadExResp;
+    pkt->setData(data);
+    delete [] data;
+    data = NULL;
+    return true;
+}
+
+
 void
 MSHR::dump()
 {
diff --git a/src/mem/cache/miss/mshr.hh b/src/mem/cache/miss/mshr.hh
index 92288cf52..a9380d99a 100644
--- a/src/mem/cache/miss/mshr.hh
+++ b/src/mem/cache/miss/mshr.hh
@@ -105,12 +105,20 @@ class MSHR : public Packet::SenderState
 
     bool deferredNeedsExclusive;
     bool pendingInvalidate;
+    /** Is there a pending upgrade that got replaced? */
+    bool replacedPendingUpgrade;
+    bool replacedPendingUpgradeDirty;
 
     /** Thread number of the miss. */
     short threadNum;
     /** The number of currently allocated targets. */
     short ntargets;
 
+
+    /** Data buffer (if needed).  Currently used only for pending
+     * upgrade handling. */
+    uint8_t *data;
+
     /**
      * Pointer to this MSHR on the ready list.
      * @sa MissQueue, MSHRQueue::readyList
@@ -204,6 +212,9 @@ public:
 
     bool promoteDeferredTargets();
 
+    void handleReplacement(CacheBlk *blk, int blkSize);
+    bool handleReplacedPendingUpgrade(Packet *pkt);
+
     /**
      * Prints the contents of this MSHR to stderr.
      */
diff --git a/src/mem/cache/tags/lru.cc b/src/mem/cache/tags/lru.cc
index 334312aaf..fa46aff7b 100644
--- a/src/mem/cache/tags/lru.cc
+++ b/src/mem/cache/tags/lru.cc
@@ -173,6 +173,8 @@ LRU::findBlock(Addr addr, int &lat)
     if (blk != NULL) {
         // move this block to head of the MRU list
         sets[set].moveToHead(blk);
+        DPRINTF(Cache, "set %x: moving blk %x to MRU\n",
+                set, regenerateBlkAddr(tag, set));
         if (blk->whenReady > curTick
             && blk->whenReady - curTick > hitLatency) {
             lat = blk->whenReady - curTick;
@@ -214,6 +216,8 @@ LRU::findReplacement(Addr addr, PacketList &writebacks)
         }
     }
 
+    DPRINTF(Cache, "set %x: selecting blk %x for replacement\n",
+            set, regenerateBlkAddr(blk->tag, set));
     return blk;
 }
 
-- 
cgit v1.2.3


From 69ff6d9163c431272fc084b8e051996b44590a53 Mon Sep 17 00:00:00 2001
From: Steve Reinhardt <stever@eecs.umich.edu>
Date: Tue, 26 Jun 2007 18:01:22 -0400
Subject: cache_impl.hh: Change target overflow from assertion to warning.

src/mem/cache/cache_impl.hh:
    Change target overflow from assertion to warning.

--HG--
extra : convert_revision : ceca990ed916bbf96dedd4836c40df522803f173
---
 src/mem/cache/cache_impl.hh | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh
index d01adde78..a73612f24 100644
--- a/src/mem/cache/cache_impl.hh
+++ b/src/mem/cache/cache_impl.hh
@@ -892,9 +892,9 @@ Cache<TagStore,Coherence>::snoopTiming(PacketPtr pkt)
     // better not be snooping a request that conflicts with something
     // we have outstanding...
     if (mshr && mshr->inService) {
-        assert(mshr->getNumTargets() < numTarget); //handle later
         mshr->allocateSnoopTarget(pkt, curTick, order++);
-        assert(mshr->getNumTargets() < numTarget); //handle later
+        if (mshr->getNumTargets() > numTarget)
+           warn("allocating bonus target for snoop"); //handle later
         return;
     }
 
-- 
cgit v1.2.3


From 1b20df5607e86d3b384716792274fe01fa4f3f80 Mon Sep 17 00:00:00 2001
From: Steve Reinhardt <stever@eecs.umich.edu>
Date: Tue, 26 Jun 2007 22:23:10 -0700
Subject: Handle deferred snoops better.

--HG--
extra : convert_revision : 703da6128832eb0d5cfed7724e5105f4b3fe4f90
---
 src/mem/cache/cache.hh      |  6 ++--
 src/mem/cache/cache_impl.hh | 34 ++++++++++---------
 src/mem/cache/miss/mshr.cc  | 82 ++++++++++++++++++++++++++++-----------------
 src/mem/cache/miss/mshr.hh  |  3 +-
 src/mem/cache/tags/lru.cc   |  5 +--
 src/mem/tport.cc            | 13 ++++++-
 6 files changed, 91 insertions(+), 52 deletions(-)

diff --git a/src/mem/cache/cache.hh b/src/mem/cache/cache.hh
index 2a95dc53c..161fb801d 100644
--- a/src/mem/cache/cache.hh
+++ b/src/mem/cache/cache.hh
@@ -185,14 +185,16 @@ class Cache : public BaseCache
     void satisfyCpuSideRequest(PacketPtr pkt, BlkType *blk);
     bool satisfyMSHR(MSHR *mshr, PacketPtr pkt, BlkType *blk);
 
-    void doTimingSupplyResponse(PacketPtr req_pkt, uint8_t *blk_data);
+    void doTimingSupplyResponse(PacketPtr req_pkt, uint8_t *blk_data,
+                                bool already_copied);
 
     /**
      * Sets the blk to the new state.
      * @param blk The cache block being snooped.
      * @param new_state The new coherence state for the block.
      */
-    void handleSnoop(PacketPtr ptk, BlkType *blk, bool is_timing);
+    void handleSnoop(PacketPtr ptk, BlkType *blk,
+                     bool is_timing, bool is_deferred);
 
     /**
      * Create a writeback request for the given block.
diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh
index a73612f24..599eecc82 100644
--- a/src/mem/cache/cache_impl.hh
+++ b/src/mem/cache/cache_impl.hh
@@ -622,7 +622,7 @@ Cache<TagStore,Coherence>::satisfyMSHR(MSHR *mshr, PacketPtr pkt,
         } else {
             // response to snoop request
             DPRINTF(Cache, "processing deferred snoop...\n");
-            handleSnoop(target->pkt, blk, true);
+            handleSnoop(target->pkt, blk, true, true);
         }
 
         mshr->popTarget();
@@ -678,12 +678,10 @@ Cache<TagStore,Coherence>::handleResponse(PacketPtr pkt)
                 pkt->getAddr());
         BlkType *blk = tags->findBlock(pkt->getAddr());
 
-        if (blk == NULL && pkt->cmd == MemCmd::UpgradeResp) {
-            if (!mshr->handleReplacedPendingUpgrade(pkt)) {
-                mq->markPending(mshr);
-                requestMemSideBus((RequestCause)mq->index, pkt->finishTime);
-                return;
-            }
+        if (!mshr->handleFill(pkt, blk)) {
+            mq->markPending(mshr);
+            requestMemSideBus((RequestCause)mq->index, pkt->finishTime);
+            return;
         }
 
         PacketList writebacks;
@@ -814,10 +812,12 @@ Cache<TagStore,Coherence>::handleFill(PacketPtr pkt, BlkType *blk,
 template<class TagStore, class Coherence>
 void
 Cache<TagStore,Coherence>::doTimingSupplyResponse(PacketPtr req_pkt,
-                                                  uint8_t *blk_data)
+                                                  uint8_t *blk_data,
+                                                  bool already_copied)
 {
-    // timing-mode snoop responses require a new packet
-    PacketPtr pkt = new Packet(req_pkt);
+    // timing-mode snoop responses require a new packet, unless we
+    // already made a copy...
+    PacketPtr pkt = already_copied ? req_pkt : new Packet(req_pkt);
     pkt->allocate();
     pkt->makeTimingResponse();
     pkt->setDataFromBlock(blk_data, blkSize);
@@ -827,7 +827,7 @@ Cache<TagStore,Coherence>::doTimingSupplyResponse(PacketPtr req_pkt,
 template<class TagStore, class Coherence>
 void
 Cache<TagStore,Coherence>::handleSnoop(PacketPtr pkt, BlkType *blk,
-                                       bool is_timing)
+                                       bool is_timing, bool is_deferred)
 {
     if (!blk || !blk->isValid()) {
         return;
@@ -854,9 +854,10 @@ Cache<TagStore,Coherence>::handleSnoop(PacketPtr pkt, BlkType *blk,
     }
 
     if (supply) {
+        assert(!pkt->memInhibitAsserted());
         pkt->assertMemInhibit();
         if (is_timing) {
-            doTimingSupplyResponse(pkt, blk->data);
+            doTimingSupplyResponse(pkt, blk->data, is_deferred);
         } else {
             pkt->makeAtomicResponse();
             pkt->setDataFromBlock(blk->data, blkSize);
@@ -892,6 +893,8 @@ Cache<TagStore,Coherence>::snoopTiming(PacketPtr pkt)
     // better not be snooping a request that conflicts with something
     // we have outstanding...
     if (mshr && mshr->inService) {
+        DPRINTF(Cache, "Deferring snoop on in-service MSHR to blk %x\n",
+                blk_addr);
         mshr->allocateSnoopTarget(pkt, curTick, order++);
         if (mshr->getNumTargets() > numTarget)
            warn("allocating bonus target for snoop"); //handle later
@@ -913,6 +916,7 @@ Cache<TagStore,Coherence>::snoopTiming(PacketPtr pkt)
             assert(wb_pkt->cmd == MemCmd::Writeback);
 
             if (pkt->isRead()) {
+                assert(!pkt->memInhibitAsserted());
                 pkt->assertMemInhibit();
                 if (!pkt->needsExclusive()) {
                     pkt->assertShared();
@@ -922,7 +926,7 @@ Cache<TagStore,Coherence>::snoopTiming(PacketPtr pkt)
                     // the packet's invalidate flag is set...
                     assert(pkt->isInvalidate());
                 }
-                doTimingSupplyResponse(pkt, wb_pkt->getPtr<uint8_t>());
+                doTimingSupplyResponse(pkt, wb_pkt->getPtr<uint8_t>(), false);
             }
 
             if (pkt->isInvalidate()) {
@@ -933,7 +937,7 @@ Cache<TagStore,Coherence>::snoopTiming(PacketPtr pkt)
         }
     }
 
-    handleSnoop(pkt, blk, true);
+    handleSnoop(pkt, blk, true, false);
 }
 
 
@@ -948,7 +952,7 @@ Cache<TagStore,Coherence>::snoopAtomic(PacketPtr pkt)
     }
 
     BlkType *blk = tags->findBlock(pkt->getAddr());
-    handleSnoop(pkt, blk, false);
+    handleSnoop(pkt, blk, false, false);
     return hitLatency;
 }
 
diff --git a/src/mem/cache/miss/mshr.cc b/src/mem/cache/miss/mshr.cc
index ca5e38601..23645cb27 100644
--- a/src/mem/cache/miss/mshr.cc
+++ b/src/mem/cache/miss/mshr.cc
@@ -75,6 +75,7 @@ MSHR::allocate(Addr _addr, int _size, PacketPtr target,
     assert(deferredTargets.empty());
     deferredNeedsExclusive = false;
     pendingInvalidate = false;
+    pendingShared = false;
     replacedPendingUpgrade = false;
     data = NULL;
 }
@@ -120,7 +121,7 @@ MSHR::allocateTarget(PacketPtr target, Tick when, Counter _order)
 }
 
 void
-MSHR::allocateSnoopTarget(PacketPtr target, Tick when, Counter _order)
+MSHR::allocateSnoopTarget(PacketPtr pkt, Tick when, Counter _order)
 {
     assert(inService); // don't bother to call otherwise
 
@@ -130,23 +131,33 @@ MSHR::allocateSnoopTarget(PacketPtr target, Tick when, Counter _order)
         return;
     }
 
-    if (needsExclusive) {
-        // We're awaiting an exclusive copy, so ownership is pending.
-        // It's up to us to respond once the data arrives.
-        target->assertMemInhibit();
-    }
+    DPRINTF(Cache, "deferred snoop on %x: %s %s\n", addr,
+            needsExclusive ? "needsExclusive" : "",
+            pkt->needsExclusive() ? "pkt->needsExclusive()" : "");
+
+    if (needsExclusive || pkt->needsExclusive()) {
+        // actual target device (typ. PhysicalMemory) will delete the
+        // packet on reception, so we need to save a copy here
+        targets.push_back(Target(new Packet(pkt), when, _order, false));
+        ++ntargets;
+
+        if (needsExclusive) {
+            // We're awaiting an exclusive copy, so ownership is pending.
+            // It's up to us to respond once the data arrives.
+            pkt->assertMemInhibit();
+        }
 
-    if (target->needsExclusive()) {
-        // This transaction will take away our pending copy
-        pendingInvalidate = true;
+        if (pkt->needsExclusive()) {
+            // This transaction will take away our pending copy
+            pendingInvalidate = true;
+        }
     } else {
-        // We'll keep our pending copy, but we can't let the other guy
-        // think he's getting it exclusive
-        target->assertShared();
+        // Read to a read: no conflict, so no need to record as
+        // target, but make sure neither reader thinks he's getting an
+        // exclusive copy
+        pendingShared = true;
+        pkt->assertShared();
     }
-
-    targets.push_back(Target(target, when, _order, false));
-    ++ntargets;
 }
 
 
@@ -164,6 +175,7 @@ MSHR::promoteDeferredTargets()
 
     needsExclusive = deferredNeedsExclusive;
     pendingInvalidate = false;
+    pendingShared = false;
     deferredNeedsExclusive = false;
     order = targets.front().order;
     readyTick = std::max(curTick, targets.front().time);
@@ -200,25 +212,33 @@ MSHR::handleReplacement(CacheBlk *blk, int blkSize)
 
 
 bool
-MSHR::handleReplacedPendingUpgrade(Packet *pkt)
+MSHR::handleFill(Packet *pkt, CacheBlk *blk)
 {
-    // @TODO: if upgrade is nacked and replacedPendingUpgradeDirty is true, then we need to writeback the data (or rel
-    assert(pkt->cmd == MemCmd::UpgradeResp);
-    assert(replacedPendingUpgrade);
-    replacedPendingUpgrade = false; // reset
-    if (replacedPendingUpgradeDirty) {
-        // we wrote back the previous copy; just reissue as a ReadEx
-        return false;
+    if (replacedPendingUpgrade) {
+        // block was replaced while upgrade request was in service
+        assert(pkt->cmd == MemCmd::UpgradeResp);
+        assert(blk == NULL);
+        assert(replacedPendingUpgrade);
+        replacedPendingUpgrade = false; // reset
+        if (replacedPendingUpgradeDirty) {
+            // we wrote back the previous copy; just reissue as a ReadEx
+            return false;
+        }
+
+        // previous copy was not dirty, but we are now owner...  fake out
+        // cache by taking saved data and converting UpgradeResp to
+        // ReadExResp
+        assert(data);
+        pkt->cmd = MemCmd::ReadExResp;
+        pkt->setData(data);
+        delete [] data;
+        data = NULL;
+    } else if (pendingShared) {
+        // we snooped another read while this read was in
+        // service... assert shared line on its behalf
+        pkt->assertShared();
     }
 
-    // previous copy was not dirty, but we are now owner...  fake out
-    // cache by taking saved data and converting UpgradeResp to
-    // ReadExResp
-    assert(data);
-    pkt->cmd = MemCmd::ReadExResp;
-    pkt->setData(data);
-    delete [] data;
-    data = NULL;
     return true;
 }
 
diff --git a/src/mem/cache/miss/mshr.hh b/src/mem/cache/miss/mshr.hh
index a9380d99a..07fe5c96c 100644
--- a/src/mem/cache/miss/mshr.hh
+++ b/src/mem/cache/miss/mshr.hh
@@ -105,6 +105,7 @@ class MSHR : public Packet::SenderState
 
     bool deferredNeedsExclusive;
     bool pendingInvalidate;
+    bool pendingShared;
     /** Is there a pending upgrade that got replaced? */
     bool replacedPendingUpgrade;
     bool replacedPendingUpgradeDirty;
@@ -213,7 +214,7 @@ public:
     bool promoteDeferredTargets();
 
     void handleReplacement(CacheBlk *blk, int blkSize);
-    bool handleReplacedPendingUpgrade(Packet *pkt);
+    bool handleFill(Packet *pkt, CacheBlk *blk);
 
     /**
      * Prints the contents of this MSHR to stderr.
diff --git a/src/mem/cache/tags/lru.cc b/src/mem/cache/tags/lru.cc
index fa46aff7b..3269aa4db 100644
--- a/src/mem/cache/tags/lru.cc
+++ b/src/mem/cache/tags/lru.cc
@@ -207,6 +207,9 @@ LRU::findReplacement(Addr addr, PacketList &writebacks)
         totalRefs += blk->refCount;
         ++sampledRefs;
         blk->refCount = 0;
+
+        DPRINTF(Cache, "set %x: selecting blk %x for replacement\n",
+                set, regenerateBlkAddr(blk->tag, set));
     } else if (!blk->isTouched) {
         tagsInUse++;
         blk->isTouched = true;
@@ -216,8 +219,6 @@ LRU::findReplacement(Addr addr, PacketList &writebacks)
         }
     }
 
-    DPRINTF(Cache, "set %x: selecting blk %x for replacement\n",
-            set, regenerateBlkAddr(blk->tag, set));
     return blk;
 }
 
diff --git a/src/mem/tport.cc b/src/mem/tport.cc
index 0a2127490..6c8c12ce2 100644
--- a/src/mem/tport.cc
+++ b/src/mem/tport.cc
@@ -69,11 +69,21 @@ SimpleTimingPort::recvTiming(PacketPtr pkt)
     // if we ever added it back.
     assert(pkt->isRequest());
     assert(pkt->result == Packet::Unknown);
+
+    if (pkt->memInhibitAsserted()) {
+        // snooper will supply based on copy of packet
+        // still target's responsibility to delete packet
+        delete pkt->req;
+        delete pkt;
+        return true;
+    }
+
     bool needsResponse = pkt->needsResponse();
     Tick latency = recvAtomic(pkt);
     // turn packet around to go back to requester if response expected
     if (needsResponse) {
-        // recvAtomic() should already have turned packet into atomic response
+        // recvAtomic() should already have turned packet into
+        // atomic response
         assert(pkt->isResponse());
         pkt->convertAtomicToTimingResponse();
         schedSendTiming(pkt, curTick + latency);
@@ -81,6 +91,7 @@ SimpleTimingPort::recvTiming(PacketPtr pkt)
         delete pkt->req;
         delete pkt;
     }
+
     return true;
 }
 
-- 
cgit v1.2.3


From c4903e088247ad187356864459d2e4be77d97154 Mon Sep 17 00:00:00 2001
From: Steve Reinhardt <stever@eecs.umich.edu>
Date: Tue, 26 Jun 2007 23:30:30 -0700
Subject: Revamp replacement-of-upgrade handling.

--HG--
extra : convert_revision : 9bc09d8ae6d50e6dfbb4ab21514612f9aa102a2e
---
 src/mem/cache/cache.hh      |  3 +++
 src/mem/cache/cache_impl.hh | 44 ++++++++++++++++++++++++++------------
 src/mem/cache/miss/mshr.cc  | 51 +--------------------------------------------
 src/mem/cache/miss/mshr.hh  |  6 +-----
 4 files changed, 36 insertions(+), 68 deletions(-)

diff --git a/src/mem/cache/cache.hh b/src/mem/cache/cache.hh
index 161fb801d..9e8c35066 100644
--- a/src/mem/cache/cache.hh
+++ b/src/mem/cache/cache.hh
@@ -136,6 +136,9 @@ class Cache : public BaseCache
     /** Prefetcher */
     BasePrefetcher *prefetcher;
 
+    /** Temporary cache block for occasional transitory use */
+    BlkType *tempBlock;
+
     /**
      * Can this cache should allocate a block on a line-sized write miss.
      */
diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh
index 599eecc82..3685bc8cb 100644
--- a/src/mem/cache/cache_impl.hh
+++ b/src/mem/cache/cache_impl.hh
@@ -58,6 +58,9 @@ Cache<TagStore,Coherence>::Cache(const std::string &_name,
       doFastWrites(params.doFastWrites),
       prefetchMiss(params.prefetchMiss)
 {
+    tempBlock = new BlkType();
+    tempBlock->data = new uint8_t[blkSize];
+
     cpuSidePort = new CpuSidePort(_name + "-cpu_side_port", this);
     memSidePort = new MemSidePort(_name + "-mem_side_port", this);
     cpuSidePort->setOtherPort(memSidePort);
@@ -678,11 +681,8 @@ Cache<TagStore,Coherence>::handleResponse(PacketPtr pkt)
                 pkt->getAddr());
         BlkType *blk = tags->findBlock(pkt->getAddr());
 
-        if (!mshr->handleFill(pkt, blk)) {
-            mq->markPending(mshr);
-            requestMemSideBus((RequestCause)mq->index, pkt->finishTime);
-            return;
-        }
+        // give mshr a chance to do some dirty work
+        mshr->handleFill(pkt, blk);
 
         PacketList writebacks;
         blk = handleFill(pkt, blk, writebacks);
@@ -693,6 +693,13 @@ Cache<TagStore,Coherence>::handleResponse(PacketPtr pkt)
             allocateBuffer(wbPkt, time, true);
             writebacks.pop_front();
         }
+        // if we used temp block, clear it out
+        if (blk == tempBlock) {
+            if (blk->isDirty()) {
+                allocateBuffer(writebackBlk(blk), time, true);
+            }
+            tags->invalidateBlk(blk);
+        }
     } else {
         if (pkt->req->isUncacheable()) {
             mshr_uncacheable_lat[pkt->cmd.toInt()][0/*pkt->req->getThreadNum()*/] +=
@@ -764,15 +771,26 @@ Cache<TagStore,Coherence>::handleFill(PacketPtr pkt, BlkType *blk,
             Addr repl_addr = tags->regenerateBlkAddr(blk->tag, blk->set);
             MSHR *repl_mshr = mshrQueue.findMatch(repl_addr);
             if (repl_mshr) {
-                repl_mshr->handleReplacement(blk, blkSize);
-            }
-
-            DPRINTF(Cache, "replacement: replacing %x with %x: %s\n",
-                    repl_addr, addr, blk->isDirty() ? "writeback" : "clean");
+                // must be an outstanding upgrade request on block
+                // we're about to replace...
+                assert(!blk->isWritable());
+                assert(repl_mshr->needsExclusive);
+                // too hard to replace block with transient state;
+                // just use temporary storage to complete the current
+                // request and then get rid of it
+                assert(!tempBlock->isValid());
+                blk = tempBlock;
+                tempBlock->set = tags->extractSet(addr);
+                DPRINTF(Cache, "using temp block for %x\n", addr);
+            } else {
+                DPRINTF(Cache, "replacement: replacing %x with %x: %s\n",
+                        repl_addr, addr,
+                        blk->isDirty() ? "writeback" : "clean");
 
-            if (blk->isDirty()) {
-                // Save writeback packet for handling by caller
-                writebacks.push_back(writebackBlk(blk));
+                if (blk->isDirty()) {
+                    // Save writeback packet for handling by caller
+                    writebacks.push_back(writebackBlk(blk));
+                }
             }
         }
 
diff --git a/src/mem/cache/miss/mshr.cc b/src/mem/cache/miss/mshr.cc
index 23645cb27..63b3cacc2 100644
--- a/src/mem/cache/miss/mshr.cc
+++ b/src/mem/cache/miss/mshr.cc
@@ -76,7 +76,6 @@ MSHR::allocate(Addr _addr, int _size, PacketPtr target,
     deferredNeedsExclusive = false;
     pendingInvalidate = false;
     pendingShared = false;
-    replacedPendingUpgrade = false;
     data = NULL;
 }
 
@@ -185,61 +184,13 @@ MSHR::promoteDeferredTargets()
 
 
 void
-MSHR::handleReplacement(CacheBlk *blk, int blkSize)
-{
-    // must be an outstanding upgrade request on block we're about to
-    // replace...
-    assert(!blk->isWritable());
-    assert(needsExclusive);
-    replacedPendingUpgrade = true;
-
-    // if it's dirty, just remember what happened and allow the
-    // writeback to continue.  we'll reissue a ReadEx later whether
-    // the upgrade succeeds or not
-    if (blk->isDirty()) {
-        replacedPendingUpgradeDirty = true;
-        return;
-    }
-
-    // if not dirty, we need to save it off as it will be only valid
-    // copy in system if upgrade is successful (and may need to be
-    // written back then, as the current owner if any will be
-    // invalidating its block)
-    replacedPendingUpgradeDirty = false;
-    data = new uint8_t[blkSize];
-    std::memcpy(data, blk->data, blkSize);
-}
-
-
-bool
 MSHR::handleFill(Packet *pkt, CacheBlk *blk)
 {
-    if (replacedPendingUpgrade) {
-        // block was replaced while upgrade request was in service
-        assert(pkt->cmd == MemCmd::UpgradeResp);
-        assert(blk == NULL);
-        assert(replacedPendingUpgrade);
-        replacedPendingUpgrade = false; // reset
-        if (replacedPendingUpgradeDirty) {
-            // we wrote back the previous copy; just reissue as a ReadEx
-            return false;
-        }
-
-        // previous copy was not dirty, but we are now owner...  fake out
-        // cache by taking saved data and converting UpgradeResp to
-        // ReadExResp
-        assert(data);
-        pkt->cmd = MemCmd::ReadExResp;
-        pkt->setData(data);
-        delete [] data;
-        data = NULL;
-    } else if (pendingShared) {
+    if (pendingShared) {
         // we snooped another read while this read was in
         // service... assert shared line on its behalf
         pkt->assertShared();
     }
-
-    return true;
 }
 
 
diff --git a/src/mem/cache/miss/mshr.hh b/src/mem/cache/miss/mshr.hh
index 07fe5c96c..4db7b1cfe 100644
--- a/src/mem/cache/miss/mshr.hh
+++ b/src/mem/cache/miss/mshr.hh
@@ -106,9 +106,6 @@ class MSHR : public Packet::SenderState
     bool deferredNeedsExclusive;
     bool pendingInvalidate;
     bool pendingShared;
-    /** Is there a pending upgrade that got replaced? */
-    bool replacedPendingUpgrade;
-    bool replacedPendingUpgradeDirty;
 
     /** Thread number of the miss. */
     short threadNum;
@@ -213,8 +210,7 @@ public:
 
     bool promoteDeferredTargets();
 
-    void handleReplacement(CacheBlk *blk, int blkSize);
-    bool handleFill(Packet *pkt, CacheBlk *blk);
+    void handleFill(Packet *pkt, CacheBlk *blk);
 
     /**
      * Prints the contents of this MSHR to stderr.
-- 
cgit v1.2.3


From 9117c94f9c74f0674d75731385a106d17a1dee09 Mon Sep 17 00:00:00 2001
From: Steve Reinhardt <stever@eecs.umich.edu>
Date: Wed, 27 Jun 2007 20:54:13 -0700
Subject: Get rid of coherence protocol object.

--HG--
extra : convert_revision : 4ff144342dca23af9a12a2169ca318a002654b42
---
 configs/example/memtest.py                    |   4 -
 src/mem/cache/cache.cc                        |  12 +-
 src/mem/cache/cache.hh                        |  28 +-
 src/mem/cache/cache_builder.cc                |  94 ++----
 src/mem/cache/cache_impl.hh                   | 191 +++++------
 src/mem/cache/coherence/CoherenceProtocol.py  |   8 -
 src/mem/cache/coherence/SConscript            |  36 --
 src/mem/cache/coherence/coherence_protocol.cc | 469 --------------------------
 src/mem/cache/coherence/coherence_protocol.hh | 257 --------------
 src/mem/cache/coherence/simple_coherence.hh   | 163 ---------
 10 files changed, 140 insertions(+), 1122 deletions(-)
 delete mode 100644 src/mem/cache/coherence/CoherenceProtocol.py
 delete mode 100644 src/mem/cache/coherence/SConscript
 delete mode 100644 src/mem/cache/coherence/coherence_protocol.cc
 delete mode 100644 src/mem/cache/coherence/coherence_protocol.hh
 delete mode 100644 src/mem/cache/coherence/simple_coherence.hh

diff --git a/configs/example/memtest.py b/configs/example/memtest.py
index 0bc12e7bd..0e6260b5d 100644
--- a/configs/example/memtest.py
+++ b/configs/example/memtest.py
@@ -48,8 +48,6 @@ parser.add_option("-m", "--maxtick", type="int", default=m5.MaxTick,
 parser.add_option("-n", "--numtesters", type="int", default=8,
                   metavar="N",
                   help="Number of tester pseudo-CPUs [default: %default]")
-parser.add_option("-p", "--protocol", default="moesi",
-                  help="Coherence protocol [default: %default]")
 
 parser.add_option("-f", "--functional", type="int", default=0,
                   metavar="PCT",
@@ -95,7 +93,6 @@ class L1(BaseCache):
     block_size = block_size
     mshrs = num_l1_mshrs
     tgts_per_mshr = 8
-    protocol = CoherenceProtocol(protocol=options.protocol)
 
 # ----------------------
 # Base L2 Cache
@@ -107,7 +104,6 @@ class L2(BaseCache):
     mshrs = num_l2_mshrs
     tgts_per_mshr = 16
     write_buffers = 8
-    protocol = CoherenceProtocol(protocol=options.protocol)
 
 if options.numtesters > block_size:
      print "Error: Number of testers limited to %s because of false sharing" \
diff --git a/src/mem/cache/cache.cc b/src/mem/cache/cache.cc
index 96f9a2e11..c640d4a60 100644
--- a/src/mem/cache/cache.cc
+++ b/src/mem/cache/cache.cc
@@ -58,8 +58,6 @@
 #include "mem/cache/tags/split_lifo.hh"
 #endif
 
-#include "mem/cache/coherence/simple_coherence.hh"
-
 #include "mem/cache/cache_impl.hh"
 
 // Template Instantiations
@@ -67,23 +65,23 @@
 
 
 #if defined(USE_CACHE_FALRU)
-template class Cache<FALRU, SimpleCoherence>;
+template class Cache<FALRU>;
 #endif
 
 #if defined(USE_CACHE_IIC)
-template class Cache<IIC, SimpleCoherence>;
+template class Cache<IIC>;
 #endif
 
 #if defined(USE_CACHE_LRU)
-template class Cache<LRU, SimpleCoherence>;
+template class Cache<LRU>;
 #endif
 
 #if defined(USE_CACHE_SPLIT)
-template class Cache<Split, SimpleCoherence>;
+template class Cache<Split>;
 #endif
 
 #if defined(USE_CACHE_SPLIT_LIFO)
-template class Cache<SplitLIFO, SimpleCoherence>;
+template class Cache<SplitLIFO>;
 #endif
 
 #endif //DOXYGEN_SHOULD_SKIP_THIS
diff --git a/src/mem/cache/cache.hh b/src/mem/cache/cache.hh
index 9e8c35066..57028a05e 100644
--- a/src/mem/cache/cache.hh
+++ b/src/mem/cache/cache.hh
@@ -39,9 +39,7 @@
 #ifndef __CACHE_HH__
 #define __CACHE_HH__
 
-#include "base/compression/base.hh"
 #include "base/misc.hh" // fatal, panic, and warn
-#include "cpu/smt.hh" // SMT_MAX_THREADS
 
 #include "mem/cache/base_cache.hh"
 #include "mem/cache/cache_blk.hh"
@@ -55,11 +53,9 @@ class BasePrefetcher;
 /**
  * A template-policy based cache. The behavior of the cache can be altered by
  * supplying different template policies. TagStore handles all tag and data
- * storage @sa TagStore. Buffering handles all misses and writes/writebacks
- * @sa MissQueue. Coherence handles all coherence policy details @sa
- * UniCoherence, SimpleMultiCoherence.
+ * storage @sa TagStore.
  */
-template <class TagStore, class Coherence>
+template <class TagStore>
 class Cache : public BaseCache
 {
   public:
@@ -76,13 +72,13 @@ class Cache : public BaseCache
     {
       public:
         CpuSidePort(const std::string &_name,
-                    Cache<TagStore,Coherence> *_cache);
+                    Cache<TagStore> *_cache);
 
         // BaseCache::CachePort just has a BaseCache *; this function
         // lets us get back the type info we lost when we stored the
         // cache pointer there.
-        Cache<TagStore,Coherence> *myCache() {
-            return static_cast<Cache<TagStore,Coherence> *>(cache);
+        Cache<TagStore> *myCache() {
+            return static_cast<Cache<TagStore> *>(cache);
         }
 
         virtual void getDeviceAddressRanges(AddrRangeList &resp,
@@ -99,13 +95,13 @@ class Cache : public BaseCache
     {
       public:
         MemSidePort(const std::string &_name,
-                    Cache<TagStore,Coherence> *_cache);
+                    Cache<TagStore> *_cache);
 
         // BaseCache::CachePort just has a BaseCache *; this function
         // lets us get back the type info we lost when we stored the
         // cache pointer there.
-        Cache<TagStore,Coherence> *myCache() {
-            return static_cast<Cache<TagStore,Coherence> *>(cache);
+        Cache<TagStore> *myCache() {
+            return static_cast<Cache<TagStore> *>(cache);
         }
 
         void sendPacket();
@@ -130,9 +126,6 @@ class Cache : public BaseCache
     /** Tag and data Storage */
     TagStore *tags;
 
-    /** Coherence protocol. */
-    Coherence *coherence;
-
     /** Prefetcher */
     BasePrefetcher *prefetcher;
 
@@ -212,20 +205,19 @@ class Cache : public BaseCache
     {
       public:
         TagStore *tags;
-        Coherence *coherence;
         BaseCache::Params baseParams;
         BasePrefetcher*prefetcher;
         bool prefetchAccess;
         const bool doFastWrites;
         const bool prefetchMiss;
 
-        Params(TagStore *_tags, Coherence *coh,
+        Params(TagStore *_tags,
                BaseCache::Params params,
                BasePrefetcher *_prefetcher,
                bool prefetch_access, int hit_latency,
                bool do_fast_writes,
                bool prefetch_miss)
-            : tags(_tags), coherence(coh),
+            : tags(_tags),
               baseParams(params),
               prefetcher(_prefetcher), prefetchAccess(prefetch_access),
               doFastWrites(do_fast_writes),
diff --git a/src/mem/cache/cache_builder.cc b/src/mem/cache/cache_builder.cc
index 307c851a2..65418b68d 100644
--- a/src/mem/cache/cache_builder.cc
+++ b/src/mem/cache/cache_builder.cc
@@ -42,7 +42,6 @@
 #include "mem/cache/base_cache.hh"
 #include "mem/cache/cache.hh"
 #include "mem/bus.hh"
-#include "mem/cache/coherence/coherence_protocol.hh"
 #include "sim/builder.hh"
 
 // Tag Templates
@@ -66,13 +65,6 @@
 #include "mem/cache/tags/split_lifo.hh"
 #endif
 
-// Compression Templates
-#include "base/compression/null_compression.hh"
-#include "base/compression/lzss_compression.hh"
-
-// Coherence Templates
-#include "mem/cache/coherence/simple_coherence.hh"
-
 //Prefetcher Headers
 #if defined(USE_GHB)
 #include "mem/cache/prefetch/ghb_prefetcher.hh"
@@ -100,16 +92,11 @@ BEGIN_DECLARE_SIM_OBJECT_PARAMS(BaseCache)
     Param<int> tgts_per_mshr;
     Param<int> write_buffers;
     Param<bool> prioritizeRequests;
-    SimObjectParam<CoherenceProtocol *> protocol;
     Param<Addr> trace_addr;
     Param<int> hash_delay;
 #if defined(USE_CACHE_IIC)
     SimObjectParam<Repl *> repl;
 #endif
-    Param<bool> compressed_bus;
-    Param<bool> store_compressed;
-    Param<bool> adaptive_compression;
-    Param<int> compression_latency;
     Param<int> subblock_size;
     Param<Counter> max_miss_count;
     VectorParam<Range<Addr> > addr_range;
@@ -144,23 +131,12 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(BaseCache)
     INIT_PARAM_DFLT(write_buffers, "number of write buffers", 8),
     INIT_PARAM_DFLT(prioritizeRequests, "always service demand misses first",
                     false),
-    INIT_PARAM_DFLT(protocol, "coherence protocol to use in the cache", NULL),
     INIT_PARAM_DFLT(trace_addr, "address to trace", 0),
 
     INIT_PARAM_DFLT(hash_delay, "time in cycles of hash access",1),
 #if defined(USE_CACHE_IIC)
     INIT_PARAM_DFLT(repl, "replacement policy",NULL),
 #endif
-    INIT_PARAM_DFLT(compressed_bus,
-                    "This cache connects to a compressed memory",
-                    false),
-    INIT_PARAM_DFLT(store_compressed, "Store compressed data in the cache",
-                    false),
-    INIT_PARAM_DFLT(adaptive_compression, "Use an adaptive compression scheme",
-                    false),
-    INIT_PARAM_DFLT(compression_latency,
-                    "Latency in cycles of compression algorithm",
-                    0),
     INIT_PARAM_DFLT(subblock_size,
                     "Size of subblock in IIC used for compression",
                     0),
@@ -188,7 +164,7 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(BaseCache)
 END_INIT_SIM_OBJECT_PARAMS(BaseCache)
 
 
-#define BUILD_CACHE(TAGS, tags, c)                                      \
+#define BUILD_CACHE(TAGS, tags)                                      \
     do {                                                                \
         BasePrefetcher *pf;                                           \
         if (pf_policy == "tagged") {                                    \
@@ -203,12 +179,12 @@ END_INIT_SIM_OBJECT_PARAMS(BaseCache)
         else {                                                          \
             BUILD_NULL_PREFETCHER(TAGS);                                \
         }                                                               \
-        Cache<TAGS, c>::Params params(tags, coh, base_params,       \
-                                      pf, prefetch_access, latency, \
-                                      true,                             \
-                                      prefetch_miss);                   \
-        Cache<TAGS, c> *retval =                                        \
-            new Cache<TAGS, c>(getInstanceName(), params);              \
+        Cache<TAGS>::Params params(tags, base_params,       \
+                                   pf, prefetch_access, latency,        \
+                                   true,                                \
+                                   prefetch_miss);                      \
+        Cache<TAGS> *retval =                                        \
+            new Cache<TAGS>(getInstanceName(), params);              \
         return retval;                                                  \
     } while (0)
 
@@ -216,79 +192,68 @@ END_INIT_SIM_OBJECT_PARAMS(BaseCache)
         panic("%s not compiled into M5", x);		\
     } while (0)
 
-#define BUILD_COMPRESSED_CACHE(TAGS, tags, c)           \
-    do {                                                \
-        CompressionAlgorithm *compAlg;                  \
-        if (compressed_bus || store_compressed) {       \
-            compAlg = new LZSSCompression();            \
-        } else {                                        \
-            compAlg = new NullCompression();            \
-        }                                               \
-        BUILD_CACHE(TAGS, tags, c);                     \
-    } while (0)
-
 #if defined(USE_CACHE_FALRU)
-#define BUILD_FALRU_CACHE(c) do {			    \
+#define BUILD_FALRU_CACHE do {			    \
         FALRU *tags = new FALRU(block_size, size, latency); \
-        BUILD_COMPRESSED_CACHE(FALRU, tags, c);		\
+        BUILD_CACHE(FALRU, tags);		\
     } while (0)
 #else
-#define BUILD_FALRU_CACHE(c) BUILD_CACHE_PANIC("falru cache")
+#define BUILD_FALRU_CACHE BUILD_CACHE_PANIC("falru cache")
 #endif
 
 #if defined(USE_CACHE_LRU)
-#define BUILD_LRU_CACHE(c) do {				\
+#define BUILD_LRU_CACHE do {				\
         LRU *tags = new LRU(numSets, block_size, assoc, latency);	\
-        BUILD_COMPRESSED_CACHE(LRU, tags, c);			\
+        BUILD_CACHE(LRU, tags);			\
     } while (0)
 #else
-#define BUILD_LRU_CACHE(c) BUILD_CACHE_PANIC("lru cache")
+#define BUILD_LRU_CACHE BUILD_CACHE_PANIC("lru cache")
 #endif
 
 #if defined(USE_CACHE_SPLIT)
-#define BUILD_SPLIT_CACHE(c) do {					\
+#define BUILD_SPLIT_CACHE do {					\
         Split *tags = new Split(numSets, block_size, assoc, split_size, lifo, \
                                 two_queue, latency);		\
-        BUILD_COMPRESSED_CACHE(Split, tags, c);			\
+        BUILD_CACHE(Split, tags);			\
     } while (0)
 #else
-#define BUILD_SPLIT_CACHE(c) BUILD_CACHE_PANIC("split cache")
+#define BUILD_SPLIT_CACHE BUILD_CACHE_PANIC("split cache")
 #endif
 
 #if defined(USE_CACHE_SPLIT_LIFO)
-#define BUILD_SPLIT_LIFO_CACHE(c) do {				\
+#define BUILD_SPLIT_LIFO_CACHE do {				\
         SplitLIFO *tags = new SplitLIFO(block_size, size, assoc,        \
                                         latency, two_queue, -1);	\
-        BUILD_COMPRESSED_CACHE(SplitLIFO, tags, c);			\
+        BUILD_CACHE(SplitLIFO, tags);			\
     } while (0)
 #else
-#define BUILD_SPLIT_LIFO_CACHE(c) BUILD_CACHE_PANIC("lifo cache")
+#define BUILD_SPLIT_LIFO_CACHE BUILD_CACHE_PANIC("lifo cache")
 #endif
 
 #if defined(USE_CACHE_IIC)
-#define BUILD_IIC_CACHE(c) do {			\
+#define BUILD_IIC_CACHE do {			\
         IIC *tags = new IIC(iic_params);		\
-        BUILD_COMPRESSED_CACHE(IIC, tags, c);	\
+        BUILD_CACHE(IIC, tags);	\
     } while (0)
 #else
-#define BUILD_IIC_CACHE(c) BUILD_CACHE_PANIC("iic")
+#define BUILD_IIC_CACHE BUILD_CACHE_PANIC("iic")
 #endif
 
-#define BUILD_CACHES(c) do {				\
+#define BUILD_CACHES do {				\
         if (repl == NULL) {				\
             if (numSets == 1) {				\
-                BUILD_FALRU_CACHE(c);		\
+                BUILD_FALRU_CACHE;		\
             } else {					\
                 if (split == true) {			\
-                    BUILD_SPLIT_CACHE(c);		\
+                    BUILD_SPLIT_CACHE;		\
                 } else if (lifo == true) {		\
-                    BUILD_SPLIT_LIFO_CACHE(c);	\
+                    BUILD_SPLIT_LIFO_CACHE;	\
                 } else {				\
-                    BUILD_LRU_CACHE(c);		\
+                    BUILD_LRU_CACHE;		\
                 }					\
             }						\
         } else {					\
-            BUILD_IIC_CACHE(c);			\
+            BUILD_IIC_CACHE;			\
         }						\
     } while (0)
 
@@ -399,8 +364,7 @@ CREATE_SIM_OBJECT(BaseCache)
     const void *repl = NULL;
 #endif
 
-    SimpleCoherence *coh = new SimpleCoherence(protocol);
-    BUILD_CACHES(SimpleCoherence);
+    BUILD_CACHES;
     return NULL;
 }
 
diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh
index 3685bc8cb..b76d7e392 100644
--- a/src/mem/cache/cache_impl.hh
+++ b/src/mem/cache/cache_impl.hh
@@ -48,13 +48,13 @@
 #include "sim/sim_exit.hh" // for SimExitEvent
 
 
-template<class TagStore, class Coherence>
-Cache<TagStore,Coherence>::Cache(const std::string &_name,
-                                 Cache<TagStore,Coherence>::Params &params)
+template<class TagStore>
+Cache<TagStore>::Cache(const std::string &_name,
+                       Cache<TagStore>::Params &params)
     : BaseCache(_name, params.baseParams),
       prefetchAccess(params.prefetchAccess),
       tags(params.tags),
-      coherence(params.coherence), prefetcher(params.prefetcher),
+      prefetcher(params.prefetcher),
       doFastWrites(params.doFastWrites),
       prefetchMiss(params.prefetchMiss)
 {
@@ -67,23 +67,21 @@ Cache<TagStore,Coherence>::Cache(const std::string &_name,
     memSidePort->setOtherPort(cpuSidePort);
 
     tags->setCache(this);
-    coherence->setCache(this);
     prefetcher->setCache(this);
 }
 
-template<class TagStore, class Coherence>
+template<class TagStore>
 void
-Cache<TagStore,Coherence>::regStats()
+Cache<TagStore>::regStats()
 {
     BaseCache::regStats();
     tags->regStats(name());
-    coherence->regStats(name());
     prefetcher->regStats(name());
 }
 
-template<class TagStore, class Coherence>
+template<class TagStore>
 Port *
-Cache<TagStore,Coherence>::getPort(const std::string &if_name, int idx)
+Cache<TagStore>::getPort(const std::string &if_name, int idx)
 {
     if (if_name == "" || if_name == "cpu_side") {
         return cpuSidePort;
@@ -96,9 +94,9 @@ Cache<TagStore,Coherence>::getPort(const std::string &if_name, int idx)
     }
 }
 
-template<class TagStore, class Coherence>
+template<class TagStore>
 void
-Cache<TagStore,Coherence>::deletePortRefs(Port *p)
+Cache<TagStore>::deletePortRefs(Port *p)
 {
     if (cpuSidePort == p || memSidePort == p)
         panic("Can only delete functional ports\n");
@@ -107,9 +105,9 @@ Cache<TagStore,Coherence>::deletePortRefs(Port *p)
 }
 
 
-template<class TagStore, class Coherence>
+template<class TagStore>
 void
-Cache<TagStore,Coherence>::cmpAndSwap(BlkType *blk, PacketPtr pkt)
+Cache<TagStore>::cmpAndSwap(BlkType *blk, PacketPtr pkt)
 {
     uint64_t overwrite_val;
     bool overwrite_mem;
@@ -152,9 +150,9 @@ Cache<TagStore,Coherence>::cmpAndSwap(BlkType *blk, PacketPtr pkt)
 /////////////////////////////////////////////////////
 
 
-template<class TagStore, class Coherence>
+template<class TagStore>
 void
-Cache<TagStore,Coherence>::markInService(MSHR *mshr)
+Cache<TagStore>::markInService(MSHR *mshr)
 {
     markInServiceInternal(mshr);
 #if 0
@@ -171,9 +169,9 @@ Cache<TagStore,Coherence>::markInService(MSHR *mshr)
 }
 
 
-template<class TagStore, class Coherence>
+template<class TagStore>
 void
-Cache<TagStore,Coherence>::squash(int threadNum)
+Cache<TagStore>::squash(int threadNum)
 {
     bool unblock = false;
     BlockedCause cause = NUM_BLOCKED_CAUSES;
@@ -199,9 +197,9 @@ Cache<TagStore,Coherence>::squash(int threadNum)
 //
 /////////////////////////////////////////////////////
 
-template<class TagStore, class Coherence>
+template<class TagStore>
 bool
-Cache<TagStore,Coherence>::access(PacketPtr pkt, BlkType *&blk, int &lat)
+Cache<TagStore>::access(PacketPtr pkt, BlkType *&blk, int &lat)
 {
     if (pkt->req->isUncacheable())  {
         blk = NULL;
@@ -280,9 +278,9 @@ Cache<TagStore,Coherence>::access(PacketPtr pkt, BlkType *&blk, int &lat)
 }
 
 
-template<class TagStore, class Coherence>
+template<class TagStore>
 bool
-Cache<TagStore,Coherence>::timingAccess(PacketPtr pkt)
+Cache<TagStore>::timingAccess(PacketPtr pkt)
 {
 //@todo Add back in MemDebug Calls
 //    MemDebug::cacheAccess(pkt);
@@ -398,10 +396,10 @@ Cache<TagStore,Coherence>::timingAccess(PacketPtr pkt)
 }
 
 
-template<class TagStore, class Coherence>
+template<class TagStore>
 PacketPtr
-Cache<TagStore,Coherence>::getBusPacket(PacketPtr cpu_pkt, BlkType *blk,
-                                        bool needsExclusive)
+Cache<TagStore>::getBusPacket(PacketPtr cpu_pkt, BlkType *blk,
+                              bool needsExclusive)
 {
     bool blkValid = blk && blk->isValid();
 
@@ -441,9 +439,9 @@ Cache<TagStore,Coherence>::getBusPacket(PacketPtr cpu_pkt, BlkType *blk,
 }
 
 
-template<class TagStore, class Coherence>
+template<class TagStore>
 Tick
-Cache<TagStore,Coherence>::atomicAccess(PacketPtr pkt)
+Cache<TagStore>::atomicAccess(PacketPtr pkt)
 {
     int lat = hitLatency;
 
@@ -511,10 +509,10 @@ Cache<TagStore,Coherence>::atomicAccess(PacketPtr pkt)
 }
 
 
-template<class TagStore, class Coherence>
+template<class TagStore>
 void
-Cache<TagStore,Coherence>::functionalAccess(PacketPtr pkt,
-                                            CachePort *otherSidePort)
+Cache<TagStore>::functionalAccess(PacketPtr pkt,
+                                  CachePort *otherSidePort)
 {
     Addr blk_addr = pkt->getAddr() & ~(blkSize - 1);
     BlkType *blk = tags->findBlock(pkt->getAddr());
@@ -561,9 +559,9 @@ Cache<TagStore,Coherence>::functionalAccess(PacketPtr pkt,
 /////////////////////////////////////////////////////
 
 
-template<class TagStore, class Coherence>
+template<class TagStore>
 void
-Cache<TagStore,Coherence>::satisfyCpuSideRequest(PacketPtr pkt, BlkType *blk)
+Cache<TagStore>::satisfyCpuSideRequest(PacketPtr pkt, BlkType *blk)
 {
     assert(blk);
     assert(pkt->needsExclusive() ? blk->isWritable() : blk->isValid());
@@ -586,10 +584,10 @@ Cache<TagStore,Coherence>::satisfyCpuSideRequest(PacketPtr pkt, BlkType *blk)
 }
 
 
-template<class TagStore, class Coherence>
+template<class TagStore>
 bool
-Cache<TagStore,Coherence>::satisfyMSHR(MSHR *mshr, PacketPtr pkt,
-                                       BlkType *blk)
+Cache<TagStore>::satisfyMSHR(MSHR *mshr, PacketPtr pkt,
+                             BlkType *blk)
 {
     // respond to MSHR targets, if any
 
@@ -642,9 +640,9 @@ Cache<TagStore,Coherence>::satisfyMSHR(MSHR *mshr, PacketPtr pkt,
 }
 
 
-template<class TagStore, class Coherence>
+template<class TagStore>
 void
-Cache<TagStore,Coherence>::handleResponse(PacketPtr pkt)
+Cache<TagStore>::handleResponse(PacketPtr pkt)
 {
     Tick time = curTick + hitLatency;
     MSHR *mshr = dynamic_cast<MSHR*>(pkt->senderState);
@@ -730,9 +728,9 @@ Cache<TagStore,Coherence>::handleResponse(PacketPtr pkt)
 
 
-template<class TagStore, class Coherence>
+template<class TagStore>
 PacketPtr
-Cache<TagStore,Coherence>::writebackBlk(BlkType *blk)
+Cache<TagStore>::writebackBlk(BlkType *blk)
 {
     assert(blk && blk->isValid() && blk->isDirty());
 
@@ -754,12 +752,13 @@ Cache<TagStore,Coherence>::writebackBlk(BlkType *blk)
 // is called by both atomic and timing-mode accesses, and in atomic
 // mode we don't mess with the write buffer (we just perform the
 // writebacks atomically once the original request is complete).
-template<class TagStore, class Coherence>
-typename Cache<TagStore,Coherence>::BlkType*
-Cache<TagStore,Coherence>::handleFill(PacketPtr pkt, BlkType *blk,
-                                      PacketList &writebacks)
+template<class TagStore>
+typename Cache<TagStore>::BlkType*
+Cache<TagStore>::handleFill(PacketPtr pkt, BlkType *blk,
+                            PacketList &writebacks)
 {
     Addr addr = pkt->getAddr();
+    CacheBlk::State old_state = blk ? blk->status : 0;
 
     if (blk == NULL) {
         // better have read new data...
@@ -795,21 +794,24 @@ Cache<TagStore,Coherence>::handleFill(PacketPtr pkt, BlkType *blk,
         }
 
         blk->tag = tags->extractTag(addr);
-        blk->status = coherence->getNewState(pkt);
     } else {
         // existing block... probably an upgrade
         assert(blk->tag == tags->extractTag(addr));
         // either we're getting new data or the block should already be valid
         assert(pkt->isRead() || blk->isValid());
-        CacheBlk::State old_state = blk->status;
-        blk->status = coherence->getNewState(pkt, old_state);
-        if (blk->status != old_state)
-            DPRINTF(Cache, "Block addr %x moving from state %i to %i\n",
-                    addr, old_state, blk->status);
-        else
-            warn("Changing state to same value\n");
     }
 
+    if (pkt->needsExclusive()) {
+        blk->status = BlkValid | BlkWritable | BlkDirty;
+    } else if (!pkt->sharedAsserted()) {
+        blk->status = BlkValid | BlkWritable;
+    } else {
+        blk->status = BlkValid;
+    }
+
+    DPRINTF(Cache, "Block addr %x moving from state %i to %i\n",
+            addr, old_state, blk->status);
+
     // if we got new data, copy it in
     if (pkt->isRead()) {
         std::memcpy(blk->data, pkt->getPtr<uint8_t>(), blkSize);
@@ -827,11 +829,11 @@ Cache<TagStore,Coherence>::handleFill(PacketPtr pkt, BlkType *blk,
 //
 /////////////////////////////////////////////////////
 
-template<class TagStore, class Coherence>
+template<class TagStore>
 void
-Cache<TagStore,Coherence>::doTimingSupplyResponse(PacketPtr req_pkt,
-                                                  uint8_t *blk_data,
-                                                  bool already_copied)
+Cache<TagStore>::doTimingSupplyResponse(PacketPtr req_pkt,
+                                        uint8_t *blk_data,
+                                        bool already_copied)
 {
     // timing-mode snoop responses require a new packet, unless we
     // already made a copy...
@@ -842,10 +844,10 @@ Cache<TagStore,Coherence>::doTimingSupplyResponse(PacketPtr req_pkt,
     memSidePort->respond(pkt, curTick + hitLatency);
 }
 
-template<class TagStore, class Coherence>
+template<class TagStore>
 void
-Cache<TagStore,Coherence>::handleSnoop(PacketPtr pkt, BlkType *blk,
-                                       bool is_timing, bool is_deferred)
+Cache<TagStore>::handleSnoop(PacketPtr pkt, BlkType *blk,
+                             bool is_timing, bool is_deferred)
 {
     if (!blk || !blk->isValid()) {
         return;
@@ -894,9 +896,9 @@ Cache<TagStore,Coherence>::handleSnoop(PacketPtr pkt, BlkType *blk,
 }
 
 
-template<class TagStore, class Coherence>
+template<class TagStore>
 void
-Cache<TagStore,Coherence>::snoopTiming(PacketPtr pkt)
+Cache<TagStore>::snoopTiming(PacketPtr pkt)
 {
     if (pkt->req->isUncacheable()) {
         //Can't get a hit on an uncacheable address
@@ -959,9 +961,9 @@ Cache<TagStore,Coherence>::snoopTiming(PacketPtr pkt)
 }
 
 
-template<class TagStore, class Coherence>
+template<class TagStore>
 Tick
-Cache<TagStore,Coherence>::snoopAtomic(PacketPtr pkt)
+Cache<TagStore>::snoopAtomic(PacketPtr pkt)
 {
     if (pkt->req->isUncacheable()) {
         // Can't get a hit on an uncacheable address
@@ -975,9 +977,9 @@ Cache<TagStore,Coherence>::snoopAtomic(PacketPtr pkt)
 }
 
 
-template<class TagStore, class Coherence>
+template<class TagStore>
 MSHR *
-Cache<TagStore,Coherence>::getNextMSHR()
+Cache<TagStore>::getNextMSHR()
 {
     // Check both MSHR queue and write buffer for potential requests
     MSHR *miss_mshr  = mshrQueue.getNextMSHR();
@@ -1051,9 +1053,9 @@ Cache<TagStore,Coherence>::getNextMSHR()
 }
 
 
-template<class TagStore, class Coherence>
+template<class TagStore>
 PacketPtr
-Cache<TagStore,Coherence>::getTimingPacket()
+Cache<TagStore>::getTimingPacket()
 {
     MSHR *mshr = getNextMSHR();
 
@@ -1100,9 +1102,9 @@ Cache<TagStore,Coherence>::getTimingPacket()
 //
 ///////////////
 
-template<class TagStore, class Coherence>
+template<class TagStore>
 void
-Cache<TagStore,Coherence>::CpuSidePort::
+Cache<TagStore>::CpuSidePort::
 getDeviceAddressRanges(AddrRangeList &resp, bool &snoop)
 {
     // CPU side port doesn't snoop; it's a target only.
@@ -1112,9 +1114,9 @@ getDeviceAddressRanges(AddrRangeList &resp, bool &snoop)
 }
 
 
-template<class TagStore, class Coherence>
+template<class TagStore>
 bool
-Cache<TagStore,Coherence>::CpuSidePort::recvTiming(PacketPtr pkt)
+Cache<TagStore>::CpuSidePort::recvTiming(PacketPtr pkt)
 {
     if (pkt->isRequest() && blocked) {
         DPRINTF(Cache,"Scheduling a retry while blocked\n");
@@ -1127,17 +1129,17 @@ Cache<TagStore,Coherence>::CpuSidePort::recvTiming(PacketPtr pkt)
 }
 
 
-template<class TagStore, class Coherence>
+template<class TagStore>
 Tick
-Cache<TagStore,Coherence>::CpuSidePort::recvAtomic(PacketPtr pkt)
+Cache<TagStore>::CpuSidePort::recvAtomic(PacketPtr pkt)
 {
     return myCache()->atomicAccess(pkt);
 }
 
 
-template<class TagStore, class Coherence>
+template<class TagStore>
 void
-Cache<TagStore,Coherence>::CpuSidePort::recvFunctional(PacketPtr pkt)
+Cache<TagStore>::CpuSidePort::recvFunctional(PacketPtr pkt)
 {
     checkFunctional(pkt);
     if (pkt->result != Packet::Success)
@@ -1145,10 +1147,10 @@ Cache<TagStore,Coherence>::CpuSidePort::recvFunctional(PacketPtr pkt)
 }
 
 
-template<class TagStore, class Coherence>
-Cache<TagStore,Coherence>::
+template<class TagStore>
+Cache<TagStore>::
 CpuSidePort::CpuSidePort(const std::string &_name,
-                         Cache<TagStore,Coherence> *_cache)
+                         Cache<TagStore> *_cache)
     : BaseCache::CachePort(_name, _cache)
 {
 }
@@ -1159,9 +1161,9 @@ CpuSidePort::CpuSidePort(const std::string &_name,
 //
 ///////////////
 
-template<class TagStore, class Coherence>
+template<class TagStore>
 void
-Cache<TagStore,Coherence>::MemSidePort::
+Cache<TagStore>::MemSidePort::
 getDeviceAddressRanges(AddrRangeList &resp, bool &snoop)
 {
     otherPort->getPeerAddressRanges(resp, snoop);
@@ -1171,9 +1173,9 @@ getDeviceAddressRanges(AddrRangeList &resp, bool &snoop)
 }
 
 
-template<class TagStore, class Coherence>
+template<class TagStore>
 bool
-Cache<TagStore,Coherence>::MemSidePort::recvTiming(PacketPtr pkt)
+Cache<TagStore>::MemSidePort::recvTiming(PacketPtr pkt)
 {
     // this needs to be fixed so that the cache updates the mshr and sends the
     // packet back out on the link, but it probably won't happen so until this
@@ -1196,9 +1198,9 @@ Cache<TagStore,Coherence>::MemSidePort::recvTiming(PacketPtr pkt)
 }
 
 
-template<class TagStore, class Coherence>
+template<class TagStore>
 Tick
-Cache<TagStore,Coherence>::MemSidePort::recvAtomic(PacketPtr pkt)
+Cache<TagStore>::MemSidePort::recvAtomic(PacketPtr pkt)
 {
     // in atomic mode, responses go back to the sender via the
     // function return from sendAtomic(), not via a separate
@@ -1209,9 +1211,9 @@ Cache<TagStore,Coherence>::MemSidePort::recvAtomic(PacketPtr pkt)
 }
 
 
-template<class TagStore, class Coherence>
+template<class TagStore>
 void
-Cache<TagStore,Coherence>::MemSidePort::recvFunctional(PacketPtr pkt)
+Cache<TagStore>::MemSidePort::recvFunctional(PacketPtr pkt)
 {
     checkFunctional(pkt);
     if (pkt->result != Packet::Success)
@@ -1220,9 +1222,9 @@ Cache<TagStore,Coherence>::MemSidePort::recvFunctional(PacketPtr pkt)
 
 
-template<class TagStore, class Coherence>
+template<class TagStore>
 void
-Cache<TagStore,Coherence>::MemSidePort::sendPacket()
+Cache<TagStore>::MemSidePort::sendPacket()
 {
     // if we have responses that are ready, they take precedence
     if (deferredPacketReady()) {
@@ -1278,28 +1280,27 @@ Cache<TagStore,Coherence>::MemSidePort::sendPacket()
     }
 }
 
-template<class TagStore, class Coherence>
+template<class TagStore>
 void
-Cache<TagStore,Coherence>::MemSidePort::recvRetry()
+Cache<TagStore>::MemSidePort::recvRetry()
 {
     assert(waitingOnRetry);
     sendPacket();
 }
 
 
-template<class TagStore, class Coherence>
+template<class TagStore>
 void
-Cache<TagStore,Coherence>::MemSidePort::processSendEvent()
+Cache<TagStore>::MemSidePort::processSendEvent()
 {
     assert(!waitingOnRetry);
     sendPacket();
 }
 
 
-template<class TagStore, class Coherence>
-Cache<TagStore,Coherence>::
-MemSidePort::MemSidePort(const std::string &_name,
-                         Cache<TagStore,Coherence> *_cache)
+template<class TagStore>
+Cache<TagStore>::
+MemSidePort::MemSidePort(const std::string &_name, Cache<TagStore> *_cache)
     : BaseCache::CachePort(_name, _cache)
 {
     // override default send event from SimpleTimingPort
diff --git a/src/mem/cache/coherence/CoherenceProtocol.py b/src/mem/cache/coherence/CoherenceProtocol.py
deleted file mode 100644
index 82adb6862..000000000
--- a/src/mem/cache/coherence/CoherenceProtocol.py
+++ /dev/null
@@ -1,8 +0,0 @@
-from m5.SimObject import SimObject
-from m5.params import *
-class Coherence(Enum): vals = ['uni', 'msi', 'mesi', 'mosi', 'moesi']
-
-class CoherenceProtocol(SimObject):
-    type = 'CoherenceProtocol'
-    do_upgrades = Param.Bool(True, "use upgrade transactions?")
-    protocol = Param.Coherence("name of coherence protocol")
diff --git a/src/mem/cache/coherence/SConscript b/src/mem/cache/coherence/SConscript
deleted file mode 100644
index 91720b20e..000000000
--- a/src/mem/cache/coherence/SConscript
+++ /dev/null
@@ -1,36 +0,0 @@
-# -*- mode:python -*-
-
-# Copyright (c) 2006 The Regents of The University of Michigan
-# All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions are
-# met: redistributions of source code must retain the above copyright
-# notice, this list of conditions and the following disclaimer;
-# redistributions in binary form must reproduce the above copyright
-# notice, this list of conditions and the following disclaimer in the
-# documentation and/or other materials provided with the distribution;
-# neither the name of the copyright holders nor the names of its
-# contributors may be used to endorse or promote products derived from
-# this software without specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-#
-# Authors: Nathan Binkert
-
-Import('*')
-
-SimObject('CoherenceProtocol.py')
-
-Source('coherence_protocol.cc')
-
diff --git a/src/mem/cache/coherence/coherence_protocol.cc b/src/mem/cache/coherence/coherence_protocol.cc
deleted file mode 100644
index 47d2b469f..000000000
--- a/src/mem/cache/coherence/coherence_protocol.cc
+++ /dev/null
@@ -1,469 +0,0 @@
-/*
- * Copyright (c) 2002-2005 The Regents of The University of Michigan
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met: redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer;
- * redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution;
- * neither the name of the copyright holders nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * Authors: Erik Hallnor
- *          Steve Reinhardt
- *          Ron Dreslinski
- */
-
-/**
- * @file
- * Definitions of CoherenceProtocol.
- */
-
-#include <string>
-
-#include "base/misc.hh"
-#include "mem/cache/miss/mshr.hh"
-#include "mem/cache/cache.hh"
-#include "mem/cache/coherence/coherence_protocol.hh"
-#include "sim/builder.hh"
-
-using namespace std;
-
-
-CoherenceProtocol::StateTransition::StateTransition()
-    : busCmd(MemCmd::InvalidCmd), newState(-1), snoopFunc(invalidTransition)
-{
-}
-
-
-void
-CoherenceProtocol::regStats()
-{
-    // Even though we count all the possible transitions in the
-    // requestCount and snoopCount arrays, most of these are invalid,
-    // so we just select the interesting ones to print here.
-
-    requestCount[Invalid][MemCmd::ReadReq]
-        .name(name() + ".read_invalid")
-        .desc("read misses to invalid blocks")
-        ;
-
-    requestCount[Invalid][MemCmd::WriteReq]
-        .name(name() +".write_invalid")
-        .desc("write misses to invalid blocks")
-        ;
-
-    requestCount[Invalid][MemCmd::SoftPFReq]
-        .name(name() +".swpf_invalid")
-        .desc("soft prefetch misses to invalid blocks")
-        ;
-
-    requestCount[Invalid][MemCmd::HardPFReq]
-        .name(name() +".hwpf_invalid")
-        .desc("hard prefetch misses to invalid blocks")
-        ;
-
-    requestCount[Shared][MemCmd::WriteReq]
-        .name(name() + ".write_shared")
-        .desc("write misses to shared blocks")
-        ;
-
-    requestCount[Owned][MemCmd::WriteReq]
-        .name(name() + ".write_owned")
-        .desc("write misses to owned blocks")
-        ;
-
-    snoopCount[Shared][MemCmd::ReadReq]
-        .name(name() + ".snoop_read_shared")
-        .desc("read snoops on shared blocks")
-        ;
-
-    snoopCount[Shared][MemCmd::ReadExReq]
-        .name(name() + ".snoop_readex_shared")
-        .desc("readEx snoops on shared blocks")
-        ;
-
-    snoopCount[Shared][MemCmd::UpgradeReq]
-        .name(name() + ".snoop_upgrade_shared")
-        .desc("upgradee snoops on shared blocks")
-        ;
-
-    snoopCount[Modified][MemCmd::ReadReq]
-        .name(name() + ".snoop_read_modified")
-        .desc("read snoops on modified blocks")
-        ;
-
-    snoopCount[Modified][MemCmd::ReadExReq]
-        .name(name() + ".snoop_readex_modified")
-        .desc("readEx snoops on modified blocks")
-        ;
-
-    snoopCount[Owned][MemCmd::ReadReq]
-        .name(name() + ".snoop_read_owned")
-        .desc("read snoops on owned blocks")
-        ;
-
-    snoopCount[Owned][MemCmd::ReadExReq]
-        .name(name() + ".snoop_readex_owned")
-        .desc("readEx snoops on owned blocks")
-        ;
-
-    snoopCount[Owned][MemCmd::UpgradeReq]
-        .name(name() + ".snoop_upgrade_owned")
-        .desc("upgrade snoops on owned blocks")
-        ;
-
-    snoopCount[Exclusive][MemCmd::ReadReq]
-        .name(name() + ".snoop_read_exclusive")
-        .desc("read snoops on exclusive blocks")
-        ;
-
-    snoopCount[Exclusive][MemCmd::ReadExReq]
-        .name(name() + ".snoop_readex_exclusive")
-        .desc("readEx snoops on exclusive blocks")
-        ;
-
-    snoopCount[Shared][MemCmd::WriteInvalidateReq]
-        .name(name() + ".snoop_writeinv_shared")
-        .desc("WriteInvalidate snoops on shared blocks")
-        ;
-
-    snoopCount[Owned][MemCmd::WriteInvalidateReq]
-        .name(name() + ".snoop_writeinv_owned")
-        .desc("WriteInvalidate snoops on owned blocks")
-        ;
-
-    snoopCount[Exclusive][MemCmd::WriteInvalidateReq]
-        .name(name() + ".snoop_writeinv_exclusive")
-        .desc("WriteInvalidate snoops on exclusive blocks")
-        ;
-
-    snoopCount[Modified][MemCmd::WriteInvalidateReq]
-        .name(name() + ".snoop_writeinv_modified")
-        .desc("WriteInvalidate snoops on modified blocks")
-        ;
-
-    snoopCount[Invalid][MemCmd::WriteInvalidateReq]
-        .name(name() + ".snoop_writeinv_invalid")
-        .desc("WriteInvalidate snoops on invalid blocks")
-        ;
-}
-
-
-bool
-CoherenceProtocol::invalidateTrans(BaseCache *cache, PacketPtr &pkt,
-                                   CacheBlk *blk, MSHR *mshr,
-                                   CacheBlk::State & new_state)
-{
-    // invalidate the block
-    new_state = (blk->status & ~stateMask) | Invalid;
-    return false;
-}
-
-
-bool
-CoherenceProtocol::supplyTrans(BaseCache *cache, PacketPtr &pkt,
-                               CacheBlk *blk,
-                               MSHR *mshr,
-                               CacheBlk::State & new_state)
-{
-    return true;
-}
-
-
-bool
-CoherenceProtocol::supplyAndGotoSharedTrans(BaseCache *cache, PacketPtr &pkt,
-                                            CacheBlk *blk,
-                                            MSHR *mshr,
-                                            CacheBlk::State & new_state)
-{
-    new_state = (blk->status & ~stateMask) | Shared;
-    pkt->assertShared();
-    return supplyTrans(cache, pkt, blk, mshr, new_state);
-}
-
-
-bool
-CoherenceProtocol::supplyAndGotoOwnedTrans(BaseCache *cache, PacketPtr &pkt,
-                                           CacheBlk *blk,
-                                           MSHR *mshr,
-                                           CacheBlk::State & new_state)
-{
-    new_state = (blk->status & ~stateMask) | Owned;
-    pkt->assertShared();
-    return supplyTrans(cache, pkt, blk, mshr, new_state);
-}
-
-
-bool
-CoherenceProtocol::supplyAndInvalidateTrans(BaseCache *cache, PacketPtr &pkt,
-                                            CacheBlk *blk,
-                                            MSHR *mshr,
-                                            CacheBlk::State & new_state)
-{
-    new_state = (blk->status & ~stateMask) | Invalid;
-    return supplyTrans(cache, pkt, blk, mshr, new_state);
-}
-
-bool
-CoherenceProtocol::assertShared(BaseCache *cache, PacketPtr &pkt,
-                                            CacheBlk *blk,
-                                            MSHR *mshr,
-                                            CacheBlk::State & new_state)
-{
-    new_state = (blk->status & ~stateMask) | Shared;
-    pkt->assertShared();
-    return false;
-}
-
-CoherenceProtocol::CoherenceProtocol(const string &name,
-                                     const string &protocol,
-                                     const bool doUpgrades)
-    : SimObject(name)
-{
-    // Python should catch this, but in case it doesn't...
-    if (!(protocol == "msi"  || protocol == "mesi" ||
-          protocol == "mosi" || protocol == "moesi")) {
-        fatal("CoherenceProtocol: unrecognized protocol %s\n",  protocol);
-    }
-
-    bool hasOwned = (protocol == "mosi" || protocol == "moesi");
-    bool hasExclusive = (protocol == "mesi" || protocol == "moesi");
-
-    if (hasOwned && !doUpgrades) {
-        fatal("CoherenceProtocol: ownership protocols require upgrade "
-              "transactions\n(write miss on owned block generates ReadExcl, "
-              "which will clobber dirty block)\n");
-    }
-
-    // set up a few shortcuts to save typing & visual clutter
-    typedef MemCmd MC;
-    StateTransition (&tt)[stateMax+1][MC::NUM_MEM_CMDS] = transitionTable;
-
-    MC::Command writeToSharedCmd =
-        doUpgrades ? MC::UpgradeReq : MC::ReadExReq;
-    MC::Command writeToSharedResp =
-        doUpgrades ? MC::UpgradeResp : MC::ReadExResp;
-
-    // Note that all transitions by default cause a panic.
-    // Override the valid transitions with the appropriate actions here.
-
-    //
-    // ----- incoming requests: specify outgoing bus request -----
-    //
-    tt[Invalid][MC::ReadReq].onRequest(MC::ReadReq);
-    // we only support write allocate right now
-    tt[Invalid][MC::WriteReq].onRequest(MC::ReadExReq);
-    tt[Invalid][MC::ReadExReq].onRequest(MC::ReadExReq);
-    tt[Invalid][MC::SwapReq].onRequest(MC::ReadExReq);
-    tt[Invalid][MC::UpgradeReq].onRequest(MC::UpgradeReq);
-    tt[Shared][MC::WriteReq].onRequest(writeToSharedCmd);
-    tt[Shared][MC::ReadExReq].onRequest(MC::ReadExReq);
-    tt[Shared][MC::SwapReq].onRequest(writeToSharedCmd);
-    if (hasOwned) {
-        tt[Owned][MC::WriteReq].onRequest(writeToSharedCmd);
-        tt[Owned][MC::ReadExReq].onRequest(MC::ReadExReq);
-        tt[Owned][MC::SwapReq].onRequest(writeToSharedCmd);
-    }
-
-    // Prefetching causes a read
-    tt[Invalid][MC::SoftPFReq].onRequest(MC::ReadReq);
-    tt[Invalid][MC::HardPFReq].onRequest(MC::ReadReq);
-
-    //
-    // ----- on response to given request: specify new state -----
-    //
-    tt[Invalid][MC::ReadExResp].onResponse(Modified);
-    tt[Shared][writeToSharedResp].onResponse(Modified);
-    // Go to Exclusive state on read response if we have one (will
-    // move into shared if the shared line is asserted in the
-    // getNewState function)
-    //
-    // originally had this as:
-    // tt[Invalid][MC::ReadResp].onResponse(hasExclusive ? Exclusive: Shared);
-    // ...but for some reason that caused a link error...
-    if (hasExclusive) {
-        tt[Invalid][MC::ReadResp].onResponse(Exclusive);
-    } else {
-        tt[Invalid][MC::ReadResp].onResponse(Shared);
-    }
-    if (hasOwned) {
-        tt[Owned][writeToSharedResp].onResponse(Modified);
-    }
-
-    //
-    // ----- bus snoop transition functions -----
-    //
-    tt[Invalid][MC::ReadReq].onSnoop(nullTransition);
-    tt[Invalid][MC::ReadExReq].onSnoop(nullTransition);
-    tt[Invalid][MC::WriteInvalidateReq].onSnoop(invalidateTrans);
-    tt[Shared][MC::ReadReq].onSnoop(hasExclusive
-                                   ? assertShared : nullTransition);
-    tt[Shared][MC::ReadExReq].onSnoop(invalidateTrans);
-    tt[Shared][MC::WriteInvalidateReq].onSnoop(invalidateTrans);
-    if (doUpgrades) {
-        tt[Invalid][MC::UpgradeReq].onSnoop(nullTransition);
-        tt[Shared][MC::UpgradeReq].onSnoop(invalidateTrans);
-    }
-    tt[Modified][MC::ReadExReq].onSnoop(supplyAndInvalidateTrans);
-    tt[Modified][MC::ReadReq].onSnoop(hasOwned
-                                     ? supplyAndGotoOwnedTrans
-                                     : supplyAndGotoSharedTrans);
-    tt[Modified][MC::WriteInvalidateReq].onSnoop(invalidateTrans);
-
-    if (hasExclusive) {
-        tt[Exclusive][MC::ReadReq].onSnoop(assertShared);
-        tt[Exclusive][MC::ReadExReq].onSnoop(invalidateTrans);
-        tt[Exclusive][MC::WriteInvalidateReq].onSnoop(invalidateTrans);
-    }
-
-    if (hasOwned) {
-        tt[Owned][MC::ReadReq].onSnoop(supplyAndGotoOwnedTrans);
-        tt[Owned][MC::ReadExReq].onSnoop(supplyAndInvalidateTrans);
-        tt[Owned][MC::UpgradeReq].onSnoop(invalidateTrans);
-        tt[Owned][MC::WriteInvalidateReq].onSnoop(invalidateTrans);
-    }
-
-    // @todo add in hardware prefetch to this list
-}
-
-
-MemCmd
-CoherenceProtocol::getBusCmd(MemCmd cmdIn, CacheBlk::State state,
-                             MSHR *mshr)
-{
-    state &= stateMask;
-    int cmd_idx = cmdIn.toInt();
-
-    assert(0 <= state && state <= stateMax);
-    assert(0 <= cmd_idx && cmd_idx < MemCmd::NUM_MEM_CMDS);
-
-    MemCmd::Command cmdOut = transitionTable[state][cmd_idx].busCmd;
-
-    assert(cmdOut != MemCmd::InvalidCmd);
-
-    ++requestCount[state][cmd_idx];
-
-    return cmdOut;
-}
-
-
-CacheBlk::State
-CoherenceProtocol::getNewState(PacketPtr pkt, CacheBlk::State oldState)
-{
-    CacheBlk::State state = oldState & stateMask;
-    int cmd_idx = pkt->cmdToIndex();
-
-    assert(0 <= state && state <= stateMax);
-    assert(0 <= cmd_idx && cmd_idx < MemCmd::NUM_MEM_CMDS);
-
-    CacheBlk::State newState = transitionTable[state][cmd_idx].newState;
-
-    //Check if it's exclusive and the shared line was asserted,
-    //then  goto shared instead
-    if (newState == Exclusive && pkt->sharedAsserted()) {
-        newState = Shared;
-    }
-
-    assert(newState != -1);
-
-    //Make sure not to loose any other state information
-    newState = (oldState & ~stateMask) | newState;
-    return newState;
-}
-
-
-bool
-CoherenceProtocol::handleBusRequest(BaseCache *cache, PacketPtr &pkt,
-                                    CacheBlk *blk,
-                                    MSHR *mshr,
-                                    CacheBlk::State & new_state)
-{
-    if (blk == NULL) {
-        // nothing to do if we don't have a block
-        return false;
-    }
-
-    CacheBlk::State state = blk->status & stateMask;
-    int cmd_idx = pkt->cmdToIndex();
-
-    assert(0 <= state && state <= stateMax);
-    assert(0 <= cmd_idx && cmd_idx < MemCmd::NUM_MEM_CMDS);
-
-//    assert(mshr == NULL); // can't currently handle outstanding requests
-    //Check first if MSHR, and also insure, if there is one, that it is not in service
-    assert(!mshr || mshr->inService == 0);
-    ++snoopCount[state][cmd_idx];
-
-    bool ret = transitionTable[state][cmd_idx].snoopFunc(cache, pkt, blk, mshr,
-                                                     new_state);
-
-
-
-    return ret;
-}
-
-bool
-CoherenceProtocol::nullTransition(BaseCache *cache, PacketPtr &pkt,
-                                  CacheBlk *blk, MSHR *mshr,
-                                  CacheBlk::State & new_state)
-{
-    // do nothing
-    if (blk)
-        new_state = blk->status;
-    return false;
-}
-
-
-bool
-CoherenceProtocol::invalidTransition(BaseCache *cache, PacketPtr &pkt,
-                                     CacheBlk *blk, MSHR *mshr,
-                                     CacheBlk::State & new_state)
-{
-    panic("Invalid transition");
-    return false;
-}
-
-#ifndef DOXYGEN_SHOULD_SKIP_THIS
-
-BEGIN_DECLARE_SIM_OBJECT_PARAMS(CoherenceProtocol)
-
-    Param<string> protocol;
-    Param<bool> do_upgrades;
-
-END_DECLARE_SIM_OBJECT_PARAMS(CoherenceProtocol)
-
-
-BEGIN_INIT_SIM_OBJECT_PARAMS(CoherenceProtocol)
-
-    INIT_PARAM(protocol, "name of coherence protocol"),
-    INIT_PARAM_DFLT(do_upgrades, "use upgrade transactions?", true)
-
-END_INIT_SIM_OBJECT_PARAMS(CoherenceProtocol)
-
-
-CREATE_SIM_OBJECT(CoherenceProtocol)
-{
-    return new CoherenceProtocol(getInstanceName(), protocol,
-                                 do_upgrades);
-}
-
-REGISTER_SIM_OBJECT("CoherenceProtocol", CoherenceProtocol)
-
-#endif // DOXYGEN_SHOULD_SKIP_THIS
diff --git a/src/mem/cache/coherence/coherence_protocol.hh b/src/mem/cache/coherence/coherence_protocol.hh
deleted file mode 100644
index 4b8024582..000000000
--- a/src/mem/cache/coherence/coherence_protocol.hh
+++ /dev/null
@@ -1,257 +0,0 @@
-/*
- * Copyright (c) 2002-2005 The Regents of The University of Michigan
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met: redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer;
- * redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution;
- * neither the name of the copyright holders nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * Authors: Erik Hallnor
- *          Ron Dreslinski
- *          Steve Reinhardt
- */
-
-/**
- * @file
- * Declaration of CoherenceProcotol a basic coherence policy.
- */
-#ifndef __COHERENCE_PROTOCOL_HH__
-#define __COHERENCE_PROTOCOL_HH__
-
-#include <string>
-
-#include "sim/sim_object.hh"
-#include "mem/packet.hh"
-#include "mem/cache/cache_blk.hh"
-#include "base/statistics.hh"
-
-class BaseCache;
-class MSHR;
-
-/**
- * A simple coherence policy for the memory hierarchy. Currently implements
- * MSI, MESI, and MOESI protocols.
- */
-class CoherenceProtocol : public SimObject
-{
-  public:
-    /**
-     * Contruct and initialize this policy.
-     * @param name The name of this policy.
-     * @param protocol The string representation of the protocol to use.
-     * @param doUpgrades True if bus upgrades should be used.
-     */
-    CoherenceProtocol(const std::string &name, const std::string &protocol,
-                      const bool doUpgrades);
-
-    /**
-     * Destructor.
-     */
-    virtual ~CoherenceProtocol() {};
-
-    /**
-     * Register statistics
-     */
-    virtual void regStats();
-
-    /**
-     * Get the proper bus command for the given command and status.
-     * @param cmd The request's command.
-     * @param status The current state of the cache block.
-     * @param mshr The MSHR matching the request.
-     * @return The proper bus command, as determined by the protocol.
-     */
-    MemCmd getBusCmd(MemCmd cmd, CacheBlk::State status,
-                         MSHR *mshr = NULL);
-
-    /**
-     * Return the proper state given the current state and the bus response.
-     * @param pkt The bus response.
-     * @param oldState The current block state.
-     * @return The new state.
-     */
-    CacheBlk::State getNewState(PacketPtr pkt,
-                                CacheBlk::State oldState = 0);
-
-    /**
-     * Handle snooped bus requests.
-     * @param cache The cache that snooped the request.
-     * @param pkt The snooped bus request.
-     * @param blk The cache block corresponding to the request, if any.
-     * @param mshr The MSHR corresponding to the request, if any.
-     * @param new_state The new coherence state of the block.
-     * @return True if the request should be satisfied locally.
-     */
-    bool handleBusRequest(BaseCache *cache, PacketPtr &pkt, CacheBlk *blk,
-                          MSHR *mshr, CacheBlk::State &new_state);
-
-  protected:
-    /** Snoop function type. */
-    typedef bool (*SnoopFuncType)(BaseCache *, PacketPtr &, CacheBlk *,
-                                  MSHR *, CacheBlk::State&);
-
-    //
-    // Standard snoop transition functions
-    //
-
-    /**
-     * Do nothing transition.
-     */
-    static bool nullTransition(BaseCache *, PacketPtr &, CacheBlk *,
-                               MSHR *, CacheBlk::State&);
-
-    /**
-     * Invalid transition, basically panic.
-     */
-    static bool invalidTransition(BaseCache *, PacketPtr &, CacheBlk *,
-                                  MSHR *, CacheBlk::State&);
-
-    /**
-     * Invalidate block, move to Invalid state.
-     */
-    static bool invalidateTrans(BaseCache *, PacketPtr &, CacheBlk *,
-                                MSHR *, CacheBlk::State&);
-
-    /**
-     * Supply data, no state transition.
-     */
-    static bool supplyTrans(BaseCache *, PacketPtr &, CacheBlk *,
-                            MSHR *, CacheBlk::State&);
-
-    /**
-     * Supply data and go to Shared state.
-     */
-    static bool supplyAndGotoSharedTrans(BaseCache *, PacketPtr &, CacheBlk *,
-                                         MSHR *, CacheBlk::State&);
-
-    /**
-     * Supply data and go to Owned state.
-     */
-    static bool supplyAndGotoOwnedTrans(BaseCache *, PacketPtr &, CacheBlk *,
-                                        MSHR *, CacheBlk::State&);
-
-    /**
-     * Invalidate block, supply data, and go to Invalid state.
-     */
-    static bool supplyAndInvalidateTrans(BaseCache *, PacketPtr &, CacheBlk *,
-                                         MSHR *, CacheBlk::State&);
-
-    /**
-     * Assert the shared line for a block that is shared/exclusive.
-     */
-    static bool assertShared(BaseCache *, PacketPtr &, CacheBlk *,
-                                         MSHR *, CacheBlk::State&);
-
-    /**
-     * Definition of protocol state transitions.
-     */
-    class StateTransition
-    {
-        friend class CoherenceProtocol;
-
-        /** The bus command of this transition. */
-        Packet::Command busCmd;
-        /** The state to transition to. */
-        int newState;
-        /** The snoop function for this transition. */
-        SnoopFuncType snoopFunc;
-
-        /**
-         * Constructor, defaults to invalid transition.
-         */
-        StateTransition();
-
-        /**
-         * Initialize bus command.
-         * @param cmd The bus command to use.
-         */
-        void onRequest(Packet::Command cmd)
-        {
-            busCmd = cmd;
-        }
-
-        /**
-         * Set the transition state.
-         * @param s The new state.
-         */
-        void onResponse(CacheBlk::State s)
-        {
-            newState = s;
-        }
-
-        /**
-         * Initialize the snoop function.
-         * @param f The new snoop function.
-         */
-        void onSnoop(SnoopFuncType f)
-        {
-            snoopFunc = f;
-        }
-    };
-
-    friend class CoherenceProtocol::StateTransition;
-
-    /** Mask to select status bits relevant to coherence protocol. */
-    static const int stateMask = BlkValid | BlkWritable | BlkDirty;
-
-    /** The Modified (M) state. */
-    static const int Modified = BlkValid | BlkWritable | BlkDirty;
-    /** The Owned (O) state. */
-    static const int Owned = BlkValid | BlkDirty;
-    /** The Exclusive (E) state. */
-    static const int Exclusive = BlkValid | BlkWritable;
-    /** The Shared (S) state. */
-    static const int Shared = BlkValid;
-    /** The Invalid (I) state. */
-    static const int Invalid = 0;
-
-    /**
-     * Maximum state encoding value (used to size transition lookup
-     * table).  Could be more than number of states, depends on
-     * encoding of status bits.
-     */
-    static const int stateMax = stateMask;
-
-    /**
-     * The table of all possible transitions, organized by starting state and
-     * request command.
-     */
-    StateTransition transitionTable[stateMax+1][MemCmd::NUM_MEM_CMDS];
-
-    /**
-     * @addtogroup CoherenceStatistics
-     * @{
-     */
-    /**
-     * State accesses from parent cache.
-     */
-    Stats::Scalar<> requestCount[stateMax+1][MemCmd::NUM_MEM_CMDS];
-    /**
-     * State accesses from snooped requests.
-     */
-    Stats::Scalar<> snoopCount[stateMax+1][MemCmd::NUM_MEM_CMDS];
-    /**
-     * @}
-     */
-};
-
-#endif // __COHERENCE_PROTOCOL_HH__
diff --git a/src/mem/cache/coherence/simple_coherence.hh b/src/mem/cache/coherence/simple_coherence.hh
deleted file mode 100644
index 214828ca7..000000000
--- a/src/mem/cache/coherence/simple_coherence.hh
+++ /dev/null
@@ -1,163 +0,0 @@
-/*
- * Copyright (c) 2003-2005 The Regents of The University of Michigan
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met: redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer;
- * redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution;
- * neither the name of the copyright holders nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * Authors: Erik Hallnor
- *          Ron Dreslinski
- */
-
-/**
- * @file
- * Declaration of a simple coherence policy.
- */
-
-#ifndef __SIMPLE_COHERENCE_HH__
-#define __SIMPLE_COHERENCE_HH__
-
-#include <string>
-
-#include "mem/packet.hh"
-#include "mem/cache/cache_blk.hh"
-#include "mem/cache/miss/mshr_queue.hh"
-#include "mem/cache/coherence/coherence_protocol.hh"
-
-class BaseCache;
-
-/**
- * A simple MP coherence policy. This policy assumes an atomic bus and only one
- * level of cache.
- */
-class SimpleCoherence
-{
-  protected:
-    /** Pointer to the parent cache. */
-    BaseCache *cache;
-    /** Pointer to the coherence protocol. */
-    CoherenceProtocol *protocol;
-
-  public:
-    /**
-     * Construct and initialize this coherence policy.
-     * @param _protocol The coherence protocol to use.
-     */
-    SimpleCoherence(CoherenceProtocol *_protocol)
-        : protocol(_protocol)
-    {
-    }
-
-    /**
-     * Set the pointer to the parent cache.
-     * @param _cache The parent cache.
-     */
-    void setCache(BaseCache *_cache)
-    {
-        cache = _cache;
-    }
-
-    /**
-     * Register statistics.
-     * @param name The name to prepend to stat descriptions.
-     */
-    void regStats(const std::string &name)
-    {
-    }
-
-    /**
-     * This policy does not forward invalidates, return NULL.
-     * @return NULL.
-     */
-    PacketPtr getPacket()
-    {
-        return NULL;
-    }
-
-    /**
-     * Return the proper state given the current state and the bus response.
-     * @param pkt The bus response.
-     * @param current The current block state.
-     * @return The new state.
-     */
-    CacheBlk::State getNewState(PacketPtr pkt,
-                                CacheBlk::State current = 0)
-    {
-        return protocol->getNewState(pkt, current);
-    }
-
-    /**
-     * Handle snooped bus requests.
-     * @param pkt The snooped bus request.
-     * @param blk The cache block corresponding to the request, if any.
-     * @param mshr The MSHR corresponding to the request, if any.
-     * @param new_state Return the new state for the block.
-     */
-    bool handleBusRequest(PacketPtr &pkt, CacheBlk *blk, MSHR *mshr,
-                          CacheBlk::State &new_state)
-    {
-//	assert(mshr == NULL);
-//Got rid of, there could be an MSHR, but it can't be in service
-        if (blk != NULL)
-        {
-            if (pkt->cmd != MemCmd::Writeback) {
-                return protocol->handleBusRequest(cache, pkt, blk, mshr,
-                                              new_state);
-            }
-            else { //It is a writeback, must be ownership protocol, just keep state
-                new_state = blk->status;
-            }
-        }
-        return false;
-    }
-
-    /**
-     * Get the proper bus command for the given command and status.
-     * @param cmd The request's command.
-     * @param state The current state of the cache block.
-     * @return The proper bus command, as determined by the protocol.
-     */
-    MemCmd getBusCmd(MemCmd cmd,
-                                  CacheBlk::State state)
-    {
-        if (cmd == MemCmd::Writeback) return MemCmd::Writeback;
-        return protocol->getBusCmd(cmd, state);
-    }
-
-    /**
-     * Return true if this coherence policy can handle fast cache writes.
-     */
-    bool allowFastWrites() { return false; }
-
-    bool hasProtocol() { return true; }
-};
-
-#endif //__SIMPLE_COHERENCE_HH__
-
-
-
-
-
-
-
-
-- 
cgit v1.2.3


From 6ab53415efe3e06c06589a8a6ef38185ff6f94b7 Mon Sep 17 00:00:00 2001
From: Steve Reinhardt <stever@eecs.umich.edu>
Date: Sat, 30 Jun 2007 10:16:18 -0700
Subject: Get rid of Packet result field.  Error responses are now encoded in
 cmd field.

--HG--
extra : convert_revision : d67819b7e3ee4b9a5bf08541104de0a89485e90b
---
 src/arch/sparc/tlb.cc          |   4 +-
 src/cpu/o3/fetch_impl.hh       |   6 --
 src/cpu/o3/lsq_unit.hh         |   6 --
 src/cpu/o3/lsq_unit_impl.hh    |  26 -------
 src/cpu/ozone/lw_lsq.hh        |  10 ---
 src/cpu/ozone/lw_lsq_impl.hh   |  18 -----
 src/cpu/simple/atomic.cc       |  76 ++++++------------
 src/cpu/simple/atomic.hh       |  10 +--
 src/cpu/simple/base.hh         |   3 -
 src/cpu/simple/timing.cc       |   8 +-
 src/dev/alpha/console.cc       |  11 +--
 src/dev/alpha/tsunami_cchip.cc |   5 +-
 src/dev/alpha/tsunami_io.cc    |   6 +-
 src/dev/alpha/tsunami_pchip.cc |   6 +-
 src/dev/i8254xGBe.cc           |   4 +-
 src/dev/ide_ctrl.cc            |  14 ++--
 src/dev/io_device.cc           |   4 +-
 src/dev/isa_fake.cc            |   9 +--
 src/dev/ns_gige.cc             |   8 +-
 src/dev/pciconfigall.cc        |   4 +-
 src/dev/pcidev.cc              |   5 +-
 src/dev/sparc/dtod.cc          |   3 +-
 src/dev/sparc/iob.cc           |   5 +-
 src/dev/sparc/mm_disk.cc       |   6 +-
 src/dev/uart8250.cc            |   6 +-
 src/mem/bridge.cc              |  17 ++---
 src/mem/bridge.hh              |   2 +-
 src/mem/bus.cc                 |  30 ++++----
 src/mem/cache/base_cache.cc    |   2 +-
 src/mem/cache/cache_impl.hh    |  12 ++-
 src/mem/packet.cc              |  19 ++---
 src/mem/packet.hh              | 169 ++++++++++++++++++-----------------------
 src/mem/physical.cc            |   2 +-
 src/mem/port.cc                |   3 +-
 src/mem/tport.cc               |  21 +++--
 35 files changed, 199 insertions(+), 341 deletions(-)

diff --git a/src/arch/sparc/tlb.cc b/src/arch/sparc/tlb.cc
index 09266fd6e..68df19618 100644
--- a/src/arch/sparc/tlb.cc
+++ b/src/arch/sparc/tlb.cc
@@ -1023,7 +1023,7 @@ doMmuReadError:
         panic("need to impl DTB::doMmuRegRead() got asi=%#x, va=%#x\n",
             (uint32_t)asi, va);
     }
-    pkt->result = Packet::Success;
+    pkt->makeAtomicResponse();
     return tc->getCpuPtr()->cycles(1);
 }
 
@@ -1268,7 +1268,7 @@ doMmuWriteError:
         panic("need to impl DTB::doMmuRegWrite() got asi=%#x, va=%#x d=%#x\n",
             (uint32_t)pkt->req->getAsi(), pkt->getAddr(), data);
     }
-    pkt->result = Packet::Success;
+    pkt->makeAtomicResponse();
     return tc->getCpuPtr()->cycles(1);
 }
 
diff --git a/src/cpu/o3/fetch_impl.hh b/src/cpu/o3/fetch_impl.hh
index 01e9b5b31..aa0c69ac4 100644
--- a/src/cpu/o3/fetch_impl.hh
+++ b/src/cpu/o3/fetch_impl.hh
@@ -628,12 +628,6 @@ DefaultFetch<Impl>::fetchCacheLine(Addr fetch_PC, Fault &ret_fault, unsigned tid
         // Now do the timing access to see whether or not the instruction
         // exists within the cache.
         if (!icachePort->sendTiming(data_pkt)) {
-            if (data_pkt->result == Packet::BadAddress) {
-                fault = TheISA::genMachineCheckFault();
-                delete mem_req;
-                memReq[tid] = NULL;
-                warn("Bad address!\n");
-            }
             assert(retryPkt == NULL);
             assert(retryTid == -1);
             DPRINTF(Fetch, "[tid:%i] Out of MSHRs!\n", tid);
diff --git a/src/cpu/o3/lsq_unit.hh b/src/cpu/o3/lsq_unit.hh
index cc33e025d..d964b9f9f 100644
--- a/src/cpu/o3/lsq_unit.hh
+++ b/src/cpu/o3/lsq_unit.hh
@@ -653,8 +653,6 @@ LSQUnit<Impl>::read(Request *req, T &data, int load_idx)
         data_pkt->senderState = state;
 
         if (!dcachePort->sendTiming(data_pkt)) {
-            Packet::Result result = data_pkt->result;
-
             // Delete state and data packet because a load retry
             // initiates a pipeline restart; it does not retry.
             delete state;
@@ -663,10 +661,6 @@ LSQUnit<Impl>::read(Request *req, T &data, int load_idx)
 
             req = NULL;
 
-            if (result == Packet::BadAddress) {
-                return TheISA::genMachineCheckFault();
-            }
-
             // If the access didn't succeed, tell the LSQ by setting
             // the retry thread id.
             lsq->setRetryTid(lsqID);
diff --git a/src/cpu/o3/lsq_unit_impl.hh b/src/cpu/o3/lsq_unit_impl.hh
index bde4f8079..91e616589 100644
--- a/src/cpu/o3/lsq_unit_impl.hh
+++ b/src/cpu/o3/lsq_unit_impl.hh
@@ -690,9 +690,6 @@ LSQUnit<Impl>::writebackStores()
         }
 
         if (!dcachePort->sendTiming(data_pkt)) {
-            if (data_pkt->result == Packet::BadAddress) {
-                panic("LSQ sent out a bad address for a completed store!");
-            }
             // Need to handle becoming blocked on a store.
             DPRINTF(IEW, "D-Cache became blocked when writing [sn:%lli], will"
                     "retry later\n",
@@ -844,26 +841,6 @@ LSQUnit<Impl>::storePostSend(PacketPtr pkt)
 #endif
     }
 
-    if (pkt->result != Packet::Success) {
-        DPRINTF(LSQUnit,"D-Cache Write Miss on idx:%i!\n",
-                storeWBIdx);
-
-        DPRINTF(Activity, "Active st accessing mem miss [sn:%lli]\n",
-                storeQueue[storeWBIdx].inst->seqNum);
-
-        //mshrSeqNums.push_back(storeQueue[storeWBIdx].inst->seqNum);
-
-        //DPRINTF(LSQUnit, "Added MSHR. count = %i\n",mshrSeqNums.size());
-
-        // @todo: Increment stat here.
-    } else {
-        DPRINTF(LSQUnit,"D-Cache: Write Hit on idx:%i !\n",
-                storeWBIdx);
-
-        DPRINTF(Activity, "Active st accessing mem hit [sn:%lli]\n",
-                storeQueue[storeWBIdx].inst->seqNum);
-    }
-
     incrStIdx(storeWBIdx);
 }
 
@@ -952,9 +929,6 @@ LSQUnit<Impl>::recvRetry()
         assert(retryPkt != NULL);
 
         if (dcachePort->sendTiming(retryPkt)) {
-            if (retryPkt->result == Packet::BadAddress) {
-                panic("LSQ sent out a bad address for a completed store!");
-            }
             storePostSend(retryPkt);
             retryPkt = NULL;
             isStoreBlocked = false;
diff --git a/src/cpu/ozone/lw_lsq.hh b/src/cpu/ozone/lw_lsq.hh
index 2048ad6bb..d9e0d04ac 100644
--- a/src/cpu/ozone/lw_lsq.hh
+++ b/src/cpu/ozone/lw_lsq.hh
@@ -661,16 +661,6 @@ OzoneLWLSQ<Impl>::read(RequestPtr req, T &data, int load_idx)
         cpu->lockFlag = true;
     }
 
-    if (data_pkt->result != Packet::Success) {
-        DPRINTF(OzoneLSQ, "OzoneLSQ: D-cache miss!\n");
-        DPRINTF(Activity, "Activity: ld accessing mem miss [sn:%lli]\n",
-                inst->seqNum);
-    } else {
-        DPRINTF(OzoneLSQ, "OzoneLSQ: D-cache hit!\n");
-        DPRINTF(Activity, "Activity: ld accessing mem hit [sn:%lli]\n",
-                inst->seqNum);
-    }
-
     return NoFault;
 }
 
diff --git a/src/cpu/ozone/lw_lsq_impl.hh b/src/cpu/ozone/lw_lsq_impl.hh
index f26b06453..eefc0df83 100644
--- a/src/cpu/ozone/lw_lsq_impl.hh
+++ b/src/cpu/ozone/lw_lsq_impl.hh
@@ -853,24 +853,6 @@ OzoneLWLSQ<Impl>::storePostSend(PacketPtr pkt, DynInstPtr &inst)
         }
 #endif
     }
-
-    if (pkt->result != Packet::Success) {
-        DPRINTF(OzoneLSQ,"D-Cache Write Miss!\n");
-
-        DPRINTF(Activity, "Active st accessing mem miss [sn:%lli]\n",
-                inst->seqNum);
-
-        //mshrSeqNums.push_back(storeQueue[storeWBIdx].inst->seqNum);
-
-        //DPRINTF(OzoneLWLSQ, "Added MSHR. count = %i\n",mshrSeqNums.size());
-
-        // @todo: Increment stat here.
-    } else {
-        DPRINTF(OzoneLSQ,"D-Cache: Write Hit!\n");
-
-        DPRINTF(Activity, "Active st accessing mem hit [sn:%lli]\n",
-                inst->seqNum);
-    }
 }
 
 template <class Impl>
diff --git a/src/cpu/simple/atomic.cc b/src/cpu/simple/atomic.cc
index 03ff1282b..bcd6662c8 100644
--- a/src/cpu/simple/atomic.cc
+++ b/src/cpu/simple/atomic.cc
@@ -148,23 +148,9 @@ AtomicSimpleCPU::AtomicSimpleCPU(Params *p)
     icachePort.snoopRangeSent = false;
     dcachePort.snoopRangeSent = false;
 
-    ifetch_req = new Request();
-    ifetch_req->setThreadContext(p->cpu_id, 0); // Add thread ID if we add MT
-    ifetch_pkt = new Packet(ifetch_req, MemCmd::ReadReq, Packet::Broadcast);
-    ifetch_pkt->dataStatic(&inst);
-
-    data_read_req = new Request();
-    data_read_req->setThreadContext(p->cpu_id, 0); // Add thread ID here too
-    data_read_pkt = new Packet(data_read_req, MemCmd::ReadReq,
-                               Packet::Broadcast);
-    data_read_pkt->dataStatic(&dataReg);
-
-    data_write_req = new Request();
-    data_write_req->setThreadContext(p->cpu_id, 0); // Add thread ID here too
-    data_write_pkt = new Packet(data_write_req, MemCmd::WriteReq,
-                                Packet::Broadcast);
-    data_swap_pkt = new Packet(data_write_req, MemCmd::SwapReq,
-                                Packet::Broadcast);
+    ifetch_req.setThreadContext(p->cpu_id, 0); // Add thread ID if we add MT
+    data_read_req.setThreadContext(p->cpu_id, 0); // Add thread ID here too
+    data_write_req.setThreadContext(p->cpu_id, 0); // Add thread ID here too
 }
 
 
@@ -282,9 +268,7 @@ Fault
 AtomicSimpleCPU::read(Addr addr, T &data, unsigned flags)
 {
     // use the CPU's statically allocated read request and packet objects
-    Request *req = data_read_req;
-    PacketPtr pkt = data_read_pkt;
-
+    Request *req = &data_read_req;
     req->setVirt(0, addr, sizeof(T), flags, thread->readPC());
 
     if (traceData) {
@@ -296,19 +280,15 @@ AtomicSimpleCPU::read(Addr addr, T &data, unsigned flags)
 
     // Now do the access.
     if (fault == NoFault) {
-        pkt->reinitFromRequest();
+        Packet pkt = Packet(req, MemCmd::ReadReq, Packet::Broadcast);
+        pkt.dataStatic(&data);
 
         if (req->isMmapedIpr())
-            dcache_latency = TheISA::handleIprRead(thread->getTC(),pkt);
+            dcache_latency = TheISA::handleIprRead(thread->getTC(), &pkt);
         else
-            dcache_latency = dcachePort.sendAtomic(pkt);
+            dcache_latency = dcachePort.sendAtomic(&pkt);
         dcache_access = true;
-#if !defined(NDEBUG)
-        if (pkt->result != Packet::Success)
-            panic("Unable to find responder for address pa = %#X va = %#X\n",
-                    pkt->req->getPaddr(), pkt->req->getVaddr());
-#endif
-        data = pkt->get<T>();
+        assert(!pkt.isError());
 
         if (req->isLocked()) {
             TheISA::handleLockedRead(thread, req);
@@ -378,16 +358,9 @@ Fault
 AtomicSimpleCPU::write(T data, Addr addr, unsigned flags, uint64_t *res)
 {
     // use the CPU's statically allocated write request and packet objects
-    Request *req = data_write_req;
-    PacketPtr pkt;
-
+    Request *req = &data_write_req;
     req->setVirt(0, addr, sizeof(T), flags, thread->readPC());
 
-    if (req->isSwap())
-        pkt = data_swap_pkt;
-    else
-        pkt = data_write_pkt;
-
     if (traceData) {
         traceData->setAddr(addr);
     }
@@ -397,6 +370,11 @@ AtomicSimpleCPU::write(T data, Addr addr, unsigned flags, uint64_t *res)
 
     // Now do the access.
     if (fault == NoFault) {
+        Packet pkt =
+            Packet(req, req->isSwap() ? MemCmd::SwapReq : MemCmd::WriteReq,
+                   Packet::Broadcast);
+        pkt.dataStatic(&data);
+
         bool do_access = true;  // flag to suppress cache access
 
         if (req->isLocked()) {
@@ -409,27 +387,19 @@ AtomicSimpleCPU::write(T data, Addr addr, unsigned flags, uint64_t *res)
 
 
         if (do_access) {
-            pkt->reinitFromRequest();
-            pkt->dataStatic(&data);
-
             if (req->isMmapedIpr()) {
-                dcache_latency = TheISA::handleIprWrite(thread->getTC(), pkt);
+                dcache_latency = TheISA::handleIprWrite(thread->getTC(), &pkt);
             } else {
                 data = htog(data);
-                dcache_latency = dcachePort.sendAtomic(pkt);
+                dcache_latency = dcachePort.sendAtomic(&pkt);
             }
             dcache_access = true;
-
-#if !defined(NDEBUG)
-            if (pkt->result != Packet::Success)
-                panic("Unable to find responder for address pa = %#X va = %#X\n",
-                        pkt->req->getPaddr(), pkt->req->getVaddr());
-#endif
+            assert(!pkt.isError());
         }
 
         if (req->isSwap()) {
             assert(res);
-            *res = pkt->get<T>();
+            *res = pkt.get<T>();
         } else if (res) {
             *res = req->getExtraData();
         }
@@ -513,7 +483,7 @@ AtomicSimpleCPU::tick()
         if (!curStaticInst || !curStaticInst->isDelayedCommit())
             checkForInterrupts();
 
-        Fault fault = setupFetchRequest(ifetch_req);
+        Fault fault = setupFetchRequest(&ifetch_req);
 
         if (fault == NoFault) {
             Tick icache_latency = 0;
@@ -524,9 +494,11 @@ AtomicSimpleCPU::tick()
             //if(predecoder.needMoreBytes())
             //{
                 icache_access = true;
-                ifetch_pkt->reinitFromRequest();
+                Packet ifetch_pkt = Packet(&ifetch_req, MemCmd::ReadReq,
+                                           Packet::Broadcast);
+                ifetch_pkt.dataStatic(&inst);
 
-                icache_latency = icachePort.sendAtomic(ifetch_pkt);
+                icache_latency = icachePort.sendAtomic(&ifetch_pkt);
                 // ifetch_req is initialized to read the instruction directly
                 // into the CPU object's inst field.
             //}
diff --git a/src/cpu/simple/atomic.hh b/src/cpu/simple/atomic.hh
index b127e3791..28e883b24 100644
--- a/src/cpu/simple/atomic.hh
+++ b/src/cpu/simple/atomic.hh
@@ -121,13 +121,9 @@ class AtomicSimpleCPU : public BaseSimpleCPU
     };
     DcachePort dcachePort;
 
-    Request  *ifetch_req;
-    PacketPtr ifetch_pkt;
-    Request  *data_read_req;
-    PacketPtr data_read_pkt;
-    Request  *data_write_req;
-    PacketPtr data_write_pkt;
-    PacketPtr data_swap_pkt;
+    Request ifetch_req;
+    Request data_read_req;
+    Request data_write_req;
 
     bool dcache_access;
     Tick dcache_latency;
diff --git a/src/cpu/simple/base.hh b/src/cpu/simple/base.hh
index 243167db0..0550aa036 100644
--- a/src/cpu/simple/base.hh
+++ b/src/cpu/simple/base.hh
@@ -131,9 +131,6 @@ class BaseSimpleCPU : public BaseCPU
     // The predecoder
     TheISA::Predecoder predecoder;
 
-    // Static data storage
-    TheISA::LargestRead dataReg;
-
     StaticInstPtr curStaticInst;
     StaticInstPtr curMacroStaticInst;
 
diff --git a/src/cpu/simple/timing.cc b/src/cpu/simple/timing.cc
index 7698a588d..b4e4a4433 100644
--- a/src/cpu/simple/timing.cc
+++ b/src/cpu/simple/timing.cc
@@ -501,7 +501,7 @@ TimingSimpleCPU::completeIfetch(PacketPtr pkt)
 {
     // received a response from the icache: execute the received
     // instruction
-    assert(pkt->result == Packet::Success);
+    assert(!pkt->isError());
     assert(_status == IcacheWaitResponse);
 
     _status = Running;
@@ -569,7 +569,7 @@ TimingSimpleCPU::IcachePort::recvTiming(PacketPtr pkt)
 
         return true;
     }
-    else if (pkt->result == Packet::Nacked) {
+    else if (pkt->wasNacked()) {
         assert(cpu->_status == IcacheWaitResponse);
         pkt->reinitNacked();
         if (!sendTiming(pkt)) {
@@ -600,7 +600,7 @@ TimingSimpleCPU::completeDataAccess(PacketPtr pkt)
 {
     // received a response from the dcache: complete the load or store
     // instruction
-    assert(pkt->result == Packet::Success);
+    assert(!pkt->isError());
     assert(_status == DcacheWaitResponse);
     _status = Running;
 
@@ -663,7 +663,7 @@ TimingSimpleCPU::DcachePort::recvTiming(PacketPtr pkt)
 
         return true;
     }
-    else if (pkt->result == Packet::Nacked) {
+    else if (pkt->wasNacked()) {
         assert(cpu->_status == DcacheWaitResponse);
         pkt->reinitNacked();
         if (!sendTiming(pkt)) {
diff --git a/src/dev/alpha/console.cc b/src/dev/alpha/console.cc
index 443f376a5..55549a154 100644
--- a/src/dev/alpha/console.cc
+++ b/src/dev/alpha/console.cc
@@ -102,7 +102,6 @@ AlphaConsole::read(PacketPtr pkt)
      * machine dependent address swizzle is required?
      */
 
-    assert(pkt->result == Packet::Unknown);
     assert(pkt->getAddr() >= pioAddr && pkt->getAddr() < pioAddr + pioSize);
 
     Addr daddr = pkt->getAddr() - pioAddr;
@@ -130,7 +129,7 @@ AlphaConsole::read(PacketPtr pkt)
                     /* Old console code read in everyting as a 32bit int
                      * we now break that for better error checking.
                      */
-                  pkt->result = Packet::BadAddress;
+                  pkt->setBadAddress();
             }
             DPRINTF(AlphaConsole, "read: offset=%#x val=%#x\n", daddr,
                     pkt->get<uint32_t>());
@@ -187,17 +186,15 @@ AlphaConsole::read(PacketPtr pkt)
                     pkt->get<uint64_t>());
             break;
         default:
-          pkt->result = Packet::BadAddress;
+          pkt->setBadAddress();
     }
-    if (pkt->result == Packet::Unknown)
-        pkt->result = Packet::Success;
+    pkt->makeAtomicResponse();
     return pioDelay;
 }
 
 Tick
 AlphaConsole::write(PacketPtr pkt)
 {
-    assert(pkt->result == Packet::Unknown);
     assert(pkt->getAddr() >= pioAddr && pkt->getAddr() < pioAddr + pioSize);
     Addr daddr = pkt->getAddr() - pioAddr;
 
@@ -245,7 +242,7 @@ AlphaConsole::write(PacketPtr pkt)
             panic("Unknown 64bit access, %#x\n", daddr);
     }
 
-    pkt->result = Packet::Success;
+    pkt->makeAtomicResponse();
 
     return pioDelay;
 }
diff --git a/src/dev/alpha/tsunami_cchip.cc b/src/dev/alpha/tsunami_cchip.cc
index 118160adf..a7175d90c 100644
--- a/src/dev/alpha/tsunami_cchip.cc
+++ b/src/dev/alpha/tsunami_cchip.cc
@@ -78,7 +78,6 @@ TsunamiCChip::read(PacketPtr pkt)
 {
     DPRINTF(Tsunami, "read  va=%#x size=%d\n", pkt->getAddr(), pkt->getSize());
 
-    assert(pkt->result == Packet::Unknown);
     assert(pkt->getAddr() >= pioAddr && pkt->getAddr() < pioAddr + pioSize);
 
     Addr regnum = (pkt->getAddr() - pioAddr) >> 6;
@@ -181,7 +180,7 @@ TsunamiCChip::read(PacketPtr pkt)
     DPRINTF(Tsunami, "Tsunami CChip: read  regnum=%#x size=%d data=%lld\n",
             regnum, pkt->getSize(), pkt->get<uint64_t>());
 
-    pkt->result = Packet::Success;
+    pkt->makeAtomicResponse();
     return pioDelay;
 }
 
@@ -365,7 +364,7 @@ TsunamiCChip::write(PacketPtr pkt)
               panic("default in cchip read reached, accessing 0x%x\n");
         }  // swtich(regnum)
     } // not BIG_TSUNAMI write
-    pkt->result = Packet::Success;
+    pkt->makeAtomicResponse();
     return pioDelay;
 }
 
diff --git a/src/dev/alpha/tsunami_io.cc b/src/dev/alpha/tsunami_io.cc
index 58933428c..f59a06fba 100644
--- a/src/dev/alpha/tsunami_io.cc
+++ b/src/dev/alpha/tsunami_io.cc
@@ -461,7 +461,6 @@ TsunamiIO::frequency() const
 Tick
 TsunamiIO::read(PacketPtr pkt)
 {
-    assert(pkt->result == Packet::Unknown);
     assert(pkt->getAddr() >= pioAddr && pkt->getAddr() < pioAddr + pioSize);
 
     Addr daddr = pkt->getAddr() - pioAddr;
@@ -520,14 +519,13 @@ TsunamiIO::read(PacketPtr pkt)
     } else {
        panic("I/O Read - invalid size - va %#x size %d\n", pkt->getAddr(), pkt->getSize());
     }
-    pkt->result = Packet::Success;
+    pkt->makeAtomicResponse();
     return pioDelay;
 }
 
 Tick
 TsunamiIO::write(PacketPtr pkt)
 {
-    assert(pkt->result == Packet::Unknown);
     assert(pkt->getAddr() >= pioAddr && pkt->getAddr() < pioAddr + pioSize);
     Addr daddr = pkt->getAddr() - pioAddr;
 
@@ -600,7 +598,7 @@ TsunamiIO::write(PacketPtr pkt)
         panic("I/O Write - va%#x size %d data %#x\n", pkt->getAddr(), pkt->getSize(), pkt->get<uint8_t>());
     }
 
-    pkt->result = Packet::Success;
+    pkt->makeAtomicResponse();
     return pioDelay;
 }
 
diff --git a/src/dev/alpha/tsunami_pchip.cc b/src/dev/alpha/tsunami_pchip.cc
index f30199337..be164e5b9 100644
--- a/src/dev/alpha/tsunami_pchip.cc
+++ b/src/dev/alpha/tsunami_pchip.cc
@@ -71,7 +71,6 @@ TsunamiPChip::TsunamiPChip(Params *p)
 Tick
 TsunamiPChip::read(PacketPtr pkt)
 {
-    assert(pkt->result == Packet::Unknown);
     assert(pkt->getAddr() >= pioAddr && pkt->getAddr() < pioAddr + pioSize);
 
     pkt->allocate();
@@ -145,7 +144,7 @@ TsunamiPChip::read(PacketPtr pkt)
       default:
           panic("Default in PChip Read reached reading 0x%x\n", daddr);
     }
-    pkt->result = Packet::Success;
+    pkt->makeAtomicResponse();
     return pioDelay;
 
 }
@@ -153,7 +152,6 @@ TsunamiPChip::read(PacketPtr pkt)
 Tick
 TsunamiPChip::write(PacketPtr pkt)
 {
-    assert(pkt->result == Packet::Unknown);
     assert(pkt->getAddr() >= pioAddr && pkt->getAddr() < pioAddr + pioSize);
     Addr daddr = (pkt->getAddr() - pioAddr) >> 6;
 
@@ -224,7 +222,7 @@ TsunamiPChip::write(PacketPtr pkt)
 
     } // uint64_t
 
-    pkt->result = Packet::Success;
+    pkt->makeAtomicResponse();
     return pioDelay;
 }
 
diff --git a/src/dev/i8254xGBe.cc b/src/dev/i8254xGBe.cc
index baf13c49a..7ea4c704b 100644
--- a/src/dev/i8254xGBe.cc
+++ b/src/dev/i8254xGBe.cc
@@ -271,7 +271,7 @@ IGbE::read(PacketPtr pkt)
             pkt->set<uint32_t>(0);
     };
 
-    pkt->result = Packet::Success;
+    pkt->makeAtomicResponse();
     return pioDelay;
 }
 
@@ -543,7 +543,7 @@ IGbE::write(PacketPtr pkt)
            panic("Write request to unknown register number: %#x\n", daddr);
     };
 
-    pkt->result = Packet::Success;
+    pkt->makeAtomicResponse();
     return pioDelay;
 }
 
diff --git a/src/dev/ide_ctrl.cc b/src/dev/ide_ctrl.cc
index 921ba1cd0..01243ae73 100644
--- a/src/dev/ide_ctrl.cc
+++ b/src/dev/ide_ctrl.cc
@@ -295,7 +295,7 @@ IdeController::readConfig(PacketPtr pkt)
       default:
         panic("invalid access size(?) for PCI configspace!\n");
     }
-    pkt->result = Packet::Success;
+    pkt->makeAtomicResponse();
     return configDelay;
 
 }
@@ -403,7 +403,7 @@ IdeController::writeConfig(PacketPtr pkt)
             bm_enabled = false;
         break;
     }
-    pkt->result = Packet::Success;
+    pkt->makeAtomicResponse();
     return configDelay;
 }
 
@@ -423,7 +423,7 @@ IdeController::read(PacketPtr pkt)
     parseAddr(pkt->getAddr(), offset, channel, reg_type);
 
     if (!io_enabled) {
-        pkt->result = Packet::Success;
+        pkt->makeAtomicResponse();
         return pioDelay;
     }
 
@@ -490,7 +490,7 @@ IdeController::read(PacketPtr pkt)
     DPRINTF(IdeCtrl, "read from offset: %#x size: %#x data: %#x\n",
             offset, pkt->getSize(), pkt->get<uint32_t>());
 
-    pkt->result = Packet::Success;
+    pkt->makeAtomicResponse();
     return pioDelay;
 }
 
@@ -506,7 +506,7 @@ IdeController::write(PacketPtr pkt)
     parseAddr(pkt->getAddr(), offset, channel, reg_type);
 
     if (!io_enabled) {
-        pkt->result = Packet::Success;
+        pkt->makeAtomicResponse();
         DPRINTF(IdeCtrl, "io not enabled\n");
         return pioDelay;
     }
@@ -514,7 +514,7 @@ IdeController::write(PacketPtr pkt)
     switch (reg_type) {
       case BMI_BLOCK:
         if (!bm_enabled) {
-            pkt->result = Packet::Success;
+            pkt->makeAtomicResponse();
             return pioDelay;
         }
 
@@ -673,7 +673,7 @@ IdeController::write(PacketPtr pkt)
             offset, pkt->getSize(), pkt->get<uint32_t>());
 
 
-    pkt->result = Packet::Success;
+    pkt->makeAtomicResponse();
     return pioDelay;
 }
 
diff --git a/src/dev/io_device.cc b/src/dev/io_device.cc
index ecbb391ef..806d13d07 100644
--- a/src/dev/io_device.cc
+++ b/src/dev/io_device.cc
@@ -100,9 +100,7 @@ DmaPort::DmaPort(DmaDevice *dev, System *s)
 bool
 DmaPort::recvTiming(PacketPtr pkt)
 {
-
-
-    if (pkt->result == Packet::Nacked) {
+    if (pkt->wasNacked()) {
         DPRINTF(DMA, "Received nacked Pkt %#x with State: %#x Addr: %#x\n",
                pkt, pkt->senderState, pkt->getAddr());
 
diff --git a/src/dev/isa_fake.cc b/src/dev/isa_fake.cc
index c36ddeb83..5cd0afb36 100644
--- a/src/dev/isa_fake.cc
+++ b/src/dev/isa_fake.cc
@@ -56,7 +56,6 @@ IsaFake::IsaFake(Params *p)
 Tick
 IsaFake::read(PacketPtr pkt)
 {
-    assert(pkt->result == Packet::Unknown);
 
     if (params()->warnAccess != "")
         warn("Device %s accessed by read to address %#x size=%d\n",
@@ -64,7 +63,7 @@ IsaFake::read(PacketPtr pkt)
     if (params()->retBadAddr) {
         DPRINTF(Tsunami, "read to bad address va=%#x size=%d\n",
                 pkt->getAddr(), pkt->getSize());
-        pkt->result = Packet::BadAddress;
+        pkt->setBadAddress();
     } else {
         assert(pkt->getAddr() >= pioAddr && pkt->getAddr() < pioAddr + pioSize);
         DPRINTF(Tsunami, "read  va=%#x size=%d\n",
@@ -85,7 +84,7 @@ IsaFake::read(PacketPtr pkt)
           default:
             panic("invalid access size!\n");
         }
-        pkt->result = Packet::Success;
+        pkt->makeAtomicResponse();
     }
     return pioDelay;
 }
@@ -117,7 +116,7 @@ IsaFake::write(PacketPtr pkt)
     if (params()->retBadAddr) {
         DPRINTF(Tsunami, "write to bad address va=%#x size=%d \n",
                 pkt->getAddr(), pkt->getSize());
-        pkt->result = Packet::BadAddress;
+        pkt->setBadAddress();
     } else {
         DPRINTF(Tsunami, "write - va=%#x size=%d \n",
                 pkt->getAddr(), pkt->getSize());
@@ -140,7 +139,7 @@ IsaFake::write(PacketPtr pkt)
                 panic("invalid access size!\n");
             }
         }
-        pkt->result = Packet::Success;
+        pkt->makeAtomicResponse();
     }
     return pioDelay;
 }
diff --git a/src/dev/ns_gige.cc b/src/dev/ns_gige.cc
index e9d9c419d..86f664238 100644
--- a/src/dev/ns_gige.cc
+++ b/src/dev/ns_gige.cc
@@ -487,7 +487,7 @@ NSGigE::writeConfig(PacketPtr pkt)
             ioEnable = false;
         break;
     }
-    pkt->result = Packet::Success;
+    pkt->makeAtomicResponse();
     return configDelay;
 }
 
@@ -519,7 +519,7 @@ NSGigE::read(PacketPtr pkt)
         // doesn't actually DEPEND upon their values
         // MIB are just hardware stats keepers
         pkt->set<uint32_t>(0);
-        pkt->result = Packet::Success;
+        pkt->makeAtomicResponse();
         return pioDelay;
     } else if (daddr > 0x3FC)
         panic("Something is messed up!\n");
@@ -715,7 +715,7 @@ NSGigE::read(PacketPtr pkt)
         DPRINTF(EthernetPIO, "read from %#x: data=%d data=%#x\n",
                 daddr, reg, reg);
 
-    pkt->result = Packet::Success;
+    pkt->makeAtomicResponse();
     return pioDelay;
 }
 
@@ -1122,7 +1122,7 @@ NSGigE::write(PacketPtr pkt)
     } else {
         panic("Invalid Request Size");
     }
-    pkt->result = Packet::Success;
+    pkt->makeAtomicResponse();
     return pioDelay;
 }
 
diff --git a/src/dev/pciconfigall.cc b/src/dev/pciconfigall.cc
index bd1855847..b07ee1a49 100644
--- a/src/dev/pciconfigall.cc
+++ b/src/dev/pciconfigall.cc
@@ -54,7 +54,6 @@ PciConfigAll::PciConfigAll(Params *p)
 Tick
 PciConfigAll::read(PacketPtr pkt)
 {
-    assert(pkt->result == Packet::Unknown);
 
     pkt->allocate();
 
@@ -74,14 +73,13 @@ PciConfigAll::read(PacketPtr pkt)
       default:
         panic("invalid access size(?) for PCI configspace!\n");
     }
-    pkt->result = Packet::Success;
+    pkt->makeAtomicResponse();
     return params()->pio_delay;
 }
 
 Tick
 PciConfigAll::write(PacketPtr pkt)
 {
-    assert(pkt->result == Packet::Unknown);
     panic("Attempting to write to config space on non-existant device\n");
     M5_DUMMY_RETURN
 }
diff --git a/src/dev/pcidev.cc b/src/dev/pcidev.cc
index c2a2bc02d..85337c841 100644
--- a/src/dev/pcidev.cc
+++ b/src/dev/pcidev.cc
@@ -68,7 +68,6 @@ PciDev::PciConfigPort::PciConfigPort(PciDev *dev, int busid, int devid,
 Tick
 PciDev::PciConfigPort::recvAtomic(PacketPtr pkt)
 {
-    assert(pkt->result == Packet::Unknown);
     assert(pkt->getAddr() >= configAddr &&
            pkt->getAddr() < configAddr + PCI_CONFIG_SIZE);
     return pkt->isRead() ? device->readConfig(pkt) : device->writeConfig(pkt);
@@ -156,7 +155,7 @@ PciDev::readConfig(PacketPtr pkt)
       default:
         panic("invalid access size(?) for PCI configspace!\n");
     }
-    pkt->result = Packet::Success;
+    pkt->makeAtomicResponse();
     return configDelay;
 
 }
@@ -283,7 +282,7 @@ PciDev::writeConfig(PacketPtr pkt)
       default:
         panic("invalid access size(?) for PCI configspace!\n");
     }
-    pkt->result = Packet::Success;
+    pkt->makeAtomicResponse();
     return configDelay;
 
 }
diff --git a/src/dev/sparc/dtod.cc b/src/dev/sparc/dtod.cc
index 42275c60a..22df873b6 100644
--- a/src/dev/sparc/dtod.cc
+++ b/src/dev/sparc/dtod.cc
@@ -74,7 +74,6 @@ DumbTOD::DumbTOD(Params *p)
 Tick
 DumbTOD::read(PacketPtr pkt)
 {
-    assert(pkt->result == Packet::Unknown);
     assert(pkt->getAddr() >= pioAddr && pkt->getAddr() < pioAddr + pioSize);
     assert(pkt->getSize() == 8);
 
@@ -82,7 +81,7 @@ DumbTOD::read(PacketPtr pkt)
     pkt->set(todTime);
     todTime += 1000;
 
-    pkt->result = Packet::Success;
+    pkt->makeAtomicResponse();
     return pioDelay;
 }
 
diff --git a/src/dev/sparc/iob.cc b/src/dev/sparc/iob.cc
index e686e51f7..b27f45eba 100644
--- a/src/dev/sparc/iob.cc
+++ b/src/dev/sparc/iob.cc
@@ -72,7 +72,6 @@ Iob::Iob(Params *p)
 Tick
 Iob::read(PacketPtr pkt)
 {
-    assert(pkt->result == Packet::Unknown);
 
     if (pkt->getAddr() >= iobManAddr && pkt->getAddr() < iobManAddr + iobManSize)
         readIob(pkt);
@@ -81,7 +80,7 @@ Iob::read(PacketPtr pkt)
     else
         panic("Invalid address reached Iob\n");
 
-    pkt->result = Packet::Success;
+    pkt->makeAtomicResponse();
     return pioDelay;
 }
 
@@ -176,7 +175,7 @@ Iob::write(PacketPtr pkt)
         panic("Invalid address reached Iob\n");
 
 
-    pkt->result = Packet::Success;
+    pkt->makeAtomicResponse();
     return pioDelay;
 }
 
diff --git a/src/dev/sparc/mm_disk.cc b/src/dev/sparc/mm_disk.cc
index 81c5c589a..bbb773c48 100644
--- a/src/dev/sparc/mm_disk.cc
+++ b/src/dev/sparc/mm_disk.cc
@@ -61,7 +61,6 @@ MmDisk::read(PacketPtr pkt)
     uint32_t d32;
     uint64_t d64;
 
-    assert(pkt->result == Packet::Unknown);
     assert(pkt->getAddr() >= pioAddr && pkt->getAddr() < pioAddr + pioSize);
     accessAddr = pkt->getAddr() - pioAddr;
 
@@ -101,7 +100,7 @@ MmDisk::read(PacketPtr pkt)
         panic("Invalid access size\n");
     }
 
-    pkt->result = Packet::Success;
+    pkt->makeAtomicResponse();
     return pioDelay;
 }
 
@@ -115,7 +114,6 @@ MmDisk::write(PacketPtr pkt)
     uint32_t d32;
     uint64_t d64;
 
-    assert(pkt->result == Packet::Unknown);
     assert(pkt->getAddr() >= pioAddr && pkt->getAddr() < pioAddr + pioSize);
     accessAddr = pkt->getAddr() - pioAddr;
 
@@ -157,7 +155,7 @@ MmDisk::write(PacketPtr pkt)
         panic("Invalid access size\n");
     }
 
-    pkt->result = Packet::Success;
+    pkt->makeAtomicResponse();
     return pioDelay;
 }
 
diff --git a/src/dev/uart8250.cc b/src/dev/uart8250.cc
index 50307aad4..0ad80e077 100644
--- a/src/dev/uart8250.cc
+++ b/src/dev/uart8250.cc
@@ -111,7 +111,6 @@ Uart8250::Uart8250(Params *p)
 Tick
 Uart8250::read(PacketPtr pkt)
 {
-    assert(pkt->result == Packet::Unknown);
     assert(pkt->getAddr() >= pioAddr && pkt->getAddr() < pioAddr + pioSize);
     assert(pkt->getSize() == 1);
 
@@ -186,7 +185,7 @@ Uart8250::read(PacketPtr pkt)
 /*    uint32_t d32 = *data;
     DPRINTF(Uart, "Register read to register %#x returned %#x\n", daddr, d32);
 */
-    pkt->result = Packet::Success;
+    pkt->makeAtomicResponse();
     return pioDelay;
 }
 
@@ -194,7 +193,6 @@ Tick
 Uart8250::write(PacketPtr pkt)
 {
 
-    assert(pkt->result == Packet::Unknown);
     assert(pkt->getAddr() >= pioAddr && pkt->getAddr() < pioAddr + pioSize);
     assert(pkt->getSize() == 1);
 
@@ -272,7 +270,7 @@ Uart8250::write(PacketPtr pkt)
             panic("Tried to access a UART port that doesn't exist\n");
             break;
     }
-    pkt->result = Packet::Success;
+    pkt->makeAtomicResponse();
     return pioDelay;
 }
 
diff --git a/src/mem/bridge.cc b/src/mem/bridge.cc
index fb4574844..77178d518 100644
--- a/src/mem/bridge.cc
+++ b/src/mem/bridge.cc
@@ -121,14 +121,13 @@ Bridge::BridgePort::recvTiming(PacketPtr pkt)
                     otherPort->sendQueue.size(), otherPort->queuedRequests,
                     otherPort->outstandingResponses);
 
-    if (pkt->isRequest() && otherPort->reqQueueFull() && pkt->result !=
-            Packet::Nacked) {
+    if (pkt->isRequest() && otherPort->reqQueueFull() && !pkt->wasNacked()) {
         DPRINTF(BusBridge, "Remote queue full, nacking\n");
         nackRequest(pkt);
         return true;
     }
 
-    if (pkt->needsResponse() && pkt->result != Packet::Nacked)
+    if (pkt->needsResponse() && !pkt->wasNacked())
         if (respQueueFull()) {
             DPRINTF(BusBridge, "Local queue full, no space for response, nacking\n");
             DPRINTF(BusBridge, "queue size: %d outreq: %d outstanding resp: %d\n",
@@ -149,7 +148,7 @@ void
 Bridge::BridgePort::nackRequest(PacketPtr pkt)
 {
     // Nack the packet
-    pkt->result = Packet::Nacked;
+    pkt->setNacked();
     pkt->setDest(pkt->getSrc());
 
     //put it on the list to send
@@ -194,7 +193,7 @@ Bridge::BridgePort::nackRequest(PacketPtr pkt)
 void
 Bridge::BridgePort::queueForSendTiming(PacketPtr pkt)
 {
-    if (pkt->isResponse() || pkt->result == Packet::Nacked) {
+    if (pkt->isResponse() || pkt->wasNacked()) {
         // This is a response for a request we forwarded earlier.  The
         // corresponding PacketBuffer should be stored in the packet's
         // senderState field.
@@ -206,7 +205,7 @@ Bridge::BridgePort::queueForSendTiming(PacketPtr pkt)
 
         // Check if this packet was expecting a response and it's a nacked
         // packet, in which case we will never being seeing it
-        if (buf->expectResponse && pkt->result == Packet::Nacked)
+        if (buf->expectResponse && pkt->wasNacked())
             --outstandingResponses;
 
 
@@ -217,7 +216,7 @@ Bridge::BridgePort::queueForSendTiming(PacketPtr pkt)
     }
 
 
-    if (pkt->isRequest() && pkt->result != Packet::Nacked) {
+    if (pkt->isRequest() && !pkt->wasNacked()) {
         ++queuedRequests;
     }
 
@@ -251,7 +250,7 @@ Bridge::BridgePort::trySend()
 
     // Ugly! @todo When multilevel coherence works this will be removed
     if (pkt->cmd == MemCmd::WriteInvalidateReq && fixPartialWrite &&
-            pkt->result != Packet::Nacked) {
+            !pkt->wasNacked()) {
         PacketPtr funcPkt = new Packet(pkt->req, MemCmd::WriteReq,
                             Packet::Broadcast);
         funcPkt->dataStatic(pkt->getPtr<uint8_t>());
@@ -264,7 +263,7 @@ Bridge::BridgePort::trySend()
             buf->origSrc, pkt->getDest(), pkt->getAddr());
 
     bool wasReq = pkt->isRequest();
-    bool wasNacked = pkt->result == Packet::Nacked;
+    bool wasNacked = pkt->wasNacked();
 
     if (sendTiming(pkt)) {
         // send successful
diff --git a/src/mem/bridge.hh b/src/mem/bridge.hh
index 89d626611..7af764437 100644
--- a/src/mem/bridge.hh
+++ b/src/mem/bridge.hh
@@ -86,7 +86,7 @@ class Bridge : public MemObject
                   expectResponse(_pkt->needsResponse() && !nack)
 
             {
-                if (!pkt->isResponse() && !nack && pkt->result != Packet::Nacked)
+                if (!pkt->isResponse() && !nack && !pkt->wasNacked())
                     pkt->senderState = this;
             }
 
diff --git a/src/mem/bus.cc b/src/mem/bus.cc
index ffd5e25a7..83ce0f87d 100644
--- a/src/mem/bus.cc
+++ b/src/mem/bus.cc
@@ -173,9 +173,8 @@ bool
 Bus::recvTiming(PacketPtr pkt)
 {
     Port *port;
-    DPRINTF(Bus, "recvTiming: packet src %d dest %d addr 0x%x cmd %s result %d\n",
-            pkt->getSrc(), pkt->getDest(), pkt->getAddr(), pkt->cmdString(),
-            pkt->result);
+    DPRINTF(Bus, "recvTiming: packet src %d dest %d addr 0x%x cmd %s\n",
+            pkt->getSrc(), pkt->getDest(), pkt->getAddr(), pkt->cmdString());
 
     BusPort *pktPort;
     if (pkt->getSrc() == defaultId)
@@ -329,6 +328,8 @@ Bus::functionalSnoop(PacketPtr pkt, Port *responder)
     // id after each
     int src_id = pkt->getSrc();
 
+    assert(pkt->isRequest()); // hasn't already been satisfied
+
     for (SnoopIter s_iter = snoopPorts.begin();
          s_iter != snoopPorts.end();
          s_iter++) {
@@ -336,7 +337,7 @@ Bus::functionalSnoop(PacketPtr pkt, Port *responder)
         if (p != responder && p->getId() != src_id) {
             p->sendFunctional(pkt);
         }
-        if (pkt->result == Packet::Success) {
+        if (pkt->isResponse()) {
             break;
         }
         pkt->setSrc(src_id);
@@ -369,14 +370,15 @@ Bus::recvAtomic(PacketPtr pkt)
     DPRINTF(Bus, "recvAtomic: packet src %d dest %d addr 0x%x cmd %s\n",
             pkt->getSrc(), pkt->getDest(), pkt->getAddr(), pkt->cmdString());
     assert(pkt->getDest() == Packet::Broadcast);
+    assert(pkt->isRequest());
 
     // Variables for recording original command and snoop response (if
     // any)... if a snooper respondes, we will need to restore
     // original command so that additional snoops can take place
     // properly
     MemCmd orig_cmd = pkt->cmd;
-    Packet::Result response_result = Packet::Unknown;
     MemCmd response_cmd = MemCmd::InvalidCmd;
+    int orig_src = pkt->getSrc();
 
     Port *target_port = findPort(pkt->getAddr(), pkt->getSrc());
 
@@ -387,20 +389,18 @@ Bus::recvAtomic(PacketPtr pkt)
         assert(p != target_port);
         if (p->getId() != pkt->getSrc()) {
             p->sendAtomic(pkt);
-            if (pkt->result != Packet::Unknown) {
+            if (pkt->isResponse()) {
                 // response from snoop agent
                 assert(pkt->cmd != orig_cmd);
                 assert(pkt->memInhibitAsserted());
-                assert(pkt->isResponse());
                 // should only happen once
-                assert(response_result == Packet::Unknown);
                 assert(response_cmd == MemCmd::InvalidCmd);
                 // save response state
-                response_result = pkt->result;
                 response_cmd = pkt->cmd;
                 // restore original packet state for remaining snoopers
                 pkt->cmd = orig_cmd;
-                pkt->result = Packet::Unknown;
+                pkt->setSrc(orig_src);
+                pkt->setDest(Packet::Broadcast);
             }
         }
     }
@@ -408,13 +408,11 @@ Bus::recvAtomic(PacketPtr pkt)
     Tick response_time = target_port->sendAtomic(pkt);
 
     // if we got a response from a snooper, restore it here
-    if (response_result != Packet::Unknown) {
-        assert(response_cmd != MemCmd::InvalidCmd);
+    if (response_cmd != MemCmd::InvalidCmd) {
         // no one else should have responded
-        assert(pkt->result == Packet::Unknown);
+        assert(!pkt->isResponse());
         assert(pkt->cmd == orig_cmd);
         pkt->cmd = response_cmd;
-        pkt->result = response_result;
     }
 
     // why do we have this packet field and the return value both???
@@ -434,8 +432,8 @@ Bus::recvFunctional(PacketPtr pkt)
     Port* port = findPort(pkt->getAddr(), pkt->getSrc());
     functionalSnoop(pkt, port ? port : interfaces[pkt->getSrc()]);
 
-    // If the snooping found what we were looking for, we're done.
-    if (pkt->result != Packet::Success && port) {
+    // If the snooping hasn't found what we were looking for, keep going.
+    if (!pkt->isResponse() && port) {
         port->sendFunctional(pkt);
     }
 }
diff --git a/src/mem/cache/base_cache.cc b/src/mem/cache/base_cache.cc
index 5062d6e87..870658675 100644
--- a/src/mem/cache/base_cache.cc
+++ b/src/mem/cache/base_cache.cc
@@ -82,7 +82,7 @@ void
 BaseCache::CachePort::checkAndSendFunctional(PacketPtr pkt)
 {
     checkFunctional(pkt);
-    if (pkt->result != Packet::Success)
+    if (!pkt->isResponse())
         sendFunctional(pkt);
 }
 
diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh
index b76d7e392..1823ea6b9 100644
--- a/src/mem/cache/cache_impl.hh
+++ b/src/mem/cache/cache_impl.hh
@@ -502,7 +502,6 @@ Cache<TagStore>::atomicAccess(PacketPtr pkt)
 
     if (pkt->needsResponse()) {
         pkt->makeAtomicResponse();
-        pkt->result = Packet::Success;
     }
 
     return lat;
@@ -648,14 +647,13 @@ Cache<TagStore>::handleResponse(PacketPtr pkt)
     MSHR *mshr = dynamic_cast<MSHR*>(pkt->senderState);
     assert(mshr);
 
-    if (pkt->result == Packet::Nacked) {
+    if (pkt->wasNacked()) {
         //pkt->reinitFromRequest();
         warn("NACKs from devices not connected to the same bus "
              "not implemented\n");
         return;
     }
-    assert(pkt->result != Packet::BadAddress);
-    assert(pkt->result == Packet::Success);
+    assert(!pkt->isError());
     DPRINTF(Cache, "Handling response to %x\n", pkt->getAddr());
 
     MSHRQueue *mq = mshr->queue;
@@ -1142,7 +1140,7 @@ void
 Cache<TagStore>::CpuSidePort::recvFunctional(PacketPtr pkt)
 {
     checkFunctional(pkt);
-    if (pkt->result != Packet::Success)
+    if (!pkt->isResponse())
         myCache()->functionalAccess(pkt, cache->memSidePort);
 }
 
@@ -1180,7 +1178,7 @@ Cache<TagStore>::MemSidePort::recvTiming(PacketPtr pkt)
     // this needs to be fixed so that the cache updates the mshr and sends the
     // packet back out on the link, but it probably won't happen so until this
     // gets fixed, just panic when it does
-    if (pkt->result == Packet::Nacked)
+    if (pkt->wasNacked())
         panic("Need to implement cache resending nacked packets!\n");
 
     if (pkt->isRequest() && blocked) {
@@ -1216,7 +1214,7 @@ void
 Cache<TagStore>::MemSidePort::recvFunctional(PacketPtr pkt)
 {
     checkFunctional(pkt);
-    if (pkt->result != Packet::Success)
+    if (!pkt->isResponse())
         myCache()->functionalAccess(pkt, cache->cpuSidePort);
 }
 
diff --git a/src/mem/packet.cc b/src/mem/packet.cc
index cd0ed8a2e..55fe13f3c 100644
--- a/src/mem/packet.cc
+++ b/src/mem/packet.cc
@@ -115,7 +115,13 @@ MemCmd::commandInfo[] =
         SwapResp, "SwapReq" },
     /* SwapResp -- for Swap ldstub type operations */
     { SET5(IsRead, IsWrite, NeedsExclusive, IsResponse, HasData),
-        InvalidCmd, "SwapResp" }
+            InvalidCmd, "SwapResp" },
+    /* NetworkNackError  -- nacked at network layer (not by protocol) */
+    { SET2(IsRequest, IsError), InvalidCmd, "NetworkNackError" },
+    /* InvalidDestError  -- packet dest field invalid */
+    { SET2(IsRequest, IsError), InvalidCmd, "InvalidDestError" },
+    /* BadAddressError   -- memory address invalid */
+    { SET2(IsRequest, IsError), InvalidCmd, "BadAddressError" }
 };
 
 
@@ -205,7 +211,7 @@ Packet::checkFunctional(Addr addr, int size, uint8_t *data)
         if (func_start >= val_start && func_end <= val_end) {
             allocate();
             std::memcpy(getPtr<uint8_t>(), data + offset, getSize());
-            result = Packet::Success;
+            makeResponse();
             return true;
         } else {
             // In this case the timing packet only partially satisfies
@@ -245,15 +251,6 @@ operator<<(std::ostream &o, const Packet &p)
     o <<  p.getAddr() + p.getSize() - 1 << "] ";
     o.unsetf(std::ios_base::hex| std::ios_base::showbase);
 
-    if (p.result == Packet::Success)
-        o << "Successful ";
-    if (p.result == Packet::BadAddress)
-        o << "BadAddress ";
-    if (p.result == Packet::Nacked)
-        o << "Nacked ";
-    if (p.result == Packet::Unknown)
-        o << "Inflight ";
-
     if (p.isRead())
         o << "Read ";
     if (p.isWrite())
diff --git a/src/mem/packet.hh b/src/mem/packet.hh
index fc1c283ed..10b9f490c 100644
--- a/src/mem/packet.hh
+++ b/src/mem/packet.hh
@@ -84,6 +84,13 @@ class MemCmd
         StoreCondResp,
         SwapReq,
         SwapResp,
+        // Error responses
+        // @TODO these should be classified as responses rather than
+        // requests; coding them as requests initially for backwards
+        // compatibility
+        NetworkNackError,  // nacked at network layer (not by protocol)
+        InvalidDestError,  // packet dest field invalid
+        BadAddressError,   // memory address invalid
         NUM_MEM_CMDS
     };
 
@@ -103,6 +110,7 @@ class MemCmd
         IsHWPrefetch,
         IsLocked,       //!< Alpha/MIPS LL or SC access
         HasData,        //!< There is an associated payload
+        IsError,        //!< Error response
         NUM_COMMAND_ATTRIBUTES
     };
 
@@ -135,12 +143,13 @@ class MemCmd
     bool isWrite()  const       { return testCmdAttrib(IsWrite); }
     bool isRequest() const      { return testCmdAttrib(IsRequest); }
     bool isResponse() const     { return testCmdAttrib(IsResponse); }
-    bool needsExclusive() const  { return testCmdAttrib(NeedsExclusive); }
+    bool needsExclusive() const { return testCmdAttrib(NeedsExclusive); }
     bool needsResponse() const  { return testCmdAttrib(NeedsResponse); }
     bool isInvalidate() const   { return testCmdAttrib(IsInvalidate); }
     bool hasData() const        { return testCmdAttrib(HasData); }
     bool isReadWrite() const    { return isRead() && isWrite(); }
     bool isLocked() const       { return testCmdAttrib(IsLocked); }
+    bool isError() const        { return testCmdAttrib(IsError); }
 
     const Command responseCommand() const {
         return commandInfo[cmd].response;
@@ -184,6 +193,12 @@ class Packet : public FastAlloc
 
     typedef MemCmd::Command Command;
 
+    /** The command field of the packet. */
+    MemCmd cmd;
+
+    /** A pointer to the original request. */
+    RequestPtr req;
+
   private:
    /** A pointer to the data being transfered.  It can be differnt
     *    sizes at each level of the heirarchy so it belongs in the
@@ -223,19 +238,28 @@ class Packet : public FastAlloc
      *   (unlike * addr, size, and src). */
     short dest;
 
+    /** The original value of the command field.  Only valid when the
+     * current command field is an error condition; in that case, the
+     * previous contents of the command field are copied here.  This
+     * field is *not* set on non-error responses.
+     */
+    MemCmd origCmd;
+
     /** Are the 'addr' and 'size' fields valid? */
     bool addrSizeValid;
     /** Is the 'src' field valid? */
     bool srcValid;
+    bool destValid;
 
-    enum SnoopFlag {
+    enum Flag {
+        // Snoop flags
         MemInhibit,
         Shared,
-        NUM_SNOOP_FLAGS
+        NUM_PACKET_FLAGS
     };
 
-    /** Coherence snoopFlags for snooping */
-    std::bitset<NUM_SNOOP_FLAGS> snoopFlags;
+    /** Status flags */
+    std::bitset<NUM_PACKET_FLAGS> flags;
 
   public:
 
@@ -252,22 +276,6 @@ class Packet : public FastAlloc
      *   should be routed based on its address. */
     static const short Broadcast = -1;
 
-    /** A pointer to the original request. */
-    RequestPtr req;
-
-    /** A virtual base opaque structure used to hold coherence-related
-     *    state.  A specific subclass would be derived from this to
-     *    carry state specific to a particular coherence protocol.  */
-    class CoherenceState : public FastAlloc {
-      public:
-        virtual ~CoherenceState() {}
-    };
-
-    /** This packet's coherence state.  Caches should use
-     *   dynamic_cast<> to cast to the state appropriate for the
-     *   system's coherence protocol.  */
-    CoherenceState *coherence;
-
     /** A virtual base opaque structure used to hold state associated
      *    with the packet but specific to the sending device (e.g., an
      *    MSHR).  A pointer to this state is returned in the packet's
@@ -284,11 +292,6 @@ class Packet : public FastAlloc
      *   to cast to the state appropriate to the sender. */
     SenderState *senderState;
 
-  public:
-
-    /** The command field of the packet. */
-    MemCmd cmd;
-
     /** Return the string name of the cmd field (for debugging and
      *   tracing). */
     const std::string &cmdString() const { return cmd.toString(); }
@@ -296,68 +299,59 @@ class Packet : public FastAlloc
     /** Return the index of this command. */
     inline int cmdToIndex() const { return cmd.toInt(); }
 
-  public:
-
     bool isRead() const         { return cmd.isRead(); }
     bool isWrite()  const       { return cmd.isWrite(); }
     bool isRequest() const      { return cmd.isRequest(); }
     bool isResponse() const     { return cmd.isResponse(); }
-    bool needsExclusive() const  { return cmd.needsExclusive(); }
+    bool needsExclusive() const { return cmd.needsExclusive(); }
     bool needsResponse() const  { return cmd.needsResponse(); }
     bool isInvalidate() const   { return cmd.isInvalidate(); }
     bool hasData() const        { return cmd.hasData(); }
     bool isReadWrite() const    { return cmd.isReadWrite(); }
     bool isLocked() const       { return cmd.isLocked(); }
-
-    void assertMemInhibit()     { snoopFlags[MemInhibit] = true; }
-    void assertShared()         { snoopFlags[Shared] = true; }
-    bool memInhibitAsserted()   { return snoopFlags[MemInhibit]; }
-    bool sharedAsserted()       { return snoopFlags[Shared]; }
+    bool isError() const        { return cmd.isError(); }
+
+    // Snoop flags
+    void assertMemInhibit()     { flags[MemInhibit] = true; }
+    void assertShared()         { flags[Shared] = true; }
+    bool memInhibitAsserted()   { return flags[MemInhibit]; }
+    bool sharedAsserted()       { return flags[Shared]; }
+
+    // Network error conditions... encapsulate them as methods since
+    // their encoding keeps changing (from result field to command
+    // field, etc.)
+    void setNacked()     { origCmd = cmd; cmd = MemCmd::NetworkNackError; }
+    void setBadAddress() { origCmd = cmd; cmd = MemCmd::BadAddressError; }
+    bool wasNacked()     { return cmd == MemCmd::NetworkNackError; }
+    bool hadBadAddress() { return cmd == MemCmd::BadAddressError; }
 
     bool nic_pkt() { panic("Unimplemented"); M5_DUMMY_RETURN }
 
-    /** Possible results of a packet's request. */
-    enum Result
-    {
-        Success,
-        BadAddress,
-        Nacked,
-        Unknown
-    };
-
-    /** The result of this packet's request. */
-    Result result;
-
     /** Accessor function that returns the source index of the packet. */
-    short getSrc() const { assert(srcValid); return src; }
+    short getSrc() const    { assert(srcValid); return src; }
     void setSrc(short _src) { src = _src; srcValid = true; }
     /** Reset source field, e.g. to retransmit packet on different bus. */
     void clearSrc() { srcValid = false; }
 
     /** Accessor function that returns the destination index of
         the packet. */
-    short getDest() const { return dest; }
-    void setDest(short _dest) { dest = _dest; }
+    short getDest() const     { assert(destValid); return dest; }
+    void setDest(short _dest) { dest = _dest; destValid = true; }
 
     Addr getAddr() const { assert(addrSizeValid); return addr; }
-    int getSize() const { assert(addrSizeValid); return size; }
+    int getSize() const  { assert(addrSizeValid); return size; }
     Addr getOffset(int blkSize) const { return addr & (Addr)(blkSize - 1); }
 
-    void addrOverride(Addr newAddr) { assert(addrSizeValid); addr = newAddr; }
-    void cmdOverride(MemCmd newCmd) { cmd = newCmd; }
-
     /** Constructor.  Note that a Request object must be constructed
      *   first, but the Requests's physical address and size fields
      *   need not be valid. The command and destination addresses
      *   must be supplied.  */
     Packet(Request *_req, MemCmd _cmd, short _dest)
-        :  data(NULL), staticData(false), dynamicData(false), arrayData(false),
+        :  cmd(_cmd), req(_req),
+           data(NULL), staticData(false), dynamicData(false), arrayData(false),
            addr(_req->paddr), size(_req->size), dest(_dest),
-           addrSizeValid(_req->validPaddr), srcValid(false),
-           snoopFlags(0),
-           time(curTick),
-           req(_req), coherence(NULL), senderState(NULL), cmd(_cmd),
-           result(Unknown)
+           addrSizeValid(_req->validPaddr), srcValid(false), destValid(true),
+           flags(0), time(curTick), senderState(NULL)
     {
     }
 
@@ -365,13 +359,11 @@ class Packet : public FastAlloc
      *  a request that is for a whole block, not the address from the req.
      *  this allows for overriding the size/addr of the req.*/
     Packet(Request *_req, MemCmd _cmd, short _dest, int _blkSize)
-        :  data(NULL), staticData(false), dynamicData(false), arrayData(false),
+        :  cmd(_cmd), req(_req),
+           data(NULL), staticData(false), dynamicData(false), arrayData(false),
            addr(_req->paddr & ~(_blkSize - 1)), size(_blkSize), dest(_dest),
-           addrSizeValid(_req->validPaddr), srcValid(false),
-           snoopFlags(0),
-           time(curTick),
-           req(_req), coherence(NULL), senderState(NULL), cmd(_cmd),
-           result(Unknown)
+           addrSizeValid(_req->validPaddr), srcValid(false), destValid(true),
+           flags(0), time(curTick), senderState(NULL)
     {
     }
 
@@ -382,15 +374,14 @@ class Packet : public FastAlloc
      * dynamic data, user must guarantee that the new packet's
      * lifetime is less than that of the original packet. */
     Packet(Packet *origPkt)
-        :  data(NULL), staticData(false), dynamicData(false), arrayData(false),
+        :  cmd(origPkt->cmd), req(origPkt->req),
+           data(NULL), staticData(false), dynamicData(false), arrayData(false),
            addr(origPkt->addr), size(origPkt->size),
            src(origPkt->src), dest(origPkt->dest),
-           addrSizeValid(origPkt->addrSizeValid), srcValid(origPkt->srcValid),
-           snoopFlags(origPkt->snoopFlags),
-           time(curTick),
-           req(origPkt->req), coherence(origPkt->coherence),
-           senderState(origPkt->senderState), cmd(origPkt->cmd),
-           result(origPkt->result)
+           addrSizeValid(origPkt->addrSizeValid),
+           srcValid(origPkt->srcValid), destValid(origPkt->destValid),
+           flags(origPkt->flags),
+           time(curTick), senderState(origPkt->senderState)
     {
     }
 
@@ -405,12 +396,11 @@ class Packet : public FastAlloc
      *   multiple transactions. */
     void reinitFromRequest() {
         assert(req->validPaddr);
-        snoopFlags = 0;
+        flags = 0;
         addr = req->paddr;
         size = req->size;
         time = req->time;
         addrSizeValid = true;
-        result = Unknown;
         if (dynamicData) {
             deleteData();
             dynamicData = false;
@@ -424,34 +414,24 @@ class Packet : public FastAlloc
      * destination fields are *not* modified, as is appropriate for
      * atomic accesses.
      */
-    void makeAtomicResponse()
+    void makeResponse()
     {
         assert(needsResponse());
         assert(isRequest());
-        assert(result == Unknown);
         cmd = cmd.responseCommand();
-        result = Success;
+        dest = src;
+        destValid = srcValid;
+        srcValid = false;
     }
 
-    /**
-     * Perform the additional work required for timing responses above
-     * and beyond atomic responses; i.e., change the destination to
-     * point back to the requester and clear the source field.
-     */
-    void convertAtomicToTimingResponse()
+    void makeAtomicResponse()
     {
-        dest = getSrc();
-        srcValid = false;
+        makeResponse();
     }
 
-    /**
-     * Take a request packet and modify it in place to be suitable for
-     * returning as a response to a timing request.
-     */
     void makeTimingResponse()
     {
-        makeAtomicResponse();
-        convertAtomicToTimingResponse();
+        makeResponse();
     }
 
     /**
@@ -462,9 +442,10 @@ class Packet : public FastAlloc
     void
     reinitNacked()
     {
-        assert(needsResponse() && result == Nacked);
-        dest =  Broadcast;
-        result = Unknown;
+        assert(wasNacked());
+        cmd = origCmd;
+        assert(needsResponse());
+        setDest(Broadcast);
     }
 
 
diff --git a/src/mem/physical.cc b/src/mem/physical.cc
index 93cba96c4..2742eca51 100644
--- a/src/mem/physical.cc
+++ b/src/mem/physical.cc
@@ -322,7 +322,7 @@ PhysicalMemory::doFunctionalAccess(PacketPtr pkt)
               pkt->cmdString());
     }
 
-    pkt->result = Packet::Success;
+    pkt->makeAtomicResponse();
 }
 
 
diff --git a/src/mem/port.cc b/src/mem/port.cc
index e6ea773f2..ba4f23668 100644
--- a/src/mem/port.cc
+++ b/src/mem/port.cc
@@ -58,12 +58,11 @@ void
 Port::blobHelper(Addr addr, uint8_t *p, int size, MemCmd cmd)
 {
     Request req;
-    Packet pkt(&req, cmd, Packet::Broadcast);
 
     for (ChunkGenerator gen(addr, size, peerBlockSize());
          !gen.done(); gen.next()) {
         req.setPhys(gen.addr(), gen.size(), 0);
-        pkt.reinitFromRequest();
+        Packet pkt(&req, cmd, Packet::Broadcast);
         pkt.dataStatic(p);
         sendFunctional(&pkt);
         p += gen.size();
diff --git a/src/mem/tport.cc b/src/mem/tport.cc
index 6c8c12ce2..d6ff64608 100644
--- a/src/mem/tport.cc
+++ b/src/mem/tport.cc
@@ -55,7 +55,7 @@ SimpleTimingPort::recvFunctional(PacketPtr pkt)
     checkFunctional(pkt);
 
     // Just do an atomic access and throw away the returned latency
-    if (pkt->result != Packet::Success)
+    if (!pkt->isResponse())
         recvAtomic(pkt);
 }
 
@@ -68,7 +68,6 @@ SimpleTimingPort::recvTiming(PacketPtr pkt)
     // correctly with the drain code, so that would need to be fixed
     // if we ever added it back.
     assert(pkt->isRequest());
-    assert(pkt->result == Packet::Unknown);
 
     if (pkt->memInhibitAsserted()) {
         // snooper will supply based on copy of packet
@@ -85,7 +84,6 @@ SimpleTimingPort::recvTiming(PacketPtr pkt)
         // recvAtomic() should already have turned packet into
         // atomic response
         assert(pkt->isResponse());
-        pkt->convertAtomicToTimingResponse();
         schedSendTiming(pkt, curTick + latency);
     } else {
         delete pkt->req;
@@ -138,12 +136,15 @@ void
 SimpleTimingPort::sendDeferredPacket()
 {
     assert(deferredPacketReady());
-    bool success = sendTiming(transmitList.front().pkt);
+    // take packet off list here; if recvTiming() on the other side
+    // calls sendTiming() back on us (like SimpleTimingCpu does), then
+    // we get confused by having a non-active packet on transmitList
+    DeferredPacket dp = transmitList.front();
+    transmitList.pop_front();
+    bool success = sendTiming(dp.pkt);
 
     if (success) {
-        //send successful, remove packet
-        transmitList.pop_front();
-        if (!transmitList.empty()) {
+        if (!transmitList.empty() && !sendEvent->scheduled()) {
             Tick time = transmitList.front().tick;
             sendEvent->schedule(time <= curTick ? curTick+1 : time);
         }
@@ -152,6 +153,12 @@ SimpleTimingPort::sendDeferredPacket()
             drainEvent->process();
             drainEvent = NULL;
         }
+    } else {
+        // Unsuccessful, need to put back on transmitList.  Callee
+        // should not have messed with it (since it didn't accept that
+        // packet), so we can just push it back on the front.
+        assert(!sendEvent->scheduled());
+        transmitList.push_front(dp);
     }
 
     waitingOnRetry = !success;
-- 
cgit v1.2.3


From 6babda7123be5e69db137e77589d88c768c19345 Mon Sep 17 00:00:00 2001
From: Steve Reinhardt <stever@eecs.umich.edu>
Date: Sat, 30 Jun 2007 13:34:16 -0700
Subject: Fix up a few statistics problems. Stats pretty much line up with old
 code, except: (1) a bug in the old code included L1 latency in L2 miss time,
 making it too high; (2) UniCoherence did cache-to-cache transfers even from
 non-owner caches, so occasionally the icache would get a block from the
 dcache, not the L2; (3) L2 can now receive ReadExReq from L1 since L1s have
 coherence.

--HG--
extra : convert_revision : 5052c1a1767b5a662f30a88f16012165a73b791c
---
 src/mem/cache/base_cache.cc      | 54 +++++++++++++++++++++-------------------
 src/mem/cache/base_cache.hh      |  6 ++---
 src/mem/cache/cache_impl.hh      | 21 +++++++++-------
 src/mem/cache/miss/mshr.cc       | 20 +++++++--------
 src/mem/cache/miss/mshr.hh       | 10 +++++---
 src/mem/cache/miss/mshr_queue.cc |  4 +--
 src/mem/cache/miss/mshr_queue.hh |  6 ++---
 src/mem/tport.hh                 |  2 +-
 8 files changed, 65 insertions(+), 58 deletions(-)

diff --git a/src/mem/cache/base_cache.cc b/src/mem/cache/base_cache.cc
index 870658675..ec9e1cf9b 100644
--- a/src/mem/cache/base_cache.cc
+++ b/src/mem/cache/base_cache.cc
@@ -150,20 +150,29 @@ BaseCache::regStats()
             ;
     }
 
+// These macros make it easier to sum the right subset of commands and
+// to change the subset of commands that are considered "demand" vs
+// "non-demand"
+#define SUM_DEMAND(s) \
+    (s[MemCmd::ReadReq] + s[MemCmd::WriteReq] + s[MemCmd::ReadExReq])
+
+// should writebacks be included here?  prior code was inconsistent...
+#define SUM_NON_DEMAND(s) \
+    (s[MemCmd::SoftPFReq] + s[MemCmd::HardPFReq])
+
     demandHits
         .name(name() + ".demand_hits")
         .desc("number of demand (read+write) hits")
         .flags(total)
         ;
-    demandHits = hits[MemCmd::ReadReq] + hits[MemCmd::WriteReq];
+    demandHits = SUM_DEMAND(hits);
 
     overallHits
         .name(name() + ".overall_hits")
         .desc("number of overall hits")
         .flags(total)
         ;
-    overallHits = demandHits + hits[MemCmd::SoftPFReq] + hits[MemCmd::HardPFReq]
-        + hits[MemCmd::Writeback];
+    overallHits = demandHits + SUM_NON_DEMAND(hits);
 
     // Miss statistics
     for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) {
@@ -183,15 +192,14 @@ BaseCache::regStats()
         .desc("number of demand (read+write) misses")
         .flags(total)
         ;
-    demandMisses = misses[MemCmd::ReadReq] + misses[MemCmd::WriteReq];
+    demandMisses = SUM_DEMAND(misses);
 
     overallMisses
         .name(name() + ".overall_misses")
         .desc("number of overall misses")
         .flags(total)
         ;
-    overallMisses = demandMisses + misses[MemCmd::SoftPFReq] +
-        misses[MemCmd::HardPFReq] + misses[MemCmd::Writeback];
+    overallMisses = demandMisses + SUM_NON_DEMAND(misses);
 
     // Miss latency statistics
     for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) {
@@ -211,15 +219,14 @@ BaseCache::regStats()
         .desc("number of demand (read+write) miss cycles")
         .flags(total)
         ;
-    demandMissLatency = missLatency[MemCmd::ReadReq] + missLatency[MemCmd::WriteReq];
+    demandMissLatency = SUM_DEMAND(missLatency);
 
     overallMissLatency
         .name(name() + ".overall_miss_latency")
         .desc("number of overall miss cycles")
         .flags(total)
         ;
-    overallMissLatency = demandMissLatency + missLatency[MemCmd::SoftPFReq] +
-        missLatency[MemCmd::HardPFReq];
+    overallMissLatency = demandMissLatency + SUM_NON_DEMAND(missLatency);
 
     // access formulas
     for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) {
@@ -368,15 +375,14 @@ BaseCache::regStats()
         .desc("number of demand (read+write) MSHR hits")
         .flags(total)
         ;
-    demandMshrHits = mshr_hits[MemCmd::ReadReq] + mshr_hits[MemCmd::WriteReq];
+    demandMshrHits = SUM_DEMAND(mshr_hits);
 
     overallMshrHits
         .name(name() + ".overall_mshr_hits")
         .desc("number of overall MSHR hits")
         .flags(total)
         ;
-    overallMshrHits = demandMshrHits + mshr_hits[MemCmd::SoftPFReq] +
-        mshr_hits[MemCmd::HardPFReq];
+    overallMshrHits = demandMshrHits + SUM_NON_DEMAND(mshr_hits);
 
     // MSHR miss statistics
     for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) {
@@ -396,15 +402,14 @@ BaseCache::regStats()
         .desc("number of demand (read+write) MSHR misses")
         .flags(total)
         ;
-    demandMshrMisses = mshr_misses[MemCmd::ReadReq] + mshr_misses[MemCmd::WriteReq];
+    demandMshrMisses = SUM_DEMAND(mshr_misses);
 
     overallMshrMisses
         .name(name() + ".overall_mshr_misses")
         .desc("number of overall MSHR misses")
         .flags(total)
         ;
-    overallMshrMisses = demandMshrMisses + mshr_misses[MemCmd::SoftPFReq] +
-        mshr_misses[MemCmd::HardPFReq];
+    overallMshrMisses = demandMshrMisses + SUM_NON_DEMAND(mshr_misses);
 
     // MSHR miss latency statistics
     for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) {
@@ -424,16 +429,15 @@ BaseCache::regStats()
         .desc("number of demand (read+write) MSHR miss cycles")
         .flags(total)
         ;
-    demandMshrMissLatency = mshr_miss_latency[MemCmd::ReadReq]
-        + mshr_miss_latency[MemCmd::WriteReq];
+    demandMshrMissLatency = SUM_DEMAND(mshr_miss_latency);
 
     overallMshrMissLatency
         .name(name() + ".overall_mshr_miss_latency")
         .desc("number of overall MSHR miss cycles")
         .flags(total)
         ;
-    overallMshrMissLatency = demandMshrMissLatency +
-        mshr_miss_latency[MemCmd::SoftPFReq] + mshr_miss_latency[MemCmd::HardPFReq];
+    overallMshrMissLatency =
+        demandMshrMissLatency + SUM_NON_DEMAND(mshr_miss_latency);
 
     // MSHR uncacheable statistics
     for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) {
@@ -453,9 +457,8 @@ BaseCache::regStats()
         .desc("number of overall MSHR uncacheable misses")
         .flags(total)
         ;
-    overallMshrUncacheable = mshr_uncacheable[MemCmd::ReadReq]
-        + mshr_uncacheable[MemCmd::WriteReq] + mshr_uncacheable[MemCmd::SoftPFReq]
-        + mshr_uncacheable[MemCmd::HardPFReq];
+    overallMshrUncacheable =
+        SUM_DEMAND(mshr_uncacheable) + SUM_NON_DEMAND(mshr_uncacheable);
 
     // MSHR miss latency statistics
     for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) {
@@ -475,10 +478,9 @@ BaseCache::regStats()
         .desc("number of overall MSHR uncacheable cycles")
         .flags(total)
         ;
-    overallMshrUncacheableLatency = mshr_uncacheable_lat[MemCmd::ReadReq]
-        + mshr_uncacheable_lat[MemCmd::WriteReq]
-        + mshr_uncacheable_lat[MemCmd::SoftPFReq]
-        + mshr_uncacheable_lat[MemCmd::HardPFReq];
+    overallMshrUncacheableLatency =
+        SUM_DEMAND(mshr_uncacheable_lat) +
+        SUM_NON_DEMAND(mshr_uncacheable_lat);
 
 #if 0
     // MSHR access formulas
diff --git a/src/mem/cache/base_cache.hh b/src/mem/cache/base_cache.hh
index 09484a14a..fcc040bd9 100644
--- a/src/mem/cache/base_cache.hh
+++ b/src/mem/cache/base_cache.hh
@@ -476,10 +476,10 @@ class BaseCache : public MemObject
         }
     }
 
-    Tick nextMSHRReadyTick()
+    Tick nextMSHRReadyTime()
     {
-        return std::min(mshrQueue.nextMSHRReadyTick(),
-                        writeBuffer.nextMSHRReadyTick());
+        return std::min(mshrQueue.nextMSHRReadyTime(),
+                        writeBuffer.nextMSHRReadyTime());
     }
 
     /**
diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh
index 1823ea6b9..568e7ff63 100644
--- a/src/mem/cache/cache_impl.hh
+++ b/src/mem/cache/cache_impl.hh
@@ -615,7 +615,7 @@ Cache<TagStore>::satisfyMSHR(MSHR *mshr, PacketPtr pkt,
 
             if (!target->pkt->req->isUncacheable()) {
                 missLatency[target->pkt->cmdToIndex()][0/*pkt->req->getThreadNum()*/] +=
-                    completion_time - target->time;
+                    completion_time - target->recvTime;
             }
             target->pkt->makeTimingResponse();
             cpuSidePort->respond(target->pkt, completion_time);
@@ -668,11 +668,14 @@ Cache<TagStore>::handleResponse(PacketPtr pkt)
     // Can we deallocate MSHR when done?
     bool deallocate = false;
 
+    // Initial target is used just for stats
+    MSHR::Target *initial_tgt = mshr->getTarget();
+    int stats_cmd_idx = initial_tgt->pkt->cmdToIndex();
+    Tick miss_latency = curTick - initial_tgt->recvTime;
+
     if (mshr->isCacheFill) {
-#if 0
-        mshr_miss_latency[mshr->originalCmd.toInt()][0/*pkt->req->getThreadNum()*/] +=
-            curTick - pkt->time;
-#endif
+        mshr_miss_latency[stats_cmd_idx][0/*pkt->req->getThreadNum()*/] +=
+            miss_latency;
         DPRINTF(Cache, "Block for addr %x being updated in Cache\n",
                 pkt->getAddr());
         BlkType *blk = tags->findBlock(pkt->getAddr());
@@ -698,8 +701,8 @@ Cache<TagStore>::handleResponse(PacketPtr pkt)
         }
     } else {
         if (pkt->req->isUncacheable()) {
-            mshr_uncacheable_lat[pkt->cmd.toInt()][0/*pkt->req->getThreadNum()*/] +=
-                curTick - pkt->time;
+            mshr_uncacheable_lat[stats_cmd_idx][0/*pkt->req->getThreadNum()*/] +=
+                miss_latency;
         }
 
         while (mshr->hasTargets()) {
@@ -1262,8 +1265,8 @@ Cache<TagStore>::MemSidePort::sendPacket()
     // tried to send packet... if it was successful (no retry), see if
     // we need to rerequest bus or not
     if (!waitingOnRetry) {
-        Tick nextReady = std::min(deferredPacketReadyTick(),
-                                  myCache()->nextMSHRReadyTick());
+        Tick nextReady = std::min(deferredPacketReadyTime(),
+                                  myCache()->nextMSHRReadyTime());
         // @TODO: need to facotr in prefetch requests here somehow
         if (nextReady != MaxTick) {
             DPRINTF(CachePort, "more packets to send @ %d\n", nextReady);
diff --git a/src/mem/cache/miss/mshr.cc b/src/mem/cache/miss/mshr.cc
index 63b3cacc2..5d5e63f90 100644
--- a/src/mem/cache/miss/mshr.cc
+++ b/src/mem/cache/miss/mshr.cc
@@ -56,11 +56,11 @@ MSHR::MSHR()
 
 void
 MSHR::allocate(Addr _addr, int _size, PacketPtr target,
-               Tick when, Counter _order)
+               Tick whenReady, Counter _order)
 {
     addr = _addr;
     size = _size;
-    readyTick = when;
+    readyTime = whenReady;
     order = _order;
     assert(target);
     isCacheFill = false;
@@ -71,7 +71,7 @@ MSHR::allocate(Addr _addr, int _size, PacketPtr target,
     ntargets = 1;
     // Don't know of a case where we would allocate a new MSHR for a
     // snoop (mem-side request), so set cpuSide to true here.
-    targets.push_back(Target(target, when, _order, true));
+    targets.push_back(Target(target, whenReady, _order, true));
     assert(deferredTargets.empty());
     deferredNeedsExclusive = false;
     pendingInvalidate = false;
@@ -94,33 +94,33 @@ MSHR::deallocate()
  * Adds a target to an MSHR
  */
 void
-MSHR::allocateTarget(PacketPtr target, Tick when, Counter _order)
+MSHR::allocateTarget(PacketPtr target, Tick whenReady, Counter _order)
 {
     if (inService) {
         if (!deferredTargets.empty() || pendingInvalidate ||
             (!needsExclusive && target->needsExclusive())) {
             // need to put on deferred list
-            deferredTargets.push_back(Target(target, when, _order, true));
+            deferredTargets.push_back(Target(target, whenReady, _order, true));
             if (target->needsExclusive()) {
                 deferredNeedsExclusive = true;
             }
         } else {
             // still OK to append to outstanding request
-            targets.push_back(Target(target, when, _order, true));
+            targets.push_back(Target(target, whenReady, _order, true));
         }
     } else {
         if (target->needsExclusive()) {
             needsExclusive = true;
         }
 
-        targets.push_back(Target(target, when, _order, true));
+        targets.push_back(Target(target, whenReady, _order, true));
     }
 
     ++ntargets;
 }
 
 void
-MSHR::allocateSnoopTarget(PacketPtr pkt, Tick when, Counter _order)
+MSHR::allocateSnoopTarget(PacketPtr pkt, Tick whenReady, Counter _order)
 {
     assert(inService); // don't bother to call otherwise
 
@@ -137,7 +137,7 @@ MSHR::allocateSnoopTarget(PacketPtr pkt, Tick when, Counter _order)
     if (needsExclusive || pkt->needsExclusive()) {
         // actual target device (typ. PhysicalMemory) will delete the
         // packet on reception, so we need to save a copy here
-        targets.push_back(Target(new Packet(pkt), when, _order, false));
+        targets.push_back(Target(new Packet(pkt), whenReady, _order, false));
         ++ntargets;
 
         if (needsExclusive) {
@@ -177,7 +177,7 @@ MSHR::promoteDeferredTargets()
     pendingShared = false;
     deferredNeedsExclusive = false;
     order = targets.front().order;
-    readyTick = std::max(curTick, targets.front().time);
+    readyTime = std::max(curTick, targets.front().readyTime);
 
     return true;
 }
diff --git a/src/mem/cache/miss/mshr.hh b/src/mem/cache/miss/mshr.hh
index 4db7b1cfe..293f290b8 100644
--- a/src/mem/cache/miss/mshr.hh
+++ b/src/mem/cache/miss/mshr.hh
@@ -54,15 +54,17 @@ class MSHR : public Packet::SenderState
 
     class Target {
       public:
-        Tick time;      //!< Time when request was received (for stats)
+        Tick recvTime;  //!< Time when request was received (for stats)
+        Tick readyTime; //!< Time when request is ready to be serviced
         Counter order;  //!< Global order (for memory consistency mgmt)
         PacketPtr pkt;  //!< Pending request packet.
         bool cpuSide;   //!< Did request come from cpu side or mem side?
 
         bool isCpuSide() { return cpuSide; }
 
-        Target(PacketPtr _pkt, Tick _time, Counter _order, bool _cpuSide)
-            : time(_time), order(_order), pkt(_pkt), cpuSide(_cpuSide)
+        Target(PacketPtr _pkt, Tick _readyTime, Counter _order, bool _cpuSide)
+            : recvTime(curTick), readyTime(_readyTime), order(_order),
+              pkt(_pkt), cpuSide(_cpuSide)
         {}
     };
 
@@ -81,7 +83,7 @@ class MSHR : public Packet::SenderState
     MSHRQueue *queue;
 
     /** Cycle when ready to issue */
-    Tick readyTick;
+    Tick readyTime;
 
     /** Order number assigned by the miss queue. */
     Counter order;
diff --git a/src/mem/cache/miss/mshr_queue.cc b/src/mem/cache/miss/mshr_queue.cc
index 18184bd20..56ec62a7d 100644
--- a/src/mem/cache/miss/mshr_queue.cc
+++ b/src/mem/cache/miss/mshr_queue.cc
@@ -111,14 +111,14 @@ MSHRQueue::findPending(Addr addr, int size) const
 MSHR::Iterator
 MSHRQueue::addToReadyList(MSHR *mshr)
 {
-    if (readyList.empty() || readyList.back()->readyTick <= mshr->readyTick) {
+    if (readyList.empty() || readyList.back()->readyTime <= mshr->readyTime) {
         return readyList.insert(readyList.end(), mshr);
     }
 
     MSHR::Iterator i = readyList.begin();
     MSHR::Iterator end = readyList.end();
     for (; i != end; ++i) {
-        if ((*i)->readyTick > mshr->readyTick) {
+        if ((*i)->readyTime > mshr->readyTime) {
             return readyList.insert(i, mshr);
         }
     }
diff --git a/src/mem/cache/miss/mshr_queue.hh b/src/mem/cache/miss/mshr_queue.hh
index fd61dec8b..1f1d59e98 100644
--- a/src/mem/cache/miss/mshr_queue.hh
+++ b/src/mem/cache/miss/mshr_queue.hh
@@ -193,15 +193,15 @@ class MSHRQueue
      */
     MSHR *getNextMSHR() const
     {
-        if (readyList.empty() || readyList.front()->readyTick > curTick) {
+        if (readyList.empty() || readyList.front()->readyTime > curTick) {
             return NULL;
         }
         return readyList.front();
     }
 
-    Tick nextMSHRReadyTick() const
+    Tick nextMSHRReadyTime() const
     {
-        return readyList.empty() ? MaxTick : readyList.front()->readyTick;
+        return readyList.empty() ? MaxTick : readyList.front()->readyTime;
     }
 };
 
diff --git a/src/mem/tport.hh b/src/mem/tport.hh
index bfed29f34..bc9da6c44 100644
--- a/src/mem/tport.hh
+++ b/src/mem/tport.hh
@@ -105,7 +105,7 @@ class SimpleTimingPort : public Port
     bool deferredPacketReady()
     { return !transmitList.empty() && transmitList.front().tick <= curTick; }
 
-    Tick deferredPacketReadyTick()
+    Tick deferredPacketReadyTime()
     { return transmitList.empty() ? MaxTick : transmitList.front().tick; }
 
     void schedSendEvent(Tick when)
-- 
cgit v1.2.3


From f0c4dd79200bb76f472aa09d6aff02b67a1db8c5 Mon Sep 17 00:00:00 2001
From: Steve Reinhardt <stever@eecs.umich.edu>
Date: Sat, 30 Jun 2007 13:56:25 -0700
Subject: Factor out a little more common code.

--HG--
extra : convert_revision : 626255a91679d534030c91bcdb4fc1bed36ceb9b
---
 src/mem/cache/cache_impl.hh | 78 +++++++++++++++++++--------------------------
 1 file changed, 32 insertions(+), 46 deletions(-)

diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh
index 568e7ff63..b4c3c6359 100644
--- a/src/mem/cache/cache_impl.hh
+++ b/src/mem/cache/cache_impl.hh
@@ -143,6 +143,37 @@ Cache<TagStore>::cmpAndSwap(BlkType *blk, PacketPtr pkt)
 }
 
 
+template<class TagStore>
+void
+Cache<TagStore>::satisfyCpuSideRequest(PacketPtr pkt, BlkType *blk)
+{
+    assert(blk);
+    assert(pkt->needsExclusive() ? blk->isWritable() : blk->isValid());
+    assert(pkt->getOffset(blkSize) + pkt->getSize() <= blkSize);
+
+    // Check RMW operations first since both isRead() and
+    // isWrite() will be true for them
+    if (pkt->cmd == MemCmd::SwapReq) {
+        cmpAndSwap(blk, pkt);
+    } else if (pkt->isWrite()) {
+        if (blk->checkWrite(pkt)) {
+            blk->status |= BlkDirty;
+            pkt->writeDataToBlock(blk->data, blkSize);
+        }
+    } else if (pkt->isRead()) {
+        if (pkt->isLocked()) {
+            blk->trackLoadLocked(pkt);
+        }
+        pkt->setDataFromBlock(blk->data, blkSize);
+    } else {
+        // Not a read or write... must be an upgrade.  it's OK
+        // to just ack those as long as we have an exclusive
+        // copy at this level.
+        assert(pkt->cmd == MemCmd::UpgradeReq);
+    }
+}
+
+
 /////////////////////////////////////////////////////
 //
 // MSHR helper functions
@@ -237,27 +268,7 @@ Cache<TagStore>::access(PacketPtr pkt, BlkType *&blk, int &lat)
             // OK to satisfy access
             hits[pkt->cmdToIndex()][0/*pkt->req->getThreadNum()*/]++;
             satisfied = true;
-
-            // Check RMW operations first since both isRead() and
-            // isWrite() will be true for them
-            if (pkt->cmd == MemCmd::SwapReq) {
-                cmpAndSwap(blk, pkt);
-            } else if (pkt->isWrite()) {
-                if (blk->checkWrite(pkt)) {
-                    blk->status |= BlkDirty;
-                    pkt->writeDataToBlock(blk->data, blkSize);
-                }
-            } else if (pkt->isRead()) {
-                if (pkt->isLocked()) {
-                    blk->trackLoadLocked(pkt);
-                }
-                pkt->setDataFromBlock(blk->data, blkSize);
-            } else {
-                // Not a read or write... must be an upgrade.  it's OK
-                // to just ack those as long as we have an exclusive
-                // copy at this level.
-                assert(pkt->cmd == MemCmd::UpgradeReq);
-            }
+            satisfyCpuSideRequest(pkt, blk);
         } else {
             // permission violation... nothing to do here, leave unsatisfied
             // for statistics purposes this counts like a complete miss
@@ -558,31 +569,6 @@ Cache<TagStore>::functionalAccess(PacketPtr pkt,
 /////////////////////////////////////////////////////
 
 
-template<class TagStore>
-void
-Cache<TagStore>::satisfyCpuSideRequest(PacketPtr pkt, BlkType *blk)
-{
-    assert(blk);
-    assert(pkt->needsExclusive() ? blk->isWritable() : blk->isValid());
-    assert(pkt->isWrite() || pkt->isReadWrite() || pkt->isRead());
-    assert(pkt->getOffset(blkSize) + pkt->getSize() <= blkSize);
-
-    if (pkt->isWrite()) {
-        if (blk->checkWrite(pkt)) {
-            blk->status |= BlkDirty;
-            pkt->writeDataToBlock(blk->data, blkSize);
-        }
-    } else if (pkt->isReadWrite()) {
-        cmpAndSwap(blk, pkt);
-    } else {
-        if (pkt->isLocked()) {
-            blk->trackLoadLocked(pkt);
-        }
-        pkt->setDataFromBlock(blk->data, blkSize);
-    }
-}
-
-
 template<class TagStore>
 bool
 Cache<TagStore>::satisfyMSHR(MSHR *mshr, PacketPtr pkt,
-- 
cgit v1.2.3


From ee54ad318a63e868ab10bbc1b714bbb8209a11da Mon Sep 17 00:00:00 2001
From: Steve Reinhardt <stever@eecs.umich.edu>
Date: Sat, 30 Jun 2007 17:45:58 -0700
Subject: Event descriptions should not end in "event" (they function as
 adjectives not nouns)

--HG--
extra : convert_revision : 6506474ff3356ae8c80ed276c3608d8a4680bfdb
---
 src/arch/mips/regfile/misc_regfile.cc  | 2 +-
 src/cpu/base.cc                        | 2 +-
 src/cpu/o3/commit_impl.hh              | 2 +-
 src/cpu/o3/cpu.cc                      | 6 +++---
 src/cpu/o3/inst_queue_impl.hh          | 2 +-
 src/cpu/o3/lsq_unit_impl.hh            | 2 +-
 src/cpu/ozone/back_end_impl.hh         | 4 ++--
 src/cpu/ozone/cpu_impl.hh              | 2 +-
 src/cpu/ozone/inorder_back_end_impl.hh | 2 +-
 src/cpu/ozone/inst_queue_impl.hh       | 2 +-
 src/cpu/ozone/lsq_unit_impl.hh         | 2 +-
 src/cpu/ozone/lw_back_end_impl.hh      | 2 +-
 src/cpu/ozone/lw_lsq_impl.hh           | 2 +-
 src/cpu/quiesce_event.cc               | 2 +-
 src/cpu/simple/atomic.cc               | 2 +-
 src/cpu/simple/timing.hh               | 6 +++---
 src/cpu/trace/opt_cpu.cc               | 2 +-
 src/cpu/trace/trace_cpu.cc             | 2 +-
 src/dev/ethertap.hh                    | 2 +-
 src/dev/uart8250.cc                    | 2 +-
 src/mem/bridge.hh                      | 2 +-
 21 files changed, 26 insertions(+), 26 deletions(-)

diff --git a/src/arch/mips/regfile/misc_regfile.cc b/src/arch/mips/regfile/misc_regfile.cc
index c97d93cf9..71be3adf9 100755
--- a/src/arch/mips/regfile/misc_regfile.cc
+++ b/src/arch/mips/regfile/misc_regfile.cc
@@ -357,7 +357,7 @@ MiscRegFile::CP0Event::process()
 const char *
 MiscRegFile::CP0Event::description()
 {
-    return "Coprocessor-0 event";
+    return "Coprocessor-0";
 }
 
 void
diff --git a/src/cpu/base.cc b/src/cpu/base.cc
index f86313da0..cf007a06b 100644
--- a/src/cpu/base.cc
+++ b/src/cpu/base.cc
@@ -91,7 +91,7 @@ CPUProgressEvent::process()
 const char *
 CPUProgressEvent::description()
 {
-    return "CPU Progress event";
+    return "CPU Progress";
 }
 
 #if FULL_SYSTEM
diff --git a/src/cpu/o3/commit_impl.hh b/src/cpu/o3/commit_impl.hh
index 9411c6c62..f263383ae 100644
--- a/src/cpu/o3/commit_impl.hh
+++ b/src/cpu/o3/commit_impl.hh
@@ -67,7 +67,7 @@ template <class Impl>
 const char *
 DefaultCommit<Impl>::TrapEvent::description()
 {
-    return "Trap event";
+    return "Trap";
 }
 
 template <class Impl>
diff --git a/src/cpu/o3/cpu.cc b/src/cpu/o3/cpu.cc
index 6a3eb9c43..2bf8f9832 100644
--- a/src/cpu/o3/cpu.cc
+++ b/src/cpu/o3/cpu.cc
@@ -83,7 +83,7 @@ template <class Impl>
 const char *
 FullO3CPU<Impl>::TickEvent::description()
 {
-    return "FullO3CPU tick event";
+    return "FullO3CPU tick";
 }
 
 template <class Impl>
@@ -112,7 +112,7 @@ template <class Impl>
 const char *
 FullO3CPU<Impl>::ActivateThreadEvent::description()
 {
-    return "FullO3CPU \"Activate Thread\" event";
+    return "FullO3CPU \"Activate Thread\"";
 }
 
 template <class Impl>
@@ -144,7 +144,7 @@ template <class Impl>
 const char *
 FullO3CPU<Impl>::DeallocateContextEvent::description()
 {
-    return "FullO3CPU \"Deallocate Context\" event";
+    return "FullO3CPU \"Deallocate Context\"";
 }
 
 template <class Impl>
diff --git a/src/cpu/o3/inst_queue_impl.hh b/src/cpu/o3/inst_queue_impl.hh
index bdf5f07aa..99bffe1a6 100644
--- a/src/cpu/o3/inst_queue_impl.hh
+++ b/src/cpu/o3/inst_queue_impl.hh
@@ -60,7 +60,7 @@ template <class Impl>
 const char *
 InstructionQueue<Impl>::FUCompletion::description()
 {
-    return "Functional unit completion event";
+    return "Functional unit completion";
 }
 
 template <class Impl>
diff --git a/src/cpu/o3/lsq_unit_impl.hh b/src/cpu/o3/lsq_unit_impl.hh
index 91e616589..810a6d29f 100644
--- a/src/cpu/o3/lsq_unit_impl.hh
+++ b/src/cpu/o3/lsq_unit_impl.hh
@@ -69,7 +69,7 @@ template<class Impl>
 const char *
 LSQUnit<Impl>::WritebackEvent::description()
 {
-    return "Store writeback event";
+    return "Store writeback";
 }
 
 template<class Impl>
diff --git a/src/cpu/ozone/back_end_impl.hh b/src/cpu/ozone/back_end_impl.hh
index 4078699fe..27146ecf0 100644
--- a/src/cpu/ozone/back_end_impl.hh
+++ b/src/cpu/ozone/back_end_impl.hh
@@ -583,7 +583,7 @@ template<class Impl>
 const char *
 BackEnd<Impl>::LdWritebackEvent::description()
 {
-    return "Load writeback event";
+    return "Load writeback";
 }
 
 
@@ -603,7 +603,7 @@ template <class Impl>
 const char *
 BackEnd<Impl>::DCacheCompletionEvent::description()
 {
-    return "Cache completion event";
+    return "Cache completion";
 }
 
 template <class Impl>
diff --git a/src/cpu/ozone/cpu_impl.hh b/src/cpu/ozone/cpu_impl.hh
index d1214223b..d73e5768a 100644
--- a/src/cpu/ozone/cpu_impl.hh
+++ b/src/cpu/ozone/cpu_impl.hh
@@ -84,7 +84,7 @@ template <class Impl>
 const char *
 OzoneCPU<Impl>::TickEvent::description()
 {
-    return "OzoneCPU tick event";
+    return "OzoneCPU tick";
 }
 
 template <class Impl>
diff --git a/src/cpu/ozone/inorder_back_end_impl.hh b/src/cpu/ozone/inorder_back_end_impl.hh
index 8d7ebb60e..c57fa0200 100644
--- a/src/cpu/ozone/inorder_back_end_impl.hh
+++ b/src/cpu/ozone/inorder_back_end_impl.hh
@@ -540,5 +540,5 @@ template <class Impl>
 const char *
 InorderBackEnd<Impl>::DCacheCompletionEvent::description()
 {
-    return "DCache completion event";
+    return "DCache completion";
 }
diff --git a/src/cpu/ozone/inst_queue_impl.hh b/src/cpu/ozone/inst_queue_impl.hh
index ea9d03c0d..461c7eb0f 100644
--- a/src/cpu/ozone/inst_queue_impl.hh
+++ b/src/cpu/ozone/inst_queue_impl.hh
@@ -64,7 +64,7 @@ template <class Impl>
 const char *
 InstQueue<Impl>::FUCompletion::description()
 {
-    return "Functional unit completion event";
+    return "Functional unit completion";
 }
 #endif
 template <class Impl>
diff --git a/src/cpu/ozone/lsq_unit_impl.hh b/src/cpu/ozone/lsq_unit_impl.hh
index c46eb90be..e08e54835 100644
--- a/src/cpu/ozone/lsq_unit_impl.hh
+++ b/src/cpu/ozone/lsq_unit_impl.hh
@@ -62,7 +62,7 @@ template <class Impl>
 const char *
 OzoneLSQ<Impl>::StoreCompletionEvent::description()
 {
-    return "LSQ store completion event";
+    return "LSQ store completion";
 }
 
 template <class Impl>
diff --git a/src/cpu/ozone/lw_back_end_impl.hh b/src/cpu/ozone/lw_back_end_impl.hh
index c0a9cad24..f84bda348 100644
--- a/src/cpu/ozone/lw_back_end_impl.hh
+++ b/src/cpu/ozone/lw_back_end_impl.hh
@@ -121,7 +121,7 @@ template <class Impl>
 const char *
 LWBackEnd<Impl>::TrapEvent::description()
 {
-    return "Trap event";
+    return "Trap";
 }
 
 template <class Impl>
diff --git a/src/cpu/ozone/lw_lsq_impl.hh b/src/cpu/ozone/lw_lsq_impl.hh
index eefc0df83..e3000288c 100644
--- a/src/cpu/ozone/lw_lsq_impl.hh
+++ b/src/cpu/ozone/lw_lsq_impl.hh
@@ -57,7 +57,7 @@ template<class Impl>
 const char *
 OzoneLWLSQ<Impl>::WritebackEvent::description()
 {
-    return "Store writeback event";
+    return "Store writeback";
 }
 
 template <class Impl>
diff --git a/src/cpu/quiesce_event.cc b/src/cpu/quiesce_event.cc
index fa79e6d1e..3495a0e52 100644
--- a/src/cpu/quiesce_event.cc
+++ b/src/cpu/quiesce_event.cc
@@ -47,5 +47,5 @@ EndQuiesceEvent::process()
 const char*
 EndQuiesceEvent::description()
 {
-    return "End Quiesce Event.";
+    return "End Quiesce";
 }
diff --git a/src/cpu/simple/atomic.cc b/src/cpu/simple/atomic.cc
index bcd6662c8..8e8da2fa2 100644
--- a/src/cpu/simple/atomic.cc
+++ b/src/cpu/simple/atomic.cc
@@ -57,7 +57,7 @@ AtomicSimpleCPU::TickEvent::process()
 const char *
 AtomicSimpleCPU::TickEvent::description()
 {
-    return "AtomicSimpleCPU tick event";
+    return "AtomicSimpleCPU tick";
 }
 
 Port *
diff --git a/src/cpu/simple/timing.hh b/src/cpu/simple/timing.hh
index 39958bfb6..ba194b3fa 100644
--- a/src/cpu/simple/timing.hh
+++ b/src/cpu/simple/timing.hh
@@ -101,7 +101,7 @@ class TimingSimpleCPU : public BaseSimpleCPU
 
             TickEvent(TimingSimpleCPU *_cpu)
                 :Event(&mainEventQueue), cpu(_cpu) {}
-            const char *description() { return "Timing CPU clock event"; }
+            const char *description() { return "Timing CPU tick"; }
             void schedule(PacketPtr _pkt, Tick t);
         };
 
@@ -127,7 +127,7 @@ class TimingSimpleCPU : public BaseSimpleCPU
             ITickEvent(TimingSimpleCPU *_cpu)
                 : TickEvent(_cpu) {}
             void process();
-            const char *description() { return "Timing CPU clock event"; }
+            const char *description() { return "Timing CPU icache tick"; }
         };
 
         ITickEvent tickEvent;
@@ -155,7 +155,7 @@ class TimingSimpleCPU : public BaseSimpleCPU
             DTickEvent(TimingSimpleCPU *_cpu)
                 : TickEvent(_cpu) {}
             void process();
-            const char *description() { return "Timing CPU clock event"; }
+            const char *description() { return "Timing CPU dcache tick"; }
         };
 
         DTickEvent tickEvent;
diff --git a/src/cpu/trace/opt_cpu.cc b/src/cpu/trace/opt_cpu.cc
index 996e89f01..0f2944f07 100644
--- a/src/cpu/trace/opt_cpu.cc
+++ b/src/cpu/trace/opt_cpu.cc
@@ -207,7 +207,7 @@ OptCPU::TickEvent::process()
 const char *
 OptCPU::TickEvent::description()
 {
-    return "OptCPU tick event";
+    return "OptCPU tick";
 }
 
 
diff --git a/src/cpu/trace/trace_cpu.cc b/src/cpu/trace/trace_cpu.cc
index 3c9da4849..32ed6c7d7 100644
--- a/src/cpu/trace/trace_cpu.cc
+++ b/src/cpu/trace/trace_cpu.cc
@@ -148,7 +148,7 @@ TraceCPU::TickEvent::process()
 const char *
 TraceCPU::TickEvent::description()
 {
-    return "TraceCPU tick event";
+    return "TraceCPU tick";
 }
 
 
diff --git a/src/dev/ethertap.hh b/src/dev/ethertap.hh
index f64ed7187..3d2838817 100644
--- a/src/dev/ethertap.hh
+++ b/src/dev/ethertap.hh
@@ -89,7 +89,7 @@ class EtherTap : public EtherInt
         TxEvent(EtherTap *_tap)
             : Event(&mainEventQueue), tap(_tap) {}
         void process() { tap->retransmit(); }
-        virtual const char *description() { return "retransmit event"; }
+        virtual const char *description() { return "EtherTap retransmit"; }
     };
 
     friend class TxEvent;
diff --git a/src/dev/uart8250.cc b/src/dev/uart8250.cc
index 0ad80e077..358dda0d8 100644
--- a/src/dev/uart8250.cc
+++ b/src/dev/uart8250.cc
@@ -58,7 +58,7 @@ Uart8250::IntrEvent::IntrEvent(Uart8250 *u, int bit)
 const char *
 Uart8250::IntrEvent::description()
 {
-    return "uart interrupt delay event";
+    return "uart interrupt delay";
 }
 
 void
diff --git a/src/mem/bridge.hh b/src/mem/bridge.hh
index 7af764437..acae2f126 100644
--- a/src/mem/bridge.hh
+++ b/src/mem/bridge.hh
@@ -146,7 +146,7 @@ class Bridge : public MemObject
 
             virtual void process() { port->trySend(); }
 
-            virtual const char *description() { return "bridge send event"; }
+            virtual const char *description() { return "bridge send"; }
         };
 
         SendEvent sendEvent;
-- 
cgit v1.2.3


From d10a843723009ddee79cdbf94a46704df1e5cee6 Mon Sep 17 00:00:00 2001
From: Steve Reinhardt <stever@eecs.umich.edu>
Date: Sat, 30 Jun 2007 17:51:29 -0700
Subject: Get rid of obsolete fixPacket() functions. Handled by
 Packet::checkFunctional() now.

--HG--
extra : convert_revision : 63642254e2789c80a369ac269f317ec054ffe3c0
---
 src/mem/packet.cc | 25 -------------------------
 src/mem/packet.hh | 16 ----------------
 src/mem/tport.cc  |  7 ++-----
 3 files changed, 2 insertions(+), 46 deletions(-)

diff --git a/src/mem/packet.cc b/src/mem/packet.cc
index 55fe13f3c..8de02f533 100644
--- a/src/mem/packet.cc
+++ b/src/mem/packet.cc
@@ -164,31 +164,6 @@ Packet::intersect(PacketPtr p)
     return !(s1 > e2 || e1 < s2);
 }
 
-bool
-fixDelayedResponsePacket(PacketPtr func, PacketPtr timing)
-{
-    bool result;
-
-    if (timing->isRead() || timing->isWrite()) {
-        // Ugly hack to deal with the fact that we queue the requests
-        // and don't convert them to responses until we issue them on
-        // the bus.  I tried to avoid this by converting packets to
-        // responses right away, but this breaks during snoops where a
-        // responder may do the conversion before other caches have
-        // done the snoop.  Would work if we copied the packet instead
-        // of just hanging on to a pointer.
-        MemCmd oldCmd = timing->cmd;
-        timing->cmd = timing->cmd.responseCommand();
-        result = fixPacket(func, timing);
-        timing->cmd = oldCmd;
-    }
-    else {
-        //Don't toggle if it isn't a read/write response
-        result = fixPacket(func, timing);
-    }
-
-    return result;
-}
 
 bool
 Packet::checkFunctional(Addr addr, int size, uint8_t *data)
diff --git a/src/mem/packet.hh b/src/mem/packet.hh
index 10b9f490c..16bc6f458 100644
--- a/src/mem/packet.hh
+++ b/src/mem/packet.hh
@@ -574,22 +574,6 @@ class Packet : public FastAlloc
     }
 };
 
-
-
-/** Temporary for backwards compatibility.
- */
-inline
-bool fixPacket(PacketPtr func, PacketPtr timing) {
-    return !func->checkFunctional(timing);
-}
-
-/** This function is a wrapper for the fixPacket field that toggles
- * the hasData bit it is used when a response is waiting in the
- * caches, but hasn't been marked as a response yet (so the fixPacket
- * needs to get the correct value for the hasData)
- */
-bool fixDelayedResponsePacket(PacketPtr func, PacketPtr timing);
-
 std::ostream & operator<<(std::ostream &o, const Packet &p);
 
 #endif //__MEM_PACKET_HH
diff --git a/src/mem/tport.cc b/src/mem/tport.cc
index d6ff64608..a4f791048 100644
--- a/src/mem/tport.cc
+++ b/src/mem/tport.cc
@@ -40,11 +40,8 @@ SimpleTimingPort::checkFunctional(PacketPtr pkt)
         PacketPtr target = i->pkt;
         // If the target contains data, and it overlaps the
         // probed request, need to update data
-        if (target->intersect(pkt)) {
-            if (!fixPacket(pkt, target)) {
-                // fixPacket returns true for continue, false for done
-                return;
-            }
+        if (pkt->checkFunctional(target)) {
+            return;
         }
     }
 }
-- 
cgit v1.2.3


From 2447abe5ce6c40e61eb09430c95a592aa2445349 Mon Sep 17 00:00:00 2001
From: Steve Reinhardt <stever@eecs.umich.edu>
Date: Sat, 30 Jun 2007 17:56:30 -0700
Subject: Can only call makeAtomicResponse() once...

--HG--
extra : convert_revision : c49aade46aa64f979da35eb653b544ee5bd82f01
---
 src/dev/ide_ctrl.cc | 9 +++++----
 src/dev/ns_gige.cc  | 2 +-
 src/dev/pcidev.cc   | 1 -
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/src/dev/ide_ctrl.cc b/src/dev/ide_ctrl.cc
index 01243ae73..07764aaba 100644
--- a/src/dev/ide_ctrl.cc
+++ b/src/dev/ide_ctrl.cc
@@ -232,8 +232,10 @@ Tick
 IdeController::readConfig(PacketPtr pkt)
 {
     int offset = pkt->getAddr() & PCI_CONFIG_SIZE;
-    if (offset < PCI_DEVICE_SPECIFIC)
-        return  PciDev::readConfig(pkt);
+    if (offset < PCI_DEVICE_SPECIFIC) {
+        return PciDev::readConfig(pkt);
+    }
+
     assert(offset >= IDE_CTRL_CONF_START && (offset + 1) <= IDE_CTRL_CONF_END);
 
     pkt->allocate();
@@ -297,7 +299,6 @@ IdeController::readConfig(PacketPtr pkt)
     }
     pkt->makeAtomicResponse();
     return configDelay;
-
 }
 
 
@@ -361,6 +362,7 @@ IdeController::writeConfig(PacketPtr pkt)
           default:
             panic("invalid access size(?) for PCI configspace!\n");
         }
+        pkt->makeAtomicResponse();
     }
 
     /* Trap command register writes and enable IO/BM as appropriate as well as
@@ -403,7 +405,6 @@ IdeController::writeConfig(PacketPtr pkt)
             bm_enabled = false;
         break;
     }
-    pkt->makeAtomicResponse();
     return configDelay;
 }
 
diff --git a/src/dev/ns_gige.cc b/src/dev/ns_gige.cc
index 86f664238..17f7b433b 100644
--- a/src/dev/ns_gige.cc
+++ b/src/dev/ns_gige.cc
@@ -487,7 +487,7 @@ NSGigE::writeConfig(PacketPtr pkt)
             ioEnable = false;
         break;
     }
-    pkt->makeAtomicResponse();
+
     return configDelay;
 }
 
diff --git a/src/dev/pcidev.cc b/src/dev/pcidev.cc
index 85337c841..06806f841 100644
--- a/src/dev/pcidev.cc
+++ b/src/dev/pcidev.cc
@@ -284,7 +284,6 @@ PciDev::writeConfig(PacketPtr pkt)
     }
     pkt->makeAtomicResponse();
     return configDelay;
-
 }
 
 void
-- 
cgit v1.2.3


From 07f091d6ed63d9b54c0415eacc070c3ea67566fc Mon Sep 17 00:00:00 2001
From: Steve Reinhardt <stever@eecs.umich.edu>
Date: Sat, 30 Jun 2007 17:59:45 -0700
Subject: Get rid of remaining traces of obsolete CoherenceProtocol object.

--HG--
extra : convert_revision : c5555b00bef1b304a84886188ad2c0dcb4d7c5b9
---
 configs/common/Caches.py                    | 1 -
 configs/splash2/cluster.py                  | 6 +-----
 configs/splash2/run.py                      | 4 ----
 src/mem/cache/BaseCache.py                  | 1 -
 tests/configs/memtest.py                    | 1 -
 tests/configs/o3-timing-mp.py               | 1 -
 tests/configs/simple-atomic-mp.py           | 1 -
 tests/configs/simple-timing-mp.py           | 1 -
 tests/configs/tsunami-simple-atomic-dual.py | 1 -
 tests/configs/tsunami-simple-atomic.py      | 1 -
 tests/configs/tsunami-simple-timing-dual.py | 1 -
 tests/configs/tsunami-simple-timing.py      | 1 -
 12 files changed, 1 insertion(+), 19 deletions(-)

diff --git a/configs/common/Caches.py b/configs/common/Caches.py
index 4bff2c8a4..43a1c6378 100644
--- a/configs/common/Caches.py
+++ b/configs/common/Caches.py
@@ -35,7 +35,6 @@ class L1Cache(BaseCache):
     latency = '1ns'
     mshrs = 10
     tgts_per_mshr = 5
-    protocol = CoherenceProtocol(protocol='moesi')
 
 class L2Cache(BaseCache):
     assoc = 8
diff --git a/configs/splash2/cluster.py b/configs/splash2/cluster.py
index 799b85e6c..769bdcf5a 100644
--- a/configs/splash2/cluster.py
+++ b/configs/splash2/cluster.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2006 The Regents of The University of Michigan
+# Copyright (c) 2006-2007 The Regents of The University of Michigan
 # All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
@@ -51,9 +51,6 @@ parser.add_option("-n", "--numcpus",
 parser.add_option("-f", "--frequency",
                   default = "1GHz",
                   help="Frequency of each CPU")
-parser.add_option("-p", "--protocol",
-                  default="moesi",
-                  help="The coherence protocol to use for the L1'a (i.e. MOESI, MOSI)")
 parser.add_option("--l1size",
                   default = "32kB")
 parser.add_option("--l1latency",
@@ -141,7 +138,6 @@ class L1(BaseCache):
     block_size = 64
     mshrs = 12
     tgts_per_mshr = 8
-    protocol = CoherenceProtocol(protocol=options.protocol)
 
 # ----------------------
 # Base L2 Cache Definition
diff --git a/configs/splash2/run.py b/configs/splash2/run.py
index d051f1f1b..ff0a9448c 100644
--- a/configs/splash2/run.py
+++ b/configs/splash2/run.py
@@ -48,9 +48,6 @@ parser.add_option("-n", "--numcpus",
 parser.add_option("-f", "--frequency",
                   default = "1GHz",
                   help="Frequency of each CPU")
-parser.add_option("-p", "--protocol",
-                  default="moesi",
-                  help="The coherence protocol to use for the L1'a (i.e. MOESI, MOSI)")
 parser.add_option("--l1size",
                   default = "32kB")
 parser.add_option("--l1latency",
@@ -162,7 +159,6 @@ class L1(BaseCache):
     block_size = 64
     mshrs = 12
     tgts_per_mshr = 8
-    protocol = CoherenceProtocol(protocol=options.protocol)
 
 # ----------------------
 # Base L2 Cache Definition
diff --git a/src/mem/cache/BaseCache.py b/src/mem/cache/BaseCache.py
index 55b68f81f..86148f821 100644
--- a/src/mem/cache/BaseCache.py
+++ b/src/mem/cache/BaseCache.py
@@ -51,7 +51,6 @@ class BaseCache(MemObject):
     mshrs = Param.Int("number of MSHRs (max outstanding requests)")
     prioritizeRequests = Param.Bool(False,
         "always service demand misses first")
-    protocol = Param.CoherenceProtocol(NULL, "coherence protocol to use")
     repl = Param.Repl(NULL, "replacement policy")
     size = Param.MemorySize("capacity in bytes")
     split = Param.Bool(False, "whether or not this cache is split")
diff --git a/tests/configs/memtest.py b/tests/configs/memtest.py
index 6fe244acf..93ea4cc0e 100644
--- a/tests/configs/memtest.py
+++ b/tests/configs/memtest.py
@@ -38,7 +38,6 @@ class L1(BaseCache):
     block_size = 64
     mshrs = 12
     tgts_per_mshr = 8
-    protocol = CoherenceProtocol(protocol='moesi')
 
 # ----------------------
 # Base L2 Cache
diff --git a/tests/configs/o3-timing-mp.py b/tests/configs/o3-timing-mp.py
index 1ac9bd2e4..fc6a72a82 100644
--- a/tests/configs/o3-timing-mp.py
+++ b/tests/configs/o3-timing-mp.py
@@ -39,7 +39,6 @@ class L1(BaseCache):
     block_size = 64
     mshrs = 4
     tgts_per_mshr = 8
-    protocol = CoherenceProtocol(protocol='moesi')
 
 # ----------------------
 # Base L2 Cache
diff --git a/tests/configs/simple-atomic-mp.py b/tests/configs/simple-atomic-mp.py
index de0793d1c..bc0ced250 100644
--- a/tests/configs/simple-atomic-mp.py
+++ b/tests/configs/simple-atomic-mp.py
@@ -38,7 +38,6 @@ class L1(BaseCache):
     block_size = 64
     mshrs = 4
     tgts_per_mshr = 8
-    protocol = CoherenceProtocol(protocol='moesi')
 
 # ----------------------
 # Base L2 Cache
diff --git a/tests/configs/simple-timing-mp.py b/tests/configs/simple-timing-mp.py
index 1fd0e8c3c..0b400e6b7 100644
--- a/tests/configs/simple-timing-mp.py
+++ b/tests/configs/simple-timing-mp.py
@@ -38,7 +38,6 @@ class L1(BaseCache):
     block_size = 64
     mshrs = 4
     tgts_per_mshr = 8
-    protocol = CoherenceProtocol(protocol='moesi')
 
 # ----------------------
 # Base L2 Cache
diff --git a/tests/configs/tsunami-simple-atomic-dual.py b/tests/configs/tsunami-simple-atomic-dual.py
index 131095055..de8fe2474 100644
--- a/tests/configs/tsunami-simple-atomic-dual.py
+++ b/tests/configs/tsunami-simple-atomic-dual.py
@@ -40,7 +40,6 @@ class L1(BaseCache):
     block_size = 64
     mshrs = 4
     tgts_per_mshr = 8
-    protocol = CoherenceProtocol(protocol='moesi')
 
 # ----------------------
 # Base L2 Cache
diff --git a/tests/configs/tsunami-simple-atomic.py b/tests/configs/tsunami-simple-atomic.py
index 595b1aeda..2ba50273a 100644
--- a/tests/configs/tsunami-simple-atomic.py
+++ b/tests/configs/tsunami-simple-atomic.py
@@ -40,7 +40,6 @@ class L1(BaseCache):
     block_size = 64
     mshrs = 4
     tgts_per_mshr = 8
-    protocol = CoherenceProtocol(protocol='moesi')
 
 # ----------------------
 # Base L2 Cache
diff --git a/tests/configs/tsunami-simple-timing-dual.py b/tests/configs/tsunami-simple-timing-dual.py
index 47fba30ff..3b1a4f5cf 100644
--- a/tests/configs/tsunami-simple-timing-dual.py
+++ b/tests/configs/tsunami-simple-timing-dual.py
@@ -40,7 +40,6 @@ class L1(BaseCache):
     block_size = 64
     mshrs = 4
     tgts_per_mshr = 8
-    protocol = CoherenceProtocol(protocol='moesi')
 
 # ----------------------
 # Base L2 Cache
diff --git a/tests/configs/tsunami-simple-timing.py b/tests/configs/tsunami-simple-timing.py
index 999bde087..3f18c6848 100644
--- a/tests/configs/tsunami-simple-timing.py
+++ b/tests/configs/tsunami-simple-timing.py
@@ -41,7 +41,6 @@ class L1(BaseCache):
     block_size = 64
     mshrs = 4
     tgts_per_mshr = 8
-    protocol = CoherenceProtocol(protocol='moesi')
 
 # ----------------------
 # Base L2 Cache
-- 
cgit v1.2.3


From 5e59739416bf195173f4b37ba9afb1cb8ae16566 Mon Sep 17 00:00:00 2001
From: Steve Reinhardt <stever@eecs.umich.edu>
Date: Sat, 30 Jun 2007 18:03:17 -0700
Subject: Don't propagate snoops across bridges.  Wouldn't work anyway.

--HG--
extra : convert_revision : af29fc7d0c134f5e89dd2e814c819151350fcb38
---
 src/mem/bridge.cc | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/mem/bridge.cc b/src/mem/bridge.cc
index 77178d518..92beb3d7e 100644
--- a/src/mem/bridge.cc
+++ b/src/mem/bridge.cc
@@ -360,6 +360,8 @@ Bridge::BridgePort::getDeviceAddressRanges(AddrRangeList &resp,
                                            bool &snoop)
 {
     otherPort->getPeerAddressRanges(resp, snoop);
+    // we don't allow snooping across bridges
+    snoop = false;
 }
 
 BEGIN_DECLARE_SIM_OBJECT_PARAMS(Bridge)
-- 
cgit v1.2.3


From 3ad761bc8e89ff034fbf5ec6d8e9661e1025dcd7 Mon Sep 17 00:00:00 2001
From: Steve Reinhardt <stever@eecs.umich.edu>
Date: Sat, 30 Jun 2007 20:35:42 -0700
Subject: Make CPU models use new LoadLockedReq/StoreCondReq commands.

--HG--
extra : convert_revision : ab78d9d1d88c3698edfd653d71c8882e1272b781
---
 src/cpu/o3/lsq_unit.hh       |  5 ++++-
 src/cpu/o3/lsq_unit_impl.hh  |  4 +++-
 src/cpu/ozone/lw_lsq.hh      |  6 +++++-
 src/cpu/ozone/lw_lsq_impl.hh |  5 ++++-
 src/cpu/simple/atomic.cc     | 36 +++++++++++++++++++++---------------
 src/cpu/simple/timing.cc     | 33 +++++++++++++++++++--------------
 6 files changed, 56 insertions(+), 33 deletions(-)

diff --git a/src/cpu/o3/lsq_unit.hh b/src/cpu/o3/lsq_unit.hh
index d964b9f9f..be9224099 100644
--- a/src/cpu/o3/lsq_unit.hh
+++ b/src/cpu/o3/lsq_unit.hh
@@ -643,7 +643,10 @@ LSQUnit<Impl>::read(Request *req, T &data, int load_idx)
     // if we the cache is not blocked, do cache access
     if (!lsq->cacheBlocked()) {
         PacketPtr data_pkt =
-            new Packet(req, MemCmd::ReadReq, Packet::Broadcast);
+            new Packet(req,
+                       (req->isLocked() ?
+                        MemCmd::LoadLockedReq : MemCmd::ReadReq),
+                       Packet::Broadcast);
         data_pkt->dataStatic(load_inst->memData);
 
         LSQSenderState *state = new LSQSenderState;
diff --git a/src/cpu/o3/lsq_unit_impl.hh b/src/cpu/o3/lsq_unit_impl.hh
index 810a6d29f..5ae1cc0e4 100644
--- a/src/cpu/o3/lsq_unit_impl.hh
+++ b/src/cpu/o3/lsq_unit_impl.hh
@@ -647,7 +647,9 @@ LSQUnit<Impl>::writebackStores()
 
         memcpy(inst->memData, storeQueue[storeWBIdx].data, req->getSize());
 
-        MemCmd command = req->isSwap() ? MemCmd::SwapReq : MemCmd::WriteReq;
+        MemCmd command =
+            req->isSwap() ? MemCmd::SwapReq :
+            (req->isLocked() ? MemCmd::WriteReq : MemCmd::StoreCondReq);
         PacketPtr data_pkt = new Packet(req, command,
                                         Packet::Broadcast);
         data_pkt->dataStatic(inst->memData);
diff --git a/src/cpu/ozone/lw_lsq.hh b/src/cpu/ozone/lw_lsq.hh
index d9e0d04ac..ba40e9ce1 100644
--- a/src/cpu/ozone/lw_lsq.hh
+++ b/src/cpu/ozone/lw_lsq.hh
@@ -632,7 +632,11 @@ OzoneLWLSQ<Impl>::read(RequestPtr req, T &data, int load_idx)
     DPRINTF(OzoneLSQ, "Doing timing access for inst PC %#x\n",
             inst->readPC());
 
-    PacketPtr data_pkt = new Packet(req, Packet::ReadReq, Packet::Broadcast);
+    PacketPtr data_pkt =
+        new Packet(req,
+                   (req->isLocked() ?
+                    MemCmd::LoadLockedReq : Packet::ReadReq),
+                   Packet::Broadcast);
     data_pkt->dataStatic(inst->memData);
 
     LSQSenderState *state = new LSQSenderState;
diff --git a/src/cpu/ozone/lw_lsq_impl.hh b/src/cpu/ozone/lw_lsq_impl.hh
index e3000288c..82191312a 100644
--- a/src/cpu/ozone/lw_lsq_impl.hh
+++ b/src/cpu/ozone/lw_lsq_impl.hh
@@ -587,7 +587,10 @@ OzoneLWLSQ<Impl>::writebackStores()
         memcpy(inst->memData, (uint8_t *)&(*sq_it).data,
                req->getSize());
 
-        PacketPtr data_pkt = new Packet(req, Packet::WriteReq, Packet::Broadcast);
+        MemCmd command =
+            req->isSwap() ? MemCmd::SwapReq :
+            (req->isLocked() ? MemCmd::WriteReq : MemCmd::StoreCondReq);
+        PacketPtr data_pkt = new Packet(req, command, Packet::Broadcast);
         data_pkt->dataStatic(inst->memData);
 
         LSQSenderState *state = new LSQSenderState;
diff --git a/src/cpu/simple/atomic.cc b/src/cpu/simple/atomic.cc
index 8e8da2fa2..01eb4873e 100644
--- a/src/cpu/simple/atomic.cc
+++ b/src/cpu/simple/atomic.cc
@@ -280,7 +280,10 @@ AtomicSimpleCPU::read(Addr addr, T &data, unsigned flags)
 
     // Now do the access.
     if (fault == NoFault) {
-        Packet pkt = Packet(req, MemCmd::ReadReq, Packet::Broadcast);
+        Packet pkt =
+            Packet(req,
+                   req->isLocked() ? MemCmd::LoadLockedReq : MemCmd::ReadReq,
+                   Packet::Broadcast);
         pkt.dataStatic(&data);
 
         if (req->isMmapedIpr())
@@ -370,23 +373,24 @@ AtomicSimpleCPU::write(T data, Addr addr, unsigned flags, uint64_t *res)
 
     // Now do the access.
     if (fault == NoFault) {
-        Packet pkt =
-            Packet(req, req->isSwap() ? MemCmd::SwapReq : MemCmd::WriteReq,
-                   Packet::Broadcast);
-        pkt.dataStatic(&data);
-
+        MemCmd cmd = MemCmd::WriteReq; // default
         bool do_access = true;  // flag to suppress cache access
 
         if (req->isLocked()) {
+            cmd = MemCmd::StoreCondReq;
             do_access = TheISA::handleLockedWrite(thread, req);
+        } else if (req->isSwap()) {
+            cmd = MemCmd::SwapReq;
+            if (req->isCondSwap()) {
+                assert(res);
+                req->setExtraData(*res);
+            }
         }
-        if (req->isCondSwap()) {
-             assert(res);
-             req->setExtraData(*res);
-        }
-
 
         if (do_access) {
+            Packet pkt = Packet(req, cmd, Packet::Broadcast);
+            pkt.dataStatic(&data);
+
             if (req->isMmapedIpr()) {
                 dcache_latency = TheISA::handleIprWrite(thread->getTC(), &pkt);
             } else {
@@ -395,12 +399,14 @@ AtomicSimpleCPU::write(T data, Addr addr, unsigned flags, uint64_t *res)
             }
             dcache_access = true;
             assert(!pkt.isError());
+
+            if (req->isSwap()) {
+                assert(res);
+                *res = pkt.get<T>();
+            }
         }
 
-        if (req->isSwap()) {
-            assert(res);
-            *res = pkt.get<T>();
-        } else if (res) {
+        if (res && !req->isSwap()) {
             *res = req->getExtraData();
         }
     }
diff --git a/src/cpu/simple/timing.cc b/src/cpu/simple/timing.cc
index b4e4a4433..77df2c05d 100644
--- a/src/cpu/simple/timing.cc
+++ b/src/cpu/simple/timing.cc
@@ -260,7 +260,10 @@ TimingSimpleCPU::read(Addr addr, T &data, unsigned flags)
     // Now do the access.
     if (fault == NoFault) {
         PacketPtr pkt =
-            new Packet(req, MemCmd::ReadReq, Packet::Broadcast);
+            new Packet(req,
+                       (req->isLocked() ?
+                        MemCmd::LoadLockedReq : MemCmd::ReadReq),
+                       Packet::Broadcast);
         pkt->dataDynamic<T>(new T);
 
         if (!dcachePort.sendTiming(pkt)) {
@@ -350,25 +353,27 @@ TimingSimpleCPU::write(T data, Addr addr, unsigned flags, uint64_t *res)
 
     // Now do the access.
     if (fault == NoFault) {
-        assert(dcache_pkt == NULL);
-        if (req->isSwap())
-            dcache_pkt = new Packet(req, MemCmd::SwapReq, Packet::Broadcast);
-        else
-            dcache_pkt = new Packet(req, MemCmd::WriteReq, Packet::Broadcast);
-        dcache_pkt->allocate();
-        dcache_pkt->set(data);
-
+        MemCmd cmd = MemCmd::WriteReq; // default
         bool do_access = true;  // flag to suppress cache access
 
+        assert(dcache_pkt == NULL);
+
         if (req->isLocked()) {
+            cmd = MemCmd::StoreCondReq;
             do_access = TheISA::handleLockedWrite(thread, req);
-        }
-        if (req->isCondSwap()) {
-             assert(res);
-             req->setExtraData(*res);
+        } else if (req->isSwap()) {
+            cmd = MemCmd::SwapReq;
+            if (req->isCondSwap()) {
+                assert(res);
+                req->setExtraData(*res);
+            }
         }
 
         if (do_access) {
+            dcache_pkt = new Packet(req, MemCmd::WriteReq, Packet::Broadcast);
+            dcache_pkt->allocate();
+            dcache_pkt->set(data);
+
             if (!dcachePort.sendTiming(dcache_pkt)) {
                 _status = DcacheRetry;
             } else {
@@ -609,7 +614,7 @@ TimingSimpleCPU::completeDataAccess(PacketPtr pkt)
 
     Fault fault = curStaticInst->completeAcc(pkt, this, traceData);
 
-    if (pkt->isRead() && pkt->req->isLocked()) {
+    if (pkt->isRead() && pkt->isLocked()) {
         TheISA::handleLockedRead(thread, pkt->req);
     }
 
-- 
cgit v1.2.3


From ffd697e14933b3012aaaa0fb93168b2fda59ea4a Mon Sep 17 00:00:00 2001
From: Steve Reinhardt <stever@eecs.umich.edu>
Date: Mon, 2 Jul 2007 01:02:35 -0700
Subject: bus.cc: Fix atomic timing issue.

src/mem/bus.cc:
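    Fix atomic timing issue: recvAtomic() now returns a latency
    instead of an absolute finish tick (pkt->finishTime is set to
    curTick + latency), and when a snooper supplies the response
    the snooper's latency is used rather than the target port's.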

--HG--
extra : convert_revision : a22ff80cd75f83c785b0604c2a4fde2e2e9f71ef
---
 src/mem/bus.cc | 21 ++++++++++++---------
 1 file changed, 12 insertions(+), 9 deletions(-)

diff --git a/src/mem/bus.cc b/src/mem/bus.cc
index 83ce0f87d..34f7f4fd0 100644
--- a/src/mem/bus.cc
+++ b/src/mem/bus.cc
@@ -377,7 +377,8 @@ Bus::recvAtomic(PacketPtr pkt)
     // original command so that additional snoops can take place
     // properly
     MemCmd orig_cmd = pkt->cmd;
-    MemCmd response_cmd = MemCmd::InvalidCmd;
+    MemCmd snoop_response_cmd = MemCmd::InvalidCmd;
+    Tick snoop_response_latency = 0;
     int orig_src = pkt->getSrc();
 
     Port *target_port = findPort(pkt->getAddr(), pkt->getSrc());
@@ -388,15 +389,16 @@ Bus::recvAtomic(PacketPtr pkt)
         // same port should not have both target addresses and snooping
         assert(p != target_port);
         if (p->getId() != pkt->getSrc()) {
-            p->sendAtomic(pkt);
+            Tick latency = p->sendAtomic(pkt);
             if (pkt->isResponse()) {
                 // response from snoop agent
                 assert(pkt->cmd != orig_cmd);
                 assert(pkt->memInhibitAsserted());
                 // should only happen once
-                assert(response_cmd == MemCmd::InvalidCmd);
+                assert(snoop_response_cmd == MemCmd::InvalidCmd);
                 // save response state
-                response_cmd = pkt->cmd;
+                snoop_response_cmd = pkt->cmd;
+                snoop_response_latency = latency;
                 // restore original packet state for remaining snoopers
                 pkt->cmd = orig_cmd;
                 pkt->setSrc(orig_src);
@@ -405,19 +407,20 @@ Bus::recvAtomic(PacketPtr pkt)
         }
     }
 
-    Tick response_time = target_port->sendAtomic(pkt);
+    Tick response_latency = target_port->sendAtomic(pkt);
 
     // if we got a response from a snooper, restore it here
-    if (response_cmd != MemCmd::InvalidCmd) {
+    if (snoop_response_cmd != MemCmd::InvalidCmd) {
         // no one else should have responded
         assert(!pkt->isResponse());
         assert(pkt->cmd == orig_cmd);
-        pkt->cmd = response_cmd;
+        pkt->cmd = snoop_response_cmd;
+        response_latency = snoop_response_latency;
     }
 
     // why do we have this packet field and the return value both???
-    pkt->finishTime = std::max(response_time, curTick + clock);
-    return pkt->finishTime;
+    pkt->finishTime = curTick + response_latency;
+    return response_latency;
 }
 
 /** Function called by the port when the bus is receiving a Functional
-- 
cgit v1.2.3


From e9c04dad60f7a382fe94ca587fa505926dbd925c Mon Sep 17 00:00:00 2001
From: Steve Reinhardt <stever@eecs.umich.edu>
Date: Mon, 2 Jul 2007 09:26:36 -0700
Subject: Fix a couple LL/SC bugs that only affected timing mode.

src/cpu/simple/timing.cc:
    Fix swap/stq_c command bug.
src/mem/packet.cc:
    Fix incorrect LoadLockedReq command response field.

--HG--
extra : convert_revision : 7a4523be900bc2c9b1bdf2d372ce55f89ae58ae5
---
 src/cpu/simple/timing.cc | 2 +-
 src/mem/packet.cc        | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/cpu/simple/timing.cc b/src/cpu/simple/timing.cc
index 77df2c05d..492a669b8 100644
--- a/src/cpu/simple/timing.cc
+++ b/src/cpu/simple/timing.cc
@@ -370,7 +370,7 @@ TimingSimpleCPU::write(T data, Addr addr, unsigned flags, uint64_t *res)
         }
 
         if (do_access) {
-            dcache_pkt = new Packet(req, MemCmd::WriteReq, Packet::Broadcast);
+            dcache_pkt = new Packet(req, cmd, Packet::Broadcast);
             dcache_pkt->allocate();
             dcache_pkt->set(data);
 
diff --git a/src/mem/packet.cc b/src/mem/packet.cc
index 8de02f533..8cd356768 100644
--- a/src/mem/packet.cc
+++ b/src/mem/packet.cc
@@ -99,7 +99,7 @@ MemCmd::commandInfo[] =
             InvalidCmd, "ReadExResp" },
     /* LoadLockedReq */
     { SET4(IsRead, IsLocked, IsRequest, NeedsResponse),
-            ReadResp, "LoadLockedReq" },
+            LoadLockedResp, "LoadLockedReq" },
     /* LoadLockedResp */
     { SET4(IsRead, IsLocked, IsResponse, HasData),
             InvalidCmd, "LoadLockedResp" },
-- 
cgit v1.2.3


From 4b68652c87f61fe0a2fd4040b79130de0846df85 Mon Sep 17 00:00:00 2001
From: Steve Reinhardt <stever@eecs.umich.edu>
Date: Mon, 2 Jul 2007 13:57:45 -0700
Subject: Couple more minor bug fixes for FS timing mode.

src/cpu/simple/timing.cc:
    Fix another SC problem.
src/mem/cache/cache_impl.hh:
    Forgot to call makeTimingResponse() on uncached timing responses.

--HG--
extra : convert_revision : 5a5a58ca2053e4e8de2133205bfd37de15eb4209
---
 src/cpu/simple/timing.cc    | 13 +++++++------
 src/mem/cache/cache_impl.hh |  1 +
 2 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/src/cpu/simple/timing.cc b/src/cpu/simple/timing.cc
index 492a669b8..0c03815b5 100644
--- a/src/cpu/simple/timing.cc
+++ b/src/cpu/simple/timing.cc
@@ -356,8 +356,6 @@ TimingSimpleCPU::write(T data, Addr addr, unsigned flags, uint64_t *res)
         MemCmd cmd = MemCmd::WriteReq; // default
         bool do_access = true;  // flag to suppress cache access
 
-        assert(dcache_pkt == NULL);
-
         if (req->isLocked()) {
             cmd = MemCmd::StoreCondReq;
             do_access = TheISA::handleLockedWrite(thread, req);
@@ -369,11 +367,14 @@ TimingSimpleCPU::write(T data, Addr addr, unsigned flags, uint64_t *res)
             }
         }
 
-        if (do_access) {
-            dcache_pkt = new Packet(req, cmd, Packet::Broadcast);
-            dcache_pkt->allocate();
-            dcache_pkt->set(data);
+        // Note: need to allocate dcache_pkt even if do_access is
+        // false, as it's used unconditionally to call completeAcc().
+        assert(dcache_pkt == NULL);
+        dcache_pkt = new Packet(req, cmd, Packet::Broadcast);
+        dcache_pkt->allocate();
+        dcache_pkt->set(data);
 
+        if (do_access) {
             if (!dcachePort.sendTiming(dcache_pkt)) {
                 _status = DcacheRetry;
             } else {
diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh
index b4c3c6359..0d76b6bec 100644
--- a/src/mem/cache/cache_impl.hh
+++ b/src/mem/cache/cache_impl.hh
@@ -698,6 +698,7 @@ Cache<TagStore>::handleResponse(PacketPtr pkt)
             if (pkt->isRead()) {
                 target->pkt->setData(pkt->getPtr<uint8_t>());
             }
+            target->pkt->makeTimingResponse();
             cpuSidePort->respond(target->pkt, time);
         }
         assert(!mshr->hasTargets());
-- 
cgit v1.2.3


From 4738649e32d06d92e6792b7ce80fcbd05627fc06 Mon Sep 17 00:00:00 2001
From: Steve Reinhardt <stever@eecs.umich.edu>
Date: Tue, 3 Jul 2007 00:40:31 -0400
Subject: Delete packets when we're done with them.

--HG--
extra : convert_revision : b8894d26e1ca7a6c9b736500accdaa53bfb09558
---
 src/mem/cache/cache_impl.hh | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh
index 0d76b6bec..320e0be81 100644
--- a/src/mem/cache/cache_impl.hh
+++ b/src/mem/cache/cache_impl.hh
@@ -705,6 +705,8 @@ Cache<TagStore>::handleResponse(PacketPtr pkt)
         deallocate = true;
     }
 
+    delete pkt;
+
     if (deallocate) {
         mq->deallocate(mshr);
         if (wasFull && !mq->isFull()) {
@@ -1242,6 +1244,9 @@ Cache<TagStore>::MemSidePort::sendPacket()
             waitingOnRetry = !success;
             if (waitingOnRetry) {
                 DPRINTF(CachePort, "now waiting on a retry\n");
+                if (!mshr->isSimpleForward()) {
+                    delete pkt;
+                }
             } else {
                 myCache()->markInService(mshr);
             }
-- 
cgit v1.2.3


From 3b4ff759398371ac14b7d694de1c87af245f7d42 Mon Sep 17 00:00:00 2001
From: Steve Reinhardt <stever@eecs.umich.edu>
Date: Sat, 14 Jul 2007 13:14:53 -0700
Subject: Fix bug in copying packet with static data pointer.

--HG--
extra : convert_revision : 2fcf99f050d73e007433c1db2475f2893c5961a0
---
 src/mem/packet.hh | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/src/mem/packet.hh b/src/mem/packet.hh
index 16bc6f458..c90842dee 100644
--- a/src/mem/packet.hh
+++ b/src/mem/packet.hh
@@ -368,14 +368,15 @@ class Packet : public FastAlloc
     }
 
     /** Alternate constructor for copying a packet.  Copy all fields
-     * *except* set data allocation as static... even if the original
-     * packet's data was dynamic, we don't want to free it when the
-     * new packet is deallocated.  Note that if original packet used
-     * dynamic data, user must guarantee that the new packet's
-     * lifetime is less than that of the original packet. */
+     * *except* if the original packet's data was dynamic, don't copy
+     * that, as we can't guarantee that the new packet's lifetime is
+     * less than that of the original packet.  In this case the new
+     * packet should allocate its own data. */
     Packet(Packet *origPkt)
         :  cmd(origPkt->cmd), req(origPkt->req),
-           data(NULL), staticData(false), dynamicData(false), arrayData(false),
+           data(origPkt->staticData ? origPkt->data : NULL),
+           staticData(origPkt->staticData),
+           dynamicData(false), arrayData(false),
            addr(origPkt->addr), size(origPkt->size),
            src(origPkt->src), dest(origPkt->dest),
            addrSizeValid(origPkt->addrSizeValid),
-- 
cgit v1.2.3


From abd194df5c2dfd0ebf608c5d59196a08ca0ef630 Mon Sep 17 00:00:00 2001
From: Steve Reinhardt <stever@eecs.umich.edu>
Date: Sat, 14 Jul 2007 13:16:58 -0700
Subject: Move a couple of DPRINTFs from Cache to CachePort.

--HG--
extra : convert_revision : 55a0d26660aeb8f63b41897d53e6b2d1f0a163be
---
 src/mem/cache/base_cache.hh | 2 +-
 src/mem/cache/cache_impl.hh | 3 ++-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/mem/cache/base_cache.hh b/src/mem/cache/base_cache.hh
index fcc040bd9..46414974b 100644
--- a/src/mem/cache/base_cache.hh
+++ b/src/mem/cache/base_cache.hh
@@ -126,7 +126,7 @@ class BaseCache : public MemObject
 
         void requestBus(RequestCause cause, Tick time)
         {
-            DPRINTF(Cache, "Asserting bus request for cause %d\n", cause);
+            DPRINTF(CachePort, "Asserting bus request for cause %d\n", cause);
             if (!waitingOnRetry) {
                 schedSendEvent(time);
             }
diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh
index 320e0be81..b159df84a 100644
--- a/src/mem/cache/cache_impl.hh
+++ b/src/mem/cache/cache_impl.hh
@@ -1238,7 +1238,8 @@ Cache<TagStore>::MemSidePort::sendPacket()
             MSHR *mshr = dynamic_cast<MSHR*>(pkt->senderState);
 
             bool success = sendTiming(pkt);
-            DPRINTF(Cache, "Address %x was %s in sending the timing request\n",
+            DPRINTF(CachePort,
+                    "Address %x was %s in sending the timing request\n",
                     pkt->getAddr(), success ? "successful" : "unsuccessful");
 
             waitingOnRetry = !success;
-- 
cgit v1.2.3


From 15a51d0cae01defc116c9a937bfa8c4577f72826 Mon Sep 17 00:00:00 2001
From: Steve Reinhardt <stever@eecs.umich.edu>
Date: Sat, 14 Jul 2007 13:28:52 -0700
Subject: Add CacheRepl trace flag and move a couple DPRINTFs to it.

--HG--
extra : convert_revision : 31724d19ebdf2cdc2a2bafff83d17845b3a0b183
---
 src/base/traceflags.py    | 1 +
 src/mem/cache/tags/lru.cc | 4 ++--
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/base/traceflags.py b/src/base/traceflags.py
index 70fadb210..8573eb9bf 100644
--- a/src/base/traceflags.py
+++ b/src/base/traceflags.py
@@ -47,6 +47,7 @@ baseFlags = [
     'BusBridge',
     'Cache',
     'CachePort',
+    'CacheRepl',
     'Chains',
     'Checker',
     'Clock',
diff --git a/src/mem/cache/tags/lru.cc b/src/mem/cache/tags/lru.cc
index 3269aa4db..0a8587c20 100644
--- a/src/mem/cache/tags/lru.cc
+++ b/src/mem/cache/tags/lru.cc
@@ -173,7 +173,7 @@ LRU::findBlock(Addr addr, int &lat)
     if (blk != NULL) {
         // move this block to head of the MRU list
         sets[set].moveToHead(blk);
-        DPRINTF(Cache, "set %x: moving blk %x to MRU\n",
+        DPRINTF(CacheRepl, "set %x: moving blk %x to MRU\n",
                 set, regenerateBlkAddr(tag, set));
         if (blk->whenReady > curTick
             && blk->whenReady - curTick > hitLatency) {
@@ -208,7 +208,7 @@ LRU::findReplacement(Addr addr, PacketList &writebacks)
         ++sampledRefs;
         blk->refCount = 0;
 
-        DPRINTF(Cache, "set %x: selecting blk %x for replacement\n",
+        DPRINTF(CacheRepl, "set %x: selecting blk %x for replacement\n",
                 set, regenerateBlkAddr(blk->tag, set));
     } else if (!blk->isTouched) {
         tagsInUse++;
-- 
cgit v1.2.3


From 4bcfa916f1f12e8cda253ca7154e75fa1f71ca44 Mon Sep 17 00:00:00 2001
From: Steve Reinhardt <stever@eecs.umich.edu>
Date: Sat, 14 Jul 2007 23:49:24 -0700
Subject: New tree-based algorithm for creating more complex cache hierarchies.

--HG--
extra : convert_revision : de8dd4ef5dae0f3e084461e8ef7c549653e61d3f
---
 configs/example/memtest.py | 143 +++++++++++++++++++++++----------------------
 1 file changed, 73 insertions(+), 70 deletions(-)

diff --git a/configs/example/memtest.py b/configs/example/memtest.py
index 0e6260b5d..47853ffab 100644
--- a/configs/example/memtest.py
+++ b/configs/example/memtest.py
@@ -49,6 +49,10 @@ parser.add_option("-n", "--numtesters", type="int", default=8,
                   metavar="N",
                   help="Number of tester pseudo-CPUs [default: %default]")
 
+parser.add_option("-t", "--treespec", type="string",
+                  help="Colon-separated multilevel tree specification")
+
+
 parser.add_option("-f", "--functional", type="int", default=0,
                   metavar="PCT",
                   help="Target percentage of functional accesses "
@@ -69,84 +73,83 @@ if args:
      print "Error: script doesn't take any positional arguments"
      sys.exit(1)
 
-# Should generalize this someday... would be cool to have a loop that
-# just iterates, adding a level of caching each time.
-#if options.cache_levels != 2 and options.cache_levels != 0:
-#     print "Error: number of cache levels must be 0 or 2"
-#     sys.exit(1)
-
-if options.blocking:
-     num_l1_mshrs = 1
-     num_l2_mshrs = 1
-else:
-     num_l1_mshrs = 12
-     num_l2_mshrs = 92
-
 block_size = 64
 
-# --------------------
-# Base L1 Cache
-# ====================
-
-class L1(BaseCache):
-    latency = '1ns'
-    block_size = block_size
-    mshrs = num_l1_mshrs
-    tgts_per_mshr = 8
-
-# ----------------------
-# Base L2 Cache
-# ----------------------
-
-class L2(BaseCache):
-    block_size = block_size
-    latency = '10ns'
-    mshrs = num_l2_mshrs
-    tgts_per_mshr = 16
-    write_buffers = 8
-
-if options.numtesters > block_size:
+if not options.treespec:
+     # convert simple cache_levels option to treespec
+     treespec = [options.numtesters, 1]
+     numtesters = options.numtesters
+else:
+     try:
+          treespec = [int(x) for x in options.treespec.split(':')]
+          numtesters = reduce(lambda x,y: x*y, treespec)
+     except:
+          print "Error parsing treespec option"
+          sys.exit(1)
+
+if numtesters > block_size:
      print "Error: Number of testers limited to %s because of false sharing" \
            % (block_size)
      sys.exit(1)
 
-cpus = [ MemTest(atomic=options.atomic, max_loads=options.maxloads,
-                 percent_functional=options.functional,
-                 percent_uncacheable=options.uncacheable,
-                 progress_interval=options.progress)
-         for i in xrange(options.numtesters) ]
+if len(treespec) < 1:
+     print "Error parsing treespec"
+     sys.exit(1)
 
-# system simulated
-system = System(cpu = cpus, funcmem = PhysicalMemory(),
-                physmem = PhysicalMemory(latency = "100ns"),
-                membus = Bus(clock="500MHz", width=16))
-
-# l2cache & bus
-if options.cache_levels == 2:
-    system.toL2Bus = Bus(clock="500MHz", width=16)
-    system.l2c = L2(size='64kB', assoc=8)
-    system.l2c.cpu_side = system.toL2Bus.port
-
-    # connect l2c to membus
-    system.l2c.mem_side = system.membus.port
-
-# add L1 caches
-for cpu in cpus:
-    if options.cache_levels == 2:
-         cpu.l1c = L1(size = '32kB', assoc = 4)
-         cpu.test = cpu.l1c.cpu_side
-         cpu.l1c.mem_side = system.toL2Bus.port
-    elif options.cache_levels == 1:
-         cpu.l1c = L1(size = '32kB', assoc = 4)
-         cpu.test = cpu.l1c.cpu_side
-         cpu.l1c.mem_side = system.membus.port
-    else:
-         cpu.test = system.membus.port
-    system.funcmem.port = cpu.functional
-
-# connect memory to membus
-system.physmem.port = system.membus.port
+# define prototype L1 cache
+proto_l1 = BaseCache(size = '32kB', assoc = 4, block_size = block_size,
+                     latency = '1ns', tgts_per_mshr = 8)
 
+if options.blocking:
+     proto_l1.mshrs = 1
+else:
+     proto_l1.mshrs = 8
+
+# build a list of prototypes, one for each cache level (L1 is at end,
+# followed by the tester pseudo-cpu objects)
+prototypes = [ proto_l1,
+               MemTest(atomic=options.atomic, max_loads=options.maxloads,
+                       percent_functional=options.functional,
+                       percent_uncacheable=options.uncacheable,
+                       progress_interval=options.progress) ]
+
+while len(prototypes) < len(treespec):
+     # clone previous level and update params
+     prev = prototypes[0]
+     next = prev()
+     next.size = prev.size * 4
+     next.latency = prev.latency * 10
+     next.assoc = prev.assoc * 2
+     prototypes.insert(0, next)
+
+# system simulated
+system = System(funcmem = PhysicalMemory(),
+                physmem = PhysicalMemory(latency = "100ns"))
+
+def make_level(spec, prototypes, attach_obj, attach_port):
+     fanout = spec[0]
+     parent = attach_obj # use attach obj as config parent too
+     if fanout > 1:
+          new_bus = Bus(clock="500MHz", width=16)
+          new_bus.port = getattr(attach_obj, attach_port)
+          parent.cpu_side_bus = new_bus
+          attach_obj = new_bus
+          attach_port = "port"
+     objs = [prototypes[0]() for i in xrange(fanout)]
+     if len(spec) > 1:
+          # we just built caches, more levels to go
+          parent.cache = objs
+          for cache in objs:
+               cache.mem_side = getattr(attach_obj, attach_port)
+               make_level(spec[1:], prototypes[1:], cache, "cpu_side")
+     else:
+          # we just built the MemTest objects
+          parent.cpu = objs
+          for t in objs:
+               t.test = getattr(attach_obj, attach_port)
+               t.functional = system.funcmem.port
+
+make_level(treespec, prototypes, system.physmem, "port")
 
 # -----------------------
 # run simulation
-- 
cgit v1.2.3


From ad560a6642fbb752e608c02048fc2103e60093b3 Mon Sep 17 00:00:00 2001
From: Steve Reinhardt <stever@eecs.umich.edu>
Date: Sun, 15 Jul 2007 13:22:49 -0700
Subject: Add --force-bus option to memtest.py.

--HG--
extra : convert_revision : 101735cca426903704ff2edaff051fa7c5bfc46c
---
 configs/example/memtest.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/configs/example/memtest.py b/configs/example/memtest.py
index 47853ffab..c9149865a 100644
--- a/configs/example/memtest.py
+++ b/configs/example/memtest.py
@@ -52,6 +52,8 @@ parser.add_option("-n", "--numtesters", type="int", default=8,
 parser.add_option("-t", "--treespec", type="string",
                   help="Colon-separated multilevel tree specification")
 
+parser.add_option("--force-bus", action="store_true",
+                  help="Use bus between levels even with single cache")
 
 parser.add_option("-f", "--functional", type="int", default=0,
                   metavar="PCT",
@@ -129,7 +131,7 @@ system = System(funcmem = PhysicalMemory(),
 def make_level(spec, prototypes, attach_obj, attach_port):
      fanout = spec[0]
      parent = attach_obj # use attach obj as config parent too
-     if fanout > 1:
+     if fanout > 1 or options.force_bus:
           new_bus = Bus(clock="500MHz", width=16)
           new_bus.port = getattr(attach_obj, attach_port)
           parent.cpu_side_bus = new_bus
-- 
cgit v1.2.3


From b1bdc3b3d9de40387a209777aa972f96792c8d6a Mon Sep 17 00:00:00 2001
From: Steve Reinhardt <stever@eecs.umich.edu>
Date: Sun, 15 Jul 2007 14:07:31 -0700
Subject: Punt on old -n/-c memtest args. Also added comments to document
 treespec format.

--HG--
extra : convert_revision : fa9e8f66b68b96a4efca8a7fe6e7c37367382d9d
---
 configs/example/memtest.py | 51 +++++++++++++++++++++++++++-------------------
 1 file changed, 30 insertions(+), 21 deletions(-)

diff --git a/configs/example/memtest.py b/configs/example/memtest.py
index c9149865a..e7f39d8bd 100644
--- a/configs/example/memtest.py
+++ b/configs/example/memtest.py
@@ -33,24 +33,38 @@ m5.AddToPath('../common')
 
 parser = optparse.OptionParser()
 
-parser.add_option("-c", "--cache-levels", type="int", default=2,
-                  metavar="LEVELS",
-                  help="Number of cache levels [default: %default]")
 parser.add_option("-a", "--atomic", action="store_true",
                   help="Use atomic (non-timing) mode")
 parser.add_option("-b", "--blocking", action="store_true",
                   help="Use blocking caches")
-parser.add_option("-l", "--maxloads", default="1G", metavar="N",
-                  help="Stop after N loads [default: %default]")
+parser.add_option("-l", "--maxloads", metavar="N",
+                  help="Stop after N loads")
 parser.add_option("-m", "--maxtick", type="int", default=m5.MaxTick,
                   metavar="T",
                   help="Stop after T ticks")
-parser.add_option("-n", "--numtesters", type="int", default=8,
-                  metavar="N",
-                  help="Number of tester pseudo-CPUs [default: %default]")
 
-parser.add_option("-t", "--treespec", type="string",
-                  help="Colon-separated multilevel tree specification")
+#
+# The "tree" specification is a colon-separated list of one or more
+# integers.  The first integer is the number of caches/testers
+# connected directly to main memory.  The last integer in the list is
+# the number of testers associated with the uppermost level of memory
+# (L1 cache, if there are caches, or main memory if no caches).  Thus
+# if there is only one integer, there are no caches, and the integer
+# specifies the number of testers connected directly to main memory.
+# The other integers (if any) specify the number of caches at each
+# intermediate level of the hierarchy.
+#
+# Examples:
+#
+#  "2:1"    Two caches connected to memory with a single tester behind each
+#           (single-level hierarchy, two testers total)
+#
+#  "2:2:1"  Two-level hierarchy, 2 L1s behind each of 2 L2s, 4 testers total
+#
+parser.add_option("-t", "--treespec", type="string", default="8:1",
+                  help="Colon-separated multilevel tree specification, "
+                  "see script comments for details "
+                  "[default: %default]")
 
 parser.add_option("--force-bus", action="store_true",
                   help="Use bus between levels even with single cache")
@@ -77,17 +91,12 @@ if args:
 
 block_size = 64
 
-if not options.treespec:
-     # convert simple cache_levels option to treespec
-     treespec = [options.numtesters, 1]
-     numtesters = options.numtesters
-else:
-     try:
-          treespec = [int(x) for x in options.treespec.split(':')]
-          numtesters = reduce(lambda x,y: x*y, treespec)
-     except:
-          print "Error parsing treespec option"
-          sys.exit(1)
+try:
+     treespec = [int(x) for x in options.treespec.split(':')]
+     numtesters = reduce(lambda x,y: x*y, treespec)
+except:
+     print "Error parsing treespec option"
+     sys.exit(1)
 
 if numtesters > block_size:
      print "Error: Number of testers limited to %s because of false sharing" \
-- 
cgit v1.2.3


From 9172876dd7ba4877c586ced30904548539451f37 Mon Sep 17 00:00:00 2001
From: Steve Reinhardt <stever@eecs.umich.edu>
Date: Sun, 15 Jul 2007 14:32:55 -0700
Subject: Fix problem with unset max_loads in memtest. Also make default 0, and
 make that mean run forever.

--HG--
extra : convert_revision : 3e60a52b1c5e334a9ef3d744cf7ee1d851ba4aa9
---
 configs/example/memtest.py | 14 +++++++++-----
 src/cpu/memtest/MemTest.py |  2 +-
 src/cpu/memtest/memtest.cc |  2 +-
 3 files changed, 11 insertions(+), 7 deletions(-)

diff --git a/configs/example/memtest.py b/configs/example/memtest.py
index e7f39d8bd..af100c9a9 100644
--- a/configs/example/memtest.py
+++ b/configs/example/memtest.py
@@ -37,7 +37,7 @@ parser.add_option("-a", "--atomic", action="store_true",
                   help="Use atomic (non-timing) mode")
 parser.add_option("-b", "--blocking", action="store_true",
                   help="Use blocking caches")
-parser.add_option("-l", "--maxloads", metavar="N",
+parser.add_option("-l", "--maxloads", metavar="N", default=0,
                   help="Stop after N loads")
 parser.add_option("-m", "--maxtick", type="int", default=m5.MaxTick,
                   metavar="T",
@@ -116,14 +116,18 @@ if options.blocking:
 else:
      proto_l1.mshrs = 8
 
-# build a list of prototypes, one for each cache level (L1 is at end,
-# followed by the tester pseudo-cpu objects)
-prototypes = [ proto_l1,
-               MemTest(atomic=options.atomic, max_loads=options.maxloads,
+# build a list of prototypes, one for each level of treespec, starting
+# at the end (last entry is tester objects)
+prototypes = [ MemTest(atomic=options.atomic, max_loads=options.maxloads,
                        percent_functional=options.functional,
                        percent_uncacheable=options.uncacheable,
                        progress_interval=options.progress) ]
 
+# next comes L1 cache, if any
+if len(treespec) > 1:
+     prototypes.insert(0, proto_l1)
+
+# now add additional cache levels (if any) by scaling L1 params
 while len(prototypes) < len(treespec):
      # clone previous level and update params
      prev = prototypes[0]
diff --git a/src/cpu/memtest/MemTest.py b/src/cpu/memtest/MemTest.py
index 381519972..a328f4734 100644
--- a/src/cpu/memtest/MemTest.py
+++ b/src/cpu/memtest/MemTest.py
@@ -33,7 +33,7 @@ from m5 import build_env
 
 class MemTest(SimObject):
     type = 'MemTest'
-    max_loads = Param.Counter("number of loads to execute")
+    max_loads = Param.Counter(0, "number of loads to execute")
     atomic = Param.Bool(False, "Execute tester in atomic mode? (or timing)\n")
     memory_size = Param.Int(65536, "memory size")
     percent_dest_unaligned = Param.Percent(50,
diff --git a/src/cpu/memtest/memtest.cc b/src/cpu/memtest/memtest.cc
index 019b4328c..db3ca282a 100644
--- a/src/cpu/memtest/memtest.cc
+++ b/src/cpu/memtest/memtest.cc
@@ -232,7 +232,7 @@ MemTest::completeRequest(PacketPtr pkt)
             nextProgressMessage += progressInterval;
         }
 
-        if (numReads >= maxLoads)
+        if (maxLoads != 0 && numReads >= maxLoads)
             exitSimLoop("maximum number of loads reached");
         break;
 
-- 
cgit v1.2.3


From f790f34fe30aaca22b829104a8cf3f547624132a Mon Sep 17 00:00:00 2001
From: Steve Reinhardt <stever@eecs.umich.edu>
Date: Sun, 15 Jul 2007 20:09:03 -0700
Subject: Make Bus::findPort() a little more useful. Move check for loops
 outside, since half the call sites end up working around it anyway.  Return
 integer port ID instead of port object pointer.

--HG--
extra : convert_revision : 4c31fe9930f4d1aa4919e764efb7c50d43792ea3
---
 src/mem/bus.cc | 47 ++++++++++++++++++++++++-----------------------
 src/mem/bus.hh |  6 ++----
 2 files changed, 26 insertions(+), 27 deletions(-)

diff --git a/src/mem/bus.cc b/src/mem/bus.cc
index 0cb1240f3..24a0c6f02 100644
--- a/src/mem/bus.cc
+++ b/src/mem/bus.cc
@@ -172,7 +172,7 @@ void Bus::occupyBus(PacketPtr pkt)
 bool
 Bus::recvTiming(PacketPtr pkt)
 {
-    Port *port;
+    int port_id;
     DPRINTF(Bus, "recvTiming: packet src %d dest %d addr 0x%x cmd %s\n",
             pkt->getSrc(), pkt->getDest(), pkt->getAddr(), pkt->cmdString());
 
@@ -196,8 +196,8 @@ Bus::recvTiming(PacketPtr pkt)
     // Make sure to clear the snoop commit flag so it doesn't think an
     // access has been handled twice.
     if (dest == Packet::Broadcast) {
-        port = findPort(pkt->getAddr(), pkt->getSrc());
-        timingSnoop(pkt, port ? port : interfaces[pkt->getSrc()]);
+        port_id = findPort(pkt->getAddr());
+        timingSnoop(pkt, interfaces[port_id]);
 
         if (pkt->memInhibitAsserted()) {
             //Cache-Cache transfer occuring
@@ -213,13 +213,13 @@ Bus::recvTiming(PacketPtr pkt)
     } else {
         assert(dest >= 0 && dest < maxId);
         assert(dest != pkt->getSrc()); // catch infinite loops
-        port = interfaces[dest];
+        port_id = dest;
     }
 
     occupyBus(pkt);
 
-    if (port) {
-        if (port->sendTiming(pkt))  {
+    if (port_id != pkt->getSrc()) {
+        if (interfaces[port_id]->sendTiming(pkt))  {
             // Packet was successfully sent. Return true.
             // Also take care of retries
             if (inRetry) {
@@ -279,8 +279,8 @@ Bus::recvRetry(int id)
     }
 }
 
-Port *
-Bus::findPort(Addr addr, int id)
+int
+Bus::findPort(Addr addr)
 {
     /* An interval tree would be a better way to do this. --ali. */
     int dest_id = -1;
@@ -295,7 +295,7 @@ Bus::findPort(Addr addr, int id)
              iter != defaultRange.end(); iter++) {
             if (*iter == addr) {
                 DPRINTF(Bus, "  found addr %#llx on default\n", addr);
-                return defaultPort;
+                return defaultId;
             }
         }
 
@@ -306,18 +306,11 @@ Bus::findPort(Addr addr, int id)
             DPRINTF(Bus, "Unable to find destination for addr: %#llx, will use "
                     "default port", addr);
 
-            return defaultPort;
+            return defaultId;
         }
     }
 
-
-    // we shouldn't be sending this back to where it came from
-    // do the snoop access and then we should terminate
-    // the cyclical call.
-    if (dest_id == id)
-        return 0;
-
-    return interfaces[dest_id];
+    return dest_id;
 }
 
 void
@@ -380,7 +373,8 @@ Bus::recvAtomic(PacketPtr pkt)
     Tick snoop_response_latency = 0;
     int orig_src = pkt->getSrc();
 
-    Port *target_port = findPort(pkt->getAddr(), pkt->getSrc());
+    int target_port_id = findPort(pkt->getAddr());
+    Port *target_port = interfaces[target_port_id];
 
     SnoopIter s_end = snoopPorts.end();
     for (SnoopIter s_iter = snoopPorts.begin(); s_iter != s_end; s_iter++) {
@@ -406,7 +400,13 @@ Bus::recvAtomic(PacketPtr pkt)
         }
     }
 
-    Tick response_latency = target_port->sendAtomic(pkt);
+    Tick response_latency = 0;
+
+    // we can get requests sent up from the memory side of the bus for
+    // snooping... don't send them back down!
+    if (target_port_id != pkt->getSrc()) {
+        response_latency = target_port->sendAtomic(pkt);
+    }
 
     // if we got a response from a snooper, restore it here
     if (snoop_response_cmd != MemCmd::InvalidCmd) {
@@ -431,11 +431,12 @@ Bus::recvFunctional(PacketPtr pkt)
             pkt->getSrc(), pkt->getDest(), pkt->getAddr(), pkt->cmdString());
     assert(pkt->getDest() == Packet::Broadcast);
 
-    Port* port = findPort(pkt->getAddr(), pkt->getSrc());
-    functionalSnoop(pkt, port ? port : interfaces[pkt->getSrc()]);
+    int port_id = findPort(pkt->getAddr());
+    Port *port = interfaces[port_id];
+    functionalSnoop(pkt, port);
 
     // If the snooping hasn't found what we were looking for, keep going.
-    if (!pkt->isResponse() && port) {
+    if (!pkt->isResponse() && port_id != pkt->getSrc()) {
         port->sendFunctional(pkt);
     }
 }
diff --git a/src/mem/bus.hh b/src/mem/bus.hh
index bd51337ed..a19420244 100644
--- a/src/mem/bus.hh
+++ b/src/mem/bus.hh
@@ -176,11 +176,9 @@ class Bus : public MemObject
     /** Find which port connected to this bus (if any) should be given a packet
      * with this address.
      * @param addr Address to find port for.
-     * @param id Id of the port this packet was received from (to prevent
-     *             loops)
-     * @return pointer to port that the packet should be sent out of.
+     * @return id of port that the packet should be sent out of.
      */
-    Port *findPort(Addr addr, int id);
+    int findPort(Addr addr);
 
     /** Snoop all relevant ports functionally. */
     void functionalSnoop(PacketPtr pkt, Port *responder);
-- 
cgit v1.2.3


From 884807a68ad7e4f390660b3becfe4ee094334e95 Mon Sep 17 00:00:00 2001
From: Steve Reinhardt <stever@eecs.umich.edu>
Date: Sun, 15 Jul 2007 20:11:06 -0700
Subject: Fix up a bunch of multilevel coherence issues. Atomic mode seems to
 work.  Timing is closer but not there yet.

--HG--
extra : convert_revision : 0dea5c3d4b973d009e9d4a4c21b9cad15961d56f
---
 configs/example/memtest.py  |  2 +-
 src/cpu/memtest/memtest.cc  |  4 +-
 src/cpu/o3/lsq_impl.hh      |  7 ++--
 src/mem/bus.cc              |  5 ++-
 src/mem/cache/cache_impl.hh | 94 ++++++++++++++++++++++++++++++++++++++++-----
 src/mem/packet.hh           | 12 ++++--
 6 files changed, 105 insertions(+), 19 deletions(-)

diff --git a/configs/example/memtest.py b/configs/example/memtest.py
index af100c9a9..5bb874e85 100644
--- a/configs/example/memtest.py
+++ b/configs/example/memtest.py
@@ -144,7 +144,7 @@ system = System(funcmem = PhysicalMemory(),
 def make_level(spec, prototypes, attach_obj, attach_port):
      fanout = spec[0]
      parent = attach_obj # use attach obj as config parent too
-     if fanout > 1 or options.force_bus:
+     if len(spec) > 1 and (fanout > 1 or options.force_bus):
           new_bus = Bus(clock="500MHz", width=16)
           new_bus.port = getattr(attach_obj, attach_port)
           parent.cpu_side_bus = new_bus
diff --git a/src/cpu/memtest/memtest.cc b/src/cpu/memtest/memtest.cc
index db3ca282a..f5c8bb93b 100644
--- a/src/cpu/memtest/memtest.cc
+++ b/src/cpu/memtest/memtest.cc
@@ -64,7 +64,9 @@ MemTest::CpuPort::recvTiming(PacketPtr pkt)
 Tick
 MemTest::CpuPort::recvAtomic(PacketPtr pkt)
 {
-    panic("MemTest doesn't expect recvAtomic callback!");
+    // must be snoop upcall
+    assert(pkt->isRequest());
+    assert(pkt->getDest() == Packet::Broadcast);
     return curTick;
 }
 
diff --git a/src/cpu/o3/lsq_impl.hh b/src/cpu/o3/lsq_impl.hh
index b4a6a02da..10c0afd38 100644
--- a/src/cpu/o3/lsq_impl.hh
+++ b/src/cpu/o3/lsq_impl.hh
@@ -84,9 +84,10 @@ LSQ<Impl>::DcachePort::recvTiming(PacketPtr pkt)
         lsq->thread[pkt->req->getThreadNum()].completeDataAccess(pkt);
     }
     else {
-    //else it is a coherence request, maybe you need to do something
-        warn("Recieved a coherence request (Invalidate?), 03CPU doesn't"
-             "update LSQ for these\n");
+        // must be a snoop
+
+        // @TODO someday may need to process invalidations in LSQ here
+        // to provide stronger consistency model
     }
     return true;
 }
diff --git a/src/mem/bus.cc b/src/mem/bus.cc
index 24a0c6f02..e70558bd6 100644
--- a/src/mem/bus.cc
+++ b/src/mem/bus.cc
@@ -183,8 +183,9 @@ Bus::recvTiming(PacketPtr pkt)
 
     // If the bus is busy, or other devices are in line ahead of the current
     // one, put this device on the retry list.
-    if (tickNextIdle > curTick ||
-        (retryList.size() && (!inRetry || pktPort != retryList.front())))
+    if (!pkt->isExpressSnoop() &&
+        (tickNextIdle > curTick ||
+         (retryList.size() && (!inRetry || pktPort != retryList.front()))))
     {
         addToRetryList(pktPort);
         DPRINTF(Bus, "recvTiming: Bus is busy, returning false\n");
diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh
index b159df84a..59571dd6f 100644
--- a/src/mem/cache/cache_impl.hh
+++ b/src/mem/cache/cache_impl.hh
@@ -165,11 +165,25 @@ Cache<TagStore>::satisfyCpuSideRequest(PacketPtr pkt, BlkType *blk)
             blk->trackLoadLocked(pkt);
         }
         pkt->setDataFromBlock(blk->data, blkSize);
+        if (pkt->getSize() == blkSize) {
+            // special handling for coherent block requests from
+            // upper-level caches
+            if (pkt->needsExclusive()) {
+                // on ReadExReq we give up our copy
+                tags->invalidateBlk(blk);
+            } else {
+                // on ReadReq we create shareable copies here and in
+                // the requester
+                pkt->assertShared();
+                blk->status &= ~BlkWritable;
+            }
+        }
     } else {
         // Not a read or write... must be an upgrade.  it's OK
         // to just ack those as long as we have an exclusive
         // copy at this level.
         assert(pkt->cmd == MemCmd::UpgradeReq);
+        tags->invalidateBlk(blk);
     }
 }
 
@@ -269,6 +283,18 @@ Cache<TagStore>::access(PacketPtr pkt, BlkType *&blk, int &lat)
             hits[pkt->cmdToIndex()][0/*pkt->req->getThreadNum()*/]++;
             satisfied = true;
             satisfyCpuSideRequest(pkt, blk);
+        } else if (pkt->cmd == MemCmd::Writeback) {
+            // special case: writeback to read-only block (e.g., from
+            // L1 into L2).  since we're really just passing ownership
+            // from one cache to another, we can update this cache to
+            // be the owner without making the block writeable
+            assert(!blk->isWritable() /* && !blk->isDirty() */);
+            assert(blkSize == pkt->getSize());
+            std::memcpy(blk->data, pkt->getPtr<uint8_t>(), blkSize);
+            blk->status |= BlkDirty;
+            satisfied = true;
+            // nothing else to do; writeback doesn't expect response
+            assert(!pkt->needsResponse());
         } else {
             // permission violation... nothing to do here, leave unsatisfied
             // for statistics purposes this counts like a complete miss
@@ -363,9 +389,10 @@ Cache<TagStore>::timingAccess(PacketPtr pkt)
     bool needsResponse = pkt->needsResponse();
 
     if (satisfied) {
-        assert(needsResponse);
-        pkt->makeTimingResponse();
-        cpuSidePort->respond(pkt, curTick+lat);
+        if (needsResponse) {
+            pkt->makeTimingResponse();
+            cpuSidePort->respond(pkt, curTick+lat);
+        }
     } else {
         // miss
         if (prefetchMiss)
@@ -456,10 +483,30 @@ Cache<TagStore>::atomicAccess(PacketPtr pkt)
 {
     int lat = hitLatency;
 
+    // @TODO: make this a parameter
+    bool last_level_cache = false;
+
     if (pkt->memInhibitAsserted()) {
-        DPRINTF(Cache, "mem inhibited on 0x%x: not responding\n",
-                pkt->getAddr());
         assert(!pkt->req->isUncacheable());
+        // have to invalidate ourselves and any lower caches even if
+        // upper cache will be responding
+        if (pkt->isInvalidate()) {
+            BlkType *blk = tags->findBlock(pkt->getAddr());
+            if (blk && blk->isValid()) {
+                tags->invalidateBlk(blk);
+                DPRINTF(Cache, "rcvd mem-inhibited %s on 0x%x: invalidating\n",
+                        pkt->cmdString(), pkt->getAddr());
+            }
+            if (!last_level_cache) {
+                DPRINTF(Cache, "forwarding mem-inhibited %s on 0x%x\n",
+                        pkt->cmdString(), pkt->getAddr());
+                lat += memSidePort->sendAtomic(pkt);
+            }
+        } else {
+            DPRINTF(Cache, "rcvd mem-inhibited %s on 0x%x: not responding\n",
+                    pkt->cmdString(), pkt->getAddr());
+        }
+
         return lat;
     }
 
@@ -791,9 +838,7 @@ Cache<TagStore>::handleFill(PacketPtr pkt, BlkType *blk,
         assert(pkt->isRead() || blk->isValid());
     }
 
-    if (pkt->needsExclusive()) {
-        blk->status = BlkValid | BlkWritable | BlkDirty;
-    } else if (!pkt->sharedAsserted()) {
+    if (pkt->needsExclusive() || !pkt->sharedAsserted()) {
         blk->status = BlkValid | BlkWritable;
     } else {
         blk->status = BlkValid;
@@ -839,6 +884,37 @@ void
 Cache<TagStore>::handleSnoop(PacketPtr pkt, BlkType *blk,
                              bool is_timing, bool is_deferred)
 {
+    assert(pkt->isRequest());
+
+    // first propagate snoop upward to see if anyone above us wants to
+    // handle it.  save & restore packet src since it will get
+    // rewritten to be relative to cpu-side bus (if any)
+    bool alreadySupplied = pkt->memInhibitAsserted();
+    bool upperSupply = false;
+    if (is_timing) {
+        Packet *snoopPkt = new Packet(pkt, true);  // clear flags
+        snoopPkt->setExpressSnoop();
+        cpuSidePort->sendTiming(snoopPkt);
+        if (snoopPkt->memInhibitAsserted()) {
+            // cache-to-cache response from some upper cache
+            assert(!alreadySupplied);
+            pkt->assertMemInhibit();
+        }
+        if (snoopPkt->sharedAsserted()) {
+            pkt->assertShared();
+        }
+        delete snoopPkt;
+    } else {
+        int origSrc = pkt->getSrc();
+        cpuSidePort->sendAtomic(pkt);
+        if (!alreadySupplied && pkt->memInhibitAsserted()) {
+            // cache-to-cache response from some upper cache:
+            // forward response to original requester
+            assert(pkt->isResponse());
+        }
+        pkt->setSrc(origSrc);
+    }
+
     if (!blk || !blk->isValid()) {
         return;
     }
@@ -846,7 +922,7 @@ Cache<TagStore>::handleSnoop(PacketPtr pkt, BlkType *blk,
     // we may end up modifying both the block state and the packet (if
     // we respond in atomic mode), so just figure out what to do now
     // and then do it later
-    bool supply = blk->isDirty() && pkt->isRead();
+    bool supply = blk->isDirty() && pkt->isRead() && !upperSupply;
     bool invalidate = pkt->isInvalidate();
 
     if (pkt->isRead() && !pkt->isInvalidate()) {
diff --git a/src/mem/packet.hh b/src/mem/packet.hh
index c90842dee..036bd3fd7 100644
--- a/src/mem/packet.hh
+++ b/src/mem/packet.hh
@@ -252,9 +252,11 @@ class Packet : public FastAlloc
     bool destValid;
 
     enum Flag {
-        // Snoop flags
+        // Snoop response flags
         MemInhibit,
         Shared,
+        // Special control flags
+        ExpressSnoop,
         NUM_PACKET_FLAGS
     };
 
@@ -317,6 +319,10 @@ class Packet : public FastAlloc
     bool memInhibitAsserted()   { return flags[MemInhibit]; }
     bool sharedAsserted()       { return flags[Shared]; }
 
+    // Special control flags
+    void setExpressSnoop()      { flags[ExpressSnoop] = true; }
+    bool isExpressSnoop()       { return flags[ExpressSnoop]; }
+
     // Network error conditions... encapsulate them as methods since
     // their encoding keeps changing (from result field to command
     // field, etc.)
@@ -372,7 +378,7 @@ class Packet : public FastAlloc
      * that, as we can't guarantee that the new packet's lifetime is
      * less than that of the original packet.  In this case the new
      * packet should allocate its own data. */
-    Packet(Packet *origPkt)
+    Packet(Packet *origPkt, bool clearFlags = false)
         :  cmd(origPkt->cmd), req(origPkt->req),
            data(origPkt->staticData ? origPkt->data : NULL),
            staticData(origPkt->staticData),
@@ -381,7 +387,7 @@ class Packet : public FastAlloc
            src(origPkt->src), dest(origPkt->dest),
            addrSizeValid(origPkt->addrSizeValid),
            srcValid(origPkt->srcValid), destValid(origPkt->destValid),
-           flags(origPkt->flags),
+           flags(clearFlags ? 0 : origPkt->flags),
            time(curTick), senderState(origPkt->senderState)
     {
     }
-- 
cgit v1.2.3


From e80ab26abc325db0c99b153aa9758ad0df66c77c Mon Sep 17 00:00:00 2001
From: Steve Reinhardt <stever@eecs.umich.edu>
Date: Sun, 15 Jul 2007 23:30:22 -0400
Subject: Add valgrind-suppressions file.

--HG--
extra : convert_revision : 8c912aa723f2532c728b47a0bd83c3f3f27d7dfe
---
 util/valgrind-suppressions | 113 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 113 insertions(+)
 create mode 100644 util/valgrind-suppressions

diff --git a/util/valgrind-suppressions b/util/valgrind-suppressions
new file mode 100644
index 000000000..861eb5c3d
--- /dev/null
+++ b/util/valgrind-suppressions
@@ -0,0 +1,113 @@
+{
+   python error
+   Memcheck:Cond
+   fun:PyObject_Free
+}
+{
+   python error
+   Memcheck:Addr1
+   fun:PyObject_Free
+}
+{
+   python error
+   Memcheck:Addr2
+   fun:PyObject_Free
+}
+{
+   python error
+   Memcheck:Addr4
+   fun:PyObject_Free
+}
+{
+   python error
+   Memcheck:Addr8
+   fun:PyObject_Free
+}
+{
+   python error
+   Memcheck:Addr16
+   fun:PyObject_Free
+}
+{
+   python error
+   Memcheck:Value1
+   fun:PyObject_Free
+}
+{
+   python error
+   Memcheck:Value2
+   fun:PyObject_Free
+}
+{
+   python error
+   Memcheck:Value4
+   fun:PyObject_Free
+}
+{
+   python error
+   Memcheck:Value8
+   fun:PyObject_Free
+}
+{
+   python error
+   Memcheck:Value16
+   fun:PyObject_Free
+}
+
+{
+   python error
+   Memcheck:Cond
+   fun:PyObject_Realloc
+}
+{
+   python error
+   Memcheck:Addr1
+   fun:PyObject_Realloc
+}
+{
+   python error
+   Memcheck:Addr2
+   fun:PyObject_Realloc
+}
+{
+   python error
+   Memcheck:Addr4
+   fun:PyObject_Realloc
+}
+{
+   python error
+   Memcheck:Addr8
+   fun:PyObject_Realloc
+}
+{
+   python error
+   Memcheck:Addr16
+   fun:PyObject_Realloc
+}
+{
+   python error
+   Memcheck:Value1
+   fun:PyObject_Realloc
+}
+{
+   python error
+   Memcheck:Value2
+   fun:PyObject_Realloc
+}
+{
+   python error
+   Memcheck:Value4
+   fun:PyObject_Realloc
+}
+{
+   python error
+   Memcheck:Value8
+   fun:PyObject_Realloc
+}
+{
+   python error
+   Memcheck:Value16
+   fun:PyObject_Realloc
+}
+
+
-- 
cgit v1.2.3


From f67c8b33cc57d38b102154c540456ee2c0444e63 Mon Sep 17 00:00:00 2001
From: Steve Reinhardt <stever@eecs.umich.edu>
Date: Sun, 15 Jul 2007 21:03:12 -0700
Subject: Fix bug with timing snoop upcalls to MemTest object.

--HG--
extra : convert_revision : 1940a5d231b4f856cf69578f68ea98435824dbd8
---
 src/cpu/memtest/memtest.cc | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/src/cpu/memtest/memtest.cc b/src/cpu/memtest/memtest.cc
index f5c8bb93b..77816e8d1 100644
--- a/src/cpu/memtest/memtest.cc
+++ b/src/cpu/memtest/memtest.cc
@@ -57,7 +57,13 @@ int TESTER_ALLOCATOR=0;
 bool
 MemTest::CpuPort::recvTiming(PacketPtr pkt)
 {
-    memtest->completeRequest(pkt);
+    if (pkt->isResponse()) {
+        memtest->completeRequest(pkt);
+    } else {
+        // must be snoop upcall
+        assert(pkt->isRequest());
+        assert(pkt->getDest() == Packet::Broadcast);
+    }
     return true;
 }
 
-- 
cgit v1.2.3


From ff13827ccb559890f05b2e1d97bc6ecf86f9dd16 Mon Sep 17 00:00:00 2001
From: Steve Reinhardt <stever@eecs.umich.edu>
Date: Tue, 17 Jul 2007 06:23:11 -0700
Subject: Assert that an mshr has a target in getTarget().

--HG--
extra : convert_revision : 08091670fc319876012ed139fcd2584c364a980c
---
 src/mem/cache/miss/mshr.hh | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/src/mem/cache/miss/mshr.hh b/src/mem/cache/miss/mshr.hh
index 293f290b8..a27f465aa 100644
--- a/src/mem/cache/miss/mshr.hh
+++ b/src/mem/cache/miss/mshr.hh
@@ -181,11 +181,17 @@ public:
      */
     TargetList* getTargetList() { return &targets; }
 
+    /**
+     * Returns true if there are targets left.
+     * @return true if there are targets
+     */
+    bool hasTargets() { return !targets.empty(); }
+
     /**
      * Returns a reference to the first target.
      * @return A pointer to the first target.
      */
-    Target *getTarget() { return &targets.front(); }
+    Target *getTarget() { assert(hasTargets());  return &targets.front(); }
 
     /**
      * Pop first target.
@@ -196,12 +202,6 @@ public:
         targets.pop_front();
     }
 
-    /**
-     * Returns true if there are targets left.
-     * @return true if there are targets
-     */
-    bool hasTargets() { return !targets.empty(); }
-
     bool isSimpleForward()
     {
         if (getNumTargets() != 1)
-- 
cgit v1.2.3


From a25f3ac67f9c467cf945b45fbc0f4e587e640cab Mon Sep 17 00:00:00 2001
From: Steve Reinhardt <stever@eecs.umich.edu>
Date: Tue, 17 Jul 2007 06:33:28 -0700
Subject: Forward cache-to-cache responses through other caches.

--HG--
extra : convert_revision : 5b6a02255bccd98b00949703cf4ba4b221553cea
---
 src/mem/cache/cache_impl.hh | 56 +++++++++++++++++++++++++++++++++++++++------
 1 file changed, 49 insertions(+), 7 deletions(-)

diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh
index 59571dd6f..c069d8ba9 100644
--- a/src/mem/cache/cache_impl.hh
+++ b/src/mem/cache/cache_impl.hh
@@ -315,6 +315,29 @@ Cache<TagStore>::access(PacketPtr pkt, BlkType *&blk, int &lat)
 }
 
 
+class ForwardResponseRecord : public Packet::SenderState
+{   // Snapshot of a packet's senderState and source id, put back by restore().
+    Packet::SenderState *prevSenderState;  // pkt->senderState at construction
+    int prevSrc;                           // pkt->getSrc() at construction
+#ifndef NDEBUG
+    BaseCache *cache;  // only present when asserts are enabled; see restore()
+#endif
+  public:
+    ForwardResponseRecord(Packet *pkt, BaseCache *_cache)
+        : prevSenderState(pkt->senderState), prevSrc(pkt->getSrc())
+#ifndef NDEBUG
+          , cache(_cache)
+#endif
+    {}
+    void restore(Packet *pkt, BaseCache *_cache)
+    {   // Reinstates the saved senderState and routes pkt to the saved source.
+        assert(_cache == cache);  // same cache that was given to the constructor
+        pkt->senderState = prevSenderState;
+        pkt->setDest(prevSrc);    // saved source becomes the destination
+    }
+};
+
+
 template<class TagStore>
 bool
 Cache<TagStore>::timingAccess(PacketPtr pkt)
@@ -325,6 +348,19 @@ Cache<TagStore>::timingAccess(PacketPtr pkt)
     // we charge hitLatency for doing just about anything here
     Tick time =  curTick + hitLatency;
 
+    if (pkt->isResponse()) {
+        // must be cache-to-cache response from upper to lower level
+        ForwardResponseRecord *rec =
+            dynamic_cast<ForwardResponseRecord *>(pkt->senderState);
+        assert(rec != NULL);
+        rec->restore(pkt, this);
+        delete rec;
+        memSidePort->respond(pkt, time);
+        return true;
+    }
+
+    assert(pkt->isRequest());
+
     if (pkt->memInhibitAsserted()) {
         DPRINTF(Cache, "mem inhibited on 0x%x: not responding\n",
                 pkt->getAddr());
@@ -392,6 +428,8 @@ Cache<TagStore>::timingAccess(PacketPtr pkt)
         if (needsResponse) {
             pkt->makeTimingResponse();
             cpuSidePort->respond(pkt, curTick+lat);
+        } else {
+            delete pkt;
         }
     } else {
         // miss
@@ -424,12 +462,6 @@ Cache<TagStore>::timingAccess(PacketPtr pkt)
         }
     }
 
-    if (!needsResponse) {
-        // Need to clean up the packet on a writeback miss, but leave
-        // the request for the next level.
-        delete pkt;
-    }
-
     return true;
 }
 
@@ -872,7 +904,14 @@ Cache<TagStore>::doTimingSupplyResponse(PacketPtr req_pkt,
 {
     // timing-mode snoop responses require a new packet, unless we
     // already made a copy...
-    PacketPtr pkt = already_copied ? req_pkt : new Packet(req_pkt);
+    PacketPtr pkt = already_copied ? req_pkt : new Packet(req_pkt, true);
+    if (!req_pkt->isInvalidate()) {
+        // note that we're ignoring the shared flag on req_pkt... it's
+        // basically irrelevant, as we'll always assert shared unless
+        // it's an exclusive request, in which case the shared line
+        // should never be asserted
+        pkt->assertShared();
+    }
     pkt->allocate();
     pkt->makeTimingResponse();
     pkt->setDataFromBlock(blk_data, blkSize);
@@ -894,11 +933,14 @@ Cache<TagStore>::handleSnoop(PacketPtr pkt, BlkType *blk,
     if (is_timing) {
         Packet *snoopPkt = new Packet(pkt, true);  // clear flags
         snoopPkt->setExpressSnoop();
+        snoopPkt->senderState = new ForwardResponseRecord(pkt, this);
         cpuSidePort->sendTiming(snoopPkt);
         if (snoopPkt->memInhibitAsserted()) {
             // cache-to-cache response from some upper cache
             assert(!alreadySupplied);
             pkt->assertMemInhibit();
+        } else {
+            delete snoopPkt->senderState;
         }
         if (snoopPkt->sharedAsserted()) {
             pkt->assertShared();
-- 
cgit v1.2.3


From a67a0025b3da9605f1cd41c75bff5dba2175a0dd Mon Sep 17 00:00:00 2001
From: Steve Reinhardt <stever@eecs.umich.edu>
Date: Tue, 17 Jul 2007 08:15:23 -0700
Subject: Make sure responses never get blocked.

--HG--
extra : convert_revision : 29f359d743994a94dc403aa0621ba72cd137d1a1
---
 src/mem/bus.cc | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/mem/bus.cc b/src/mem/bus.cc
index e70558bd6..da8df06ea 100644
--- a/src/mem/bus.cc
+++ b/src/mem/bus.cc
@@ -183,7 +183,7 @@ Bus::recvTiming(PacketPtr pkt)
 
     // If the bus is busy, or other devices are in line ahead of the current
     // one, put this device on the retry list.
-    if (!pkt->isExpressSnoop() &&
+    if (!(pkt->isResponse() || pkt->isExpressSnoop()) &&
         (tickNextIdle > curTick ||
          (retryList.size() && (!inRetry || pktPort != retryList.front()))))
     {
@@ -194,8 +194,6 @@ Bus::recvTiming(PacketPtr pkt)
 
     short dest = pkt->getDest();
 
-    // Make sure to clear the snoop commit flag so it doesn't think an
-    // access has been handled twice.
     if (dest == Packet::Broadcast) {
         port_id = findPort(pkt->getAddr());
         timingSnoop(pkt, interfaces[port_id]);
@@ -234,6 +232,8 @@ Bus::recvTiming(PacketPtr pkt)
         }
 
         // Packet not successfully sent. Leave or put it on the retry list.
+        // illegal to block responses... can lead to deadlock
+        assert(!pkt->isResponse());
         DPRINTF(Bus, "Adding2 a retry to RETRY list %d\n",
                 pktPort->getId());
         addToRetryList(pktPort);
-- 
cgit v1.2.3


From 91178600947e174041f46f54e4241cedd01bbb34 Mon Sep 17 00:00:00 2001
From: Steve Reinhardt <stever@eecs.umich.edu>
Date: Sat, 21 Jul 2007 13:45:17 -0700
Subject: Several more fixes for multi-level timing coherence. - Add "deferred
 snoop" flag to Packet so upper-level caches   can distinguish whether
 lower-level cache request was   in-service or not at the time of the original
 snoop. - Revamp response handling to properly handle deferred snoops   on
 non-cache-fill requests (i.e. upgrades). - Make sure forwarded writebacks are
 kept in write buffer at   lower-level caches so they get snooped properly.

--HG--
extra : convert_revision : 17f8a3772a1ae31a16991a53f8225ddf54d31fc9
---
 src/mem/cache/base_cache.hh |  26 +++---
 src/mem/cache/cache_impl.hh | 195 ++++++++++++++++++++++----------------------
 src/mem/cache/miss/mshr.cc  |  18 ++--
 src/mem/cache/miss/mshr.hh  |   2 +-
 src/mem/packet.hh           |   3 +
 5 files changed, 122 insertions(+), 122 deletions(-)

diff --git a/src/mem/cache/base_cache.hh b/src/mem/cache/base_cache.hh
index 46414974b..719ab0245 100644
--- a/src/mem/cache/base_cache.hh
+++ b/src/mem/cache/base_cache.hh
@@ -410,28 +410,28 @@ class BaseCache : public MemObject
 
     MSHR *allocateMissBuffer(PacketPtr pkt, Tick time, bool requestBus)
     {
+        assert(!pkt->req->isUncacheable());
         return allocateBufferInternal(&mshrQueue,
                                       blockAlign(pkt->getAddr()), blkSize,
                                       pkt, time, requestBus);
     }
 
-    MSHR *allocateBuffer(PacketPtr pkt, Tick time, bool requestBus)
+    MSHR *allocateWriteBuffer(PacketPtr pkt, Tick time, bool requestBus)
     {
-        MSHRQueue *mq = NULL;
-
-        if (pkt->isWrite() && !pkt->isRead()) {
-            /**
-             * @todo Add write merging here.
-             */
-            mq = &writeBuffer;
-        } else {
-            mq = &mshrQueue;
-        }
-
-        return allocateBufferInternal(mq, pkt->getAddr(), pkt->getSize(),
+        assert(pkt->isWrite() && !pkt->isRead());
+        return allocateBufferInternal(&writeBuffer,
+                                      pkt->getAddr(), pkt->getSize(),
                                       pkt, time, requestBus);
     }
 
+    MSHR *allocateUncachedReadBuffer(PacketPtr pkt, Tick time, bool requestBus)
+    {
+        assert(pkt->req->isUncacheable());
+        assert(pkt->isRead());
+        return allocateBufferInternal(&mshrQueue,
+                                      pkt->getAddr(), pkt->getSize(),
+                                      pkt, time, requestBus);
+    }
 
     /**
      * Returns true if the cache is blocked for accesses.
diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh
index c069d8ba9..b78360d4a 100644
--- a/src/mem/cache/cache_impl.hh
+++ b/src/mem/cache/cache_impl.hh
@@ -369,7 +369,12 @@ Cache<TagStore>::timingAccess(PacketPtr pkt)
     }
 
     if (pkt->req->isUncacheable()) {
-        allocateBuffer(pkt, time, true);
+        // writes go in write buffer, reads use MSHR
+        if (pkt->isWrite() && !pkt->isRead()) {
+            allocateWriteBuffer(pkt, time, true);
+        } else {
+            allocateUncachedReadBuffer(pkt, time, true);
+        }
         assert(pkt->needsResponse()); // else we should delete it here??
         return true;
     }
@@ -417,7 +422,7 @@ Cache<TagStore>::timingAccess(PacketPtr pkt)
     // copy writebacks to write buffer
     while (!writebacks.empty()) {
         PacketPtr wbPkt = writebacks.front();
-        allocateBuffer(wbPkt, time, true);
+        allocateWriteBuffer(wbPkt, time, true);
         writebacks.pop_front();
     }
 #endif
@@ -458,7 +463,11 @@ Cache<TagStore>::timingAccess(PacketPtr pkt)
             // always mark as cache fill for now... if we implement
             // no-write-allocate or bypass accesses this will have to
             // be changed.
-            allocateMissBuffer(pkt, time, true);
+            if (pkt->cmd == MemCmd::Writeback) {
+                allocateWriteBuffer(pkt, time, true);
+            } else {
+                allocateMissBuffer(pkt, time, true);
+            }
         }
     }
 
@@ -492,6 +501,10 @@ Cache<TagStore>::getBusPacket(PacketPtr cpu_pkt, BlkType *blk,
     assert(cpu_pkt->needsResponse());
 
     MemCmd cmd;
+    // @TODO make useUpgrades a parameter.
+    // Note that ownership protocols require upgrade, otherwise a
+    // write miss on a shared owned block will generate a ReadExcl,
+    // which will clobber the owned copy.
     const bool useUpgrades = true;
     if (blkValid && useUpgrades) {
         // only reason to be here is that blk is shared
@@ -648,62 +661,6 @@ Cache<TagStore>::functionalAccess(PacketPtr pkt,
 /////////////////////////////////////////////////////
 
 
-template<class TagStore>
-bool
-Cache<TagStore>::satisfyMSHR(MSHR *mshr, PacketPtr pkt,
-                             BlkType *blk)
-{
-    // respond to MSHR targets, if any
-
-    // First offset for critical word first calculations
-    int initial_offset = 0;
-
-    if (mshr->hasTargets()) {
-        initial_offset = mshr->getTarget()->pkt->getOffset(blkSize);
-    }
-
-    while (mshr->hasTargets()) {
-        MSHR::Target *target = mshr->getTarget();
-
-        if (target->isCpuSide()) {
-            satisfyCpuSideRequest(target->pkt, blk);
-            // How many bytes pass the first request is this one
-            int transfer_offset =
-                target->pkt->getOffset(blkSize) - initial_offset;
-            if (transfer_offset < 0) {
-                transfer_offset += blkSize;
-            }
-
-            // If critical word (no offset) return first word time
-            Tick completion_time = tags->getHitLatency() +
-                transfer_offset ? pkt->finishTime : pkt->firstWordTime;
-
-            if (!target->pkt->req->isUncacheable()) {
-                missLatency[target->pkt->cmdToIndex()][0/*pkt->req->getThreadNum()*/] +=
-                    completion_time - target->recvTime;
-            }
-            target->pkt->makeTimingResponse();
-            cpuSidePort->respond(target->pkt, completion_time);
-        } else {
-            // response to snoop request
-            DPRINTF(Cache, "processing deferred snoop...\n");
-            handleSnoop(target->pkt, blk, true, true);
-        }
-
-        mshr->popTarget();
-    }
-
-    if (mshr->promoteDeferredTargets()) {
-        MSHRQueue *mq = mshr->queue;
-        mq->markPending(mshr);
-        requestMemSideBus((RequestCause)mq->index, pkt->finishTime);
-        return false;
-    }
-
-    return true;
-}
-
-
 template<class TagStore>
 void
 Cache<TagStore>::handleResponse(PacketPtr pkt)
@@ -730,68 +687,105 @@ Cache<TagStore>::handleResponse(PacketPtr pkt)
         noTargetMSHR = NULL;
     }
 
-    // Can we deallocate MSHR when done?
-    bool deallocate = false;
-
     // Initial target is used just for stats
     MSHR::Target *initial_tgt = mshr->getTarget();
+    BlkType *blk = tags->findBlock(pkt->getAddr());
     int stats_cmd_idx = initial_tgt->pkt->cmdToIndex();
     Tick miss_latency = curTick - initial_tgt->recvTime;
+    PacketList writebacks;
 
-    if (mshr->isCacheFill) {
+    if (pkt->req->isUncacheable()) {
+        mshr_uncacheable_lat[stats_cmd_idx][0/*pkt->req->getThreadNum()*/] +=
+            miss_latency;
+    } else {
         mshr_miss_latency[stats_cmd_idx][0/*pkt->req->getThreadNum()*/] +=
             miss_latency;
+    }
+
+    if (mshr->isCacheFill) {
         DPRINTF(Cache, "Block for addr %x being updated in Cache\n",
                 pkt->getAddr());
-        BlkType *blk = tags->findBlock(pkt->getAddr());
 
         // give mshr a chance to do some dirty work
         mshr->handleFill(pkt, blk);
 
-        PacketList writebacks;
         blk = handleFill(pkt, blk, writebacks);
-        deallocate = satisfyMSHR(mshr, pkt, blk);
-        // copy writebacks to write buffer
-        while (!writebacks.empty()) {
-            PacketPtr wbPkt = writebacks.front();
-            allocateBuffer(wbPkt, time, true);
-            writebacks.pop_front();
-        }
-        // if we used temp block, clear it out
-        if (blk == tempBlock) {
-            if (blk->isDirty()) {
-                allocateBuffer(writebackBlk(blk), time, true);
-            }
-            tags->invalidateBlk(blk);
-        }
-    } else {
-        if (pkt->req->isUncacheable()) {
-            mshr_uncacheable_lat[stats_cmd_idx][0/*pkt->req->getThreadNum()*/] +=
-                miss_latency;
-        }
+        assert(blk != NULL);
+    }
 
-        while (mshr->hasTargets()) {
-            MSHR::Target *target = mshr->getTarget();
-            assert(target->isCpuSide());
-            mshr->popTarget();
-            if (pkt->isRead()) {
-                target->pkt->setData(pkt->getPtr<uint8_t>());
+    // First offset for critical word first calculations
+    int initial_offset = 0;
+
+    if (mshr->hasTargets()) {
+        initial_offset = mshr->getTarget()->pkt->getOffset(blkSize);
+    }
+
+    while (mshr->hasTargets()) {
+        MSHR::Target *target = mshr->getTarget();
+
+        if (target->isCpuSide()) {
+            Tick completion_time;
+            if (blk != NULL) {
+                satisfyCpuSideRequest(target->pkt, blk);
+                // How many bytes past the first request this one is
+                int transfer_offset =
+                    target->pkt->getOffset(blkSize) - initial_offset;
+                if (transfer_offset < 0) {
+                    transfer_offset += blkSize;
+                }
+
+                // If critical word (no offset) return first word time
+                completion_time = tags->getHitLatency() +
+                    transfer_offset ? pkt->finishTime : pkt->firstWordTime;
+
+                if (!target->pkt->req->isUncacheable()) {
+                    missLatency[target->pkt->cmdToIndex()][0/*pkt->req->getThreadNum()*/] +=
+                        completion_time - target->recvTime;
+                }
+            } else {
+                // not a cache fill, just forwarding response
+                completion_time = tags->getHitLatency() + pkt->finishTime;
+                if (pkt->isRead()) {
+                    target->pkt->setData(pkt->getPtr<uint8_t>());
+                }
             }
             target->pkt->makeTimingResponse();
-            cpuSidePort->respond(target->pkt, time);
+            cpuSidePort->respond(target->pkt, completion_time);
+        } else {
+            // response to snoop request
+            DPRINTF(Cache, "processing deferred snoop...\n");
+            handleSnoop(target->pkt, blk, true, true);
         }
-        assert(!mshr->hasTargets());
-        deallocate = true;
-    }
 
-    delete pkt;
+        mshr->popTarget();
+    }
 
-    if (deallocate) {
+    if (mshr->promoteDeferredTargets()) {
+        MSHRQueue *mq = mshr->queue;
+        mq->markPending(mshr);
+        requestMemSideBus((RequestCause)mq->index, pkt->finishTime);
+    } else {
         mq->deallocate(mshr);
         if (wasFull && !mq->isFull()) {
             clearBlocked((BlockedCause)mq->index);
         }
     }
+
+    // copy writebacks to write buffer
+    while (!writebacks.empty()) {
+        PacketPtr wbPkt = writebacks.front();
+        allocateWriteBuffer(wbPkt, time, true);
+        writebacks.pop_front();
+    }
+    // if we used temp block, clear it out
+    if (blk == tempBlock) {
+        if (blk->isDirty()) {
+            allocateWriteBuffer(writebackBlk(blk), time, true);
+        }
+        tags->invalidateBlk(blk);
+    }
+
+    delete pkt;
 }
 
 
@@ -933,6 +927,9 @@ Cache<TagStore>::handleSnoop(PacketPtr pkt, BlkType *blk,
     if (is_timing) {
         Packet *snoopPkt = new Packet(pkt, true);  // clear flags
         snoopPkt->setExpressSnoop();
+        if (is_deferred) {
+            snoopPkt->setDeferredSnoop();
+        }
         snoopPkt->senderState = new ForwardResponseRecord(pkt, this);
         cpuSidePort->sendTiming(snoopPkt);
         if (snoopPkt->memInhibitAsserted()) {
@@ -1020,12 +1017,11 @@ Cache<TagStore>::snoopTiming(PacketPtr pkt)
     MSHR *mshr = mshrQueue.findMatch(blk_addr);
     // better not be snooping a request that conflicts with something
     // we have outstanding...
-    if (mshr && mshr->inService) {
+    if (mshr && mshr->handleSnoop(pkt, order++)) {
         DPRINTF(Cache, "Deferring snoop on in-service MSHR to blk %x\n",
                 blk_addr);
-        mshr->allocateSnoopTarget(pkt, curTick, order++);
         if (mshr->getNumTargets() > numTarget)
-           warn("allocating bonus target for snoop"); //handle later
+            warn("allocating bonus target for snoop"); //handle later
         return;
     }
 
@@ -1226,6 +1222,7 @@ template<class TagStore>
 bool
 Cache<TagStore>::CpuSidePort::recvTiming(PacketPtr pkt)
 {
+    // illegal to block responses... can lead to deadlock
     if (pkt->isRequest() && blocked) {
         DPRINTF(Cache,"Scheduling a retry while blocked\n");
         mustSendRetry = true;
diff --git a/src/mem/cache/miss/mshr.cc b/src/mem/cache/miss/mshr.cc
index 5d5e63f90..7ba3789fe 100644
--- a/src/mem/cache/miss/mshr.cc
+++ b/src/mem/cache/miss/mshr.cc
@@ -119,25 +119,23 @@ MSHR::allocateTarget(PacketPtr target, Tick whenReady, Counter _order)
     ++ntargets;
 }
 
-void
-MSHR::allocateSnoopTarget(PacketPtr pkt, Tick whenReady, Counter _order)
+bool
+MSHR::handleSnoop(PacketPtr pkt, Counter _order)
 {
-    assert(inService); // don't bother to call otherwise
+    if (!inService || (pkt->isExpressSnoop() && !pkt->isDeferredSnoop())) {
+        return false;
+    }
 
     if (pendingInvalidate) {
         // a prior snoop has already appended an invalidation, so
         // logically we don't have the block anymore...
-        return;
+        return true;
     }
 
-    DPRINTF(Cache, "deferred snoop on %x: %s %s\n", addr,
-            needsExclusive ? "needsExclusive" : "",
-            pkt->needsExclusive() ? "pkt->needsExclusive()" : "");
-
     if (needsExclusive || pkt->needsExclusive()) {
         // actual target device (typ. PhysicalMemory) will delete the
         // packet on reception, so we need to save a copy here
-        targets.push_back(Target(new Packet(pkt), whenReady, _order, false));
+        targets.push_back(Target(new Packet(pkt), curTick, _order, false));
         ++ntargets;
 
         if (needsExclusive) {
@@ -157,6 +155,8 @@ MSHR::allocateSnoopTarget(PacketPtr pkt, Tick whenReady, Counter _order)
         pendingShared = true;
         pkt->assertShared();
     }
+
+    return true;
 }
 
 
diff --git a/src/mem/cache/miss/mshr.hh b/src/mem/cache/miss/mshr.hh
index a27f465aa..9c6a8cf33 100644
--- a/src/mem/cache/miss/mshr.hh
+++ b/src/mem/cache/miss/mshr.hh
@@ -162,7 +162,7 @@ public:
      * @param target The target.
      */
     void allocateTarget(PacketPtr target, Tick when, Counter order);
-    void allocateSnoopTarget(PacketPtr target, Tick when, Counter order);
+    bool handleSnoop(PacketPtr target, Counter order);
 
     /** A simple constructor. */
     MSHR();
diff --git a/src/mem/packet.hh b/src/mem/packet.hh
index 036bd3fd7..8063c7ae7 100644
--- a/src/mem/packet.hh
+++ b/src/mem/packet.hh
@@ -257,6 +257,7 @@ class Packet : public FastAlloc
         Shared,
         // Special control flags
         ExpressSnoop,
+        DeferredSnoop,
         NUM_PACKET_FLAGS
     };
 
@@ -322,6 +323,8 @@ class Packet : public FastAlloc
     // Special control flags
     void setExpressSnoop()      { flags[ExpressSnoop] = true; }
     bool isExpressSnoop()       { return flags[ExpressSnoop]; }
+    void setDeferredSnoop()     { flags[DeferredSnoop] = true; }
+    bool isDeferredSnoop()      { return flags[DeferredSnoop]; }
 
     // Network error conditions... encapsulate them as methods since
     // their encoding keeps changing (from result field to command
-- 
cgit v1.2.3


From 92ce2b59743c8cace420147e276d7376a4b905f1 Mon Sep 17 00:00:00 2001
From: Steve Reinhardt <stever@eecs.umich.edu>
Date: Sat, 21 Jul 2007 18:18:42 -0700
Subject: Deal with invalidations intersecting outstanding upgrades. If the
 invalidation beats the upgrade at a lower level then the upgrade must be
 converted to a read exclusive "in the field". Restructure target list &
 deferred target list to factor out some common code.

--HG--
extra : convert_revision : 7bab4482dd6c48efdb619610f0d3778c60ff777a
---
 src/mem/cache/cache_impl.hh |   4 +-
 src/mem/cache/miss/mshr.cc  | 153 +++++++++++++++++++++++++++++++++-----------
 src/mem/cache/miss/mshr.hh  |  35 ++++++----
 3 files changed, 139 insertions(+), 53 deletions(-)

diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh
index b78360d4a..9fb5cdbde 100644
--- a/src/mem/cache/cache_impl.hh
+++ b/src/mem/cache/cache_impl.hh
@@ -836,7 +836,7 @@ Cache<TagStore>::handleFill(PacketPtr pkt, BlkType *blk,
                 // must be an outstanding upgrade request on block
                 // we're about to replace...
                 assert(!blk->isWritable());
-                assert(repl_mshr->needsExclusive);
+                assert(repl_mshr->needsExclusive());
                 // too hard to replace block with transient state;
                 // just use temporary storage to complete the current
                 // request and then get rid of it
@@ -1177,7 +1177,7 @@ Cache<TagStore>::getTimingPacket()
         pkt = tgt_pkt;
     } else {
         BlkType *blk = tags->findBlock(mshr->addr);
-        pkt = getBusPacket(tgt_pkt, blk, mshr->needsExclusive);
+        pkt = getBusPacket(tgt_pkt, blk, mshr->needsExclusive());
 
         mshr->isCacheFill = (pkt != NULL);
 
diff --git a/src/mem/cache/miss/mshr.cc b/src/mem/cache/miss/mshr.cc
index 7ba3789fe..856819c10 100644
--- a/src/mem/cache/miss/mshr.cc
+++ b/src/mem/cache/miss/mshr.cc
@@ -52,8 +52,51 @@ MSHR::MSHR()
     inService = false;
     ntargets = 0;
     threadNum = -1;
+    targets = new TargetList();
+    deferredTargets = new TargetList();
 }
 
+
+MSHR::TargetList::TargetList()
+    : needsExclusive(false), hasUpgrade(false)
+{}
+
+
+inline void
+MSHR::TargetList::add(PacketPtr pkt, Tick readyTime, Counter order, bool cpuSide)
+{
+    if (cpuSide) {
+        if (pkt->needsExclusive()) {
+            needsExclusive = true;
+        }
+
+        if (pkt->cmd == MemCmd::UpgradeReq) {
+            hasUpgrade = true;
+        }
+    }
+
+    push_back(Target(pkt, readyTime, order, cpuSide));
+}
+
+
+void
+MSHR::TargetList::replaceUpgrades()
+{
+    if (!hasUpgrade)
+        return;
+
+    Iterator end_i = end();
+    for (Iterator i = begin(); i != end_i; ++i) {
+        if (i->pkt->cmd == MemCmd::UpgradeReq) {
+            i->pkt->cmd = MemCmd::ReadExReq;
+            DPRINTF(Cache, "Replacing UpgradeReq with ReadExReq\n");
+        }
+    }
+
+    hasUpgrade = false;
+}
+
+
 void
 MSHR::allocate(Addr _addr, int _size, PacketPtr target,
                Tick whenReady, Counter _order)
@@ -64,16 +107,15 @@ MSHR::allocate(Addr _addr, int _size, PacketPtr target,
     order = _order;
     assert(target);
     isCacheFill = false;
-    needsExclusive = target->needsExclusive();
     _isUncacheable = target->req->isUncacheable();
     inService = false;
     threadNum = 0;
     ntargets = 1;
     // Don't know of a case where we would allocate a new MSHR for a
     // snoop (mem-side request), so set cpuSide to true here.
-    targets.push_back(Target(target, whenReady, _order, true));
-    assert(deferredTargets.empty());
-    deferredNeedsExclusive = false;
+    assert(targets->isReset());
+    targets->add(target, whenReady, _order, true);
+    assert(deferredTargets->isReset());
     pendingInvalidate = false;
     pendingShared = false;
     data = NULL;
@@ -82,8 +124,9 @@ MSHR::allocate(Addr _addr, int _size, PacketPtr target,
 void
 MSHR::deallocate()
 {
-    assert(targets.empty());
-    assert(deferredTargets.empty());
+    assert(targets->empty());
+    targets->resetFlags();
+    assert(deferredTargets->isReset());
     assert(ntargets == 0);
     inService = false;
     //allocIter = NULL;
@@ -94,26 +137,25 @@ MSHR::deallocate()
  * Adds a target to an MSHR
  */
 void
-MSHR::allocateTarget(PacketPtr target, Tick whenReady, Counter _order)
+MSHR::allocateTarget(PacketPtr pkt, Tick whenReady, Counter _order)
 {
-    if (inService) {
-        if (!deferredTargets.empty() || pendingInvalidate ||
-            (!needsExclusive && target->needsExclusive())) {
-            // need to put on deferred list
-            deferredTargets.push_back(Target(target, whenReady, _order, true));
-            if (target->needsExclusive()) {
-                deferredNeedsExclusive = true;
-            }
-        } else {
-            // still OK to append to outstanding request
-            targets.push_back(Target(target, whenReady, _order, true));
-        }
+    // if there's a request already in service for this MSHR, we will
+    // have to defer the new target until after the response if any of
+    // the following are true:
+    // - there are other targets already deferred
+    // - there's a pending invalidate to be applied after the response
+    //   comes back (but before this target is processed)
+    // - the outstanding request is for a non-exclusive block and this
+    //   target requires an exclusive block
+    if (inService &&
+        (!deferredTargets->empty() || pendingInvalidate ||
+         (!targets->needsExclusive && pkt->needsExclusive()))) {
+        // need to put on deferred list
+        deferredTargets->add(pkt, whenReady, _order, true);
     } else {
-        if (target->needsExclusive()) {
-            needsExclusive = true;
-        }
-
-        targets.push_back(Target(target, whenReady, _order, true));
+        // no request outstanding, or still OK to append to
+        // outstanding request
+        targets->add(pkt, whenReady, _order, true);
     }
 
     ++ntargets;
@@ -123,22 +165,50 @@ bool
 MSHR::handleSnoop(PacketPtr pkt, Counter _order)
 {
     if (!inService || (pkt->isExpressSnoop() && !pkt->isDeferredSnoop())) {
+        // Request has not been issued yet, or it's been issued
+        // locally but is buffered unissued at some downstream cache
+        // which is forwarding us this snoop.  Either way, the packet
+        // we're snooping logically precedes this MSHR's request, so
+        // the snoop has no impact on the MSHR, but must be processed
+        // in the standard way by the cache.  The only exception is
+        // that if we're an L2+ cache buffering an UpgradeReq from a
+        // higher-level cache, and the snoop is invalidating, then our
+        // buffered upgrades must be converted to read exclusives,
+        // since the upper-level cache no longer has a valid copy.
+        // That is, even though the upper-level cache got out on its
+        // local bus first, some other invalidating transaction
+        // reached the global bus before the upgrade did.
+        if (pkt->needsExclusive()) {
+            targets->replaceUpgrades();
+            deferredTargets->replaceUpgrades();
+        }
+
         return false;
     }
 
+    // From here on down, the request issued by this MSHR logically
+    // precedes the request we're snooping.
+
+    if (pkt->needsExclusive()) {
+        // snooped request still precedes the re-request we'll have to
+        // issue for deferred targets, if any...
+        deferredTargets->replaceUpgrades();
+    }
+
     if (pendingInvalidate) {
         // a prior snoop has already appended an invalidation, so
-        // logically we don't have the block anymore...
+        // logically we don't have the block anymore; no need for
+        // further snooping.
         return true;
     }
 
-    if (needsExclusive || pkt->needsExclusive()) {
+    if (targets->needsExclusive || pkt->needsExclusive()) {
         // actual target device (typ. PhysicalMemory) will delete the
         // packet on reception, so we need to save a copy here
-        targets.push_back(Target(new Packet(pkt), curTick, _order, false));
+        targets->add(new Packet(pkt), curTick, _order, false);
         ++ntargets;
 
-        if (needsExclusive) {
+        if (targets->needsExclusive) {
             // We're awaiting an exclusive copy, so ownership is pending.
             // It's up to us to respond once the data arrives.
             pkt->assertMemInhibit();
@@ -163,21 +233,25 @@ MSHR::handleSnoop(PacketPtr pkt, Counter _order)
 bool
 MSHR::promoteDeferredTargets()
 {
-    if (deferredTargets.empty()) {
+    assert(targets->empty());
+    if (deferredTargets->empty()) {
         return false;
     }
 
-    assert(targets.empty());
+    // swap targets & deferredTargets lists
+    TargetList *tmp = targets;
     targets = deferredTargets;
-    deferredTargets.clear();
-    assert(targets.size() == ntargets);
+    deferredTargets = tmp;
+
+    assert(targets->size() == ntargets);
+
+    // clear deferredTargets flags
+    deferredTargets->resetFlags();
 
-    needsExclusive = deferredNeedsExclusive;
     pendingInvalidate = false;
     pendingShared = false;
-    deferredNeedsExclusive = false;
-    order = targets.front().order;
-    readyTime = std::max(curTick, targets.front().readyTime);
+    order = targets->front().order;
+    readyTime = std::max(curTick, targets->front().readyTime);
 
     return true;
 }
@@ -202,16 +276,17 @@ MSHR::dump()
              "Addr: %x ntargets %d\n"
              "Targets:\n",
              inService, threadNum, addr, ntargets);
-
-    TargetListIterator tar_it = targets.begin();
+#if 0
+    TargetListIterator tar_it = targets->begin();
     for (int i = 0; i < ntargets; i++) {
-        assert(tar_it != targets.end());
+        assert(tar_it != targets->end());
 
         ccprintf(cerr, "\t%d: Addr: %x cmd: %s\n",
                  i, tar_it->pkt->getAddr(), tar_it->pkt->cmdString());
 
         tar_it++;
     }
+#endif
     ccprintf(cerr, "\n");
 }
 
diff --git a/src/mem/cache/miss/mshr.hh b/src/mem/cache/miss/mshr.hh
index 9c6a8cf33..06ef6e113 100644
--- a/src/mem/cache/miss/mshr.hh
+++ b/src/mem/cache/miss/mshr.hh
@@ -68,10 +68,21 @@ class MSHR : public Packet::SenderState
         {}
     };
 
-    /** Defines the Data structure of the MSHR targetlist. */
-    typedef std::list<Target> TargetList;
-    /** Target list iterator. */
-    typedef std::list<Target>::iterator TargetListIterator;
+    class TargetList : public std::list<Target> {
+        /** Target list iterator. */
+        typedef std::list<Target>::iterator Iterator;
+
+      public:
+        bool needsExclusive;
+        bool hasUpgrade;
+
+        TargetList();
+        void resetFlags() { needsExclusive = hasUpgrade = false; }
+        bool isReset()    { return !needsExclusive && !hasUpgrade; }
+        void add(PacketPtr pkt, Tick readyTime, Counter order, bool cpuSide);
+        void replaceUpgrades();
+    };
+
     /** A list of MSHRs. */
     typedef std::list<MSHR *> List;
     /** MSHR list iterator. */
@@ -99,13 +110,13 @@ class MSHR : public Packet::SenderState
 
     /** True if we will be putting the returned block in the cache */
     bool isCacheFill;
+
     /** True if we need to get an exclusive copy of the block. */
-    bool needsExclusive;
+    bool needsExclusive() { return targets->needsExclusive; }
 
     /** True if the request is uncacheable */
     bool _isUncacheable;
 
-    bool deferredNeedsExclusive;
     bool pendingInvalidate;
     bool pendingShared;
 
@@ -133,9 +144,9 @@ class MSHR : public Packet::SenderState
 
 private:
     /** List of all requests that match the address */
-    TargetList targets;
+    TargetList *targets;
 
-    TargetList deferredTargets;
+    TargetList *deferredTargets;
 
 public:
 
@@ -179,19 +190,19 @@ public:
      * Returns a pointer to the target list.
      * @return a pointer to the target list.
      */
-    TargetList* getTargetList() { return &targets; }
+    TargetList *getTargetList() { return targets; }
 
     /**
      * Returns true if there are targets left.
      * @return true if there are targets
      */
-    bool hasTargets() { return !targets.empty(); }
+    bool hasTargets() { return !targets->empty(); }
 
     /**
      * Returns a reference to the first target.
      * @return A pointer to the first target.
      */
-    Target *getTarget() { assert(hasTargets());  return &targets.front(); }
+    Target *getTarget() { assert(hasTargets());  return &targets->front(); }
 
     /**
      * Pop first target.
@@ -199,7 +210,7 @@ public:
     void popTarget()
     {
         --ntargets;
-        targets.pop_front();
+        targets->pop_front();
     }
 
     bool isSimpleForward()
-- 
cgit v1.2.3


From 1edd143b688e9bd7285344ee8d2f5bd8cb3dc12d Mon Sep 17 00:00:00 2001
From: Steve Reinhardt <stever@eecs.umich.edu>
Date: Sun, 22 Jul 2007 03:07:26 -0400
Subject: A few minor non-debug compilation issues.

--HG--
extra : convert_revision : d59a5cad6187a2229dddd1a48282ebd2923d1d8d
---
 src/mem/cache/cache_impl.hh      | 2 ++
 src/mem/cache/miss/mshr_queue.cc | 1 +
 src/mem/cache/tags/split.cc      | 2 +-
 3 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh
index 9fb5cdbde..c8c1a239c 100644
--- a/src/mem/cache/cache_impl.hh
+++ b/src/mem/cache/cache_impl.hh
@@ -821,7 +821,9 @@ Cache<TagStore>::handleFill(PacketPtr pkt, BlkType *blk,
                             PacketList &writebacks)
 {
     Addr addr = pkt->getAddr();
+#if TRACING_ON
     CacheBlk::State old_state = blk ? blk->status : 0;
+#endif
 
     if (blk == NULL) {
         // better have read new data...
diff --git a/src/mem/cache/miss/mshr_queue.cc b/src/mem/cache/miss/mshr_queue.cc
index 56ec62a7d..4d3cf30e1 100644
--- a/src/mem/cache/miss/mshr_queue.cc
+++ b/src/mem/cache/miss/mshr_queue.cc
@@ -123,6 +123,7 @@ MSHRQueue::addToReadyList(MSHR *mshr)
         }
     }
     assert(false);
+    return end;  // keep stupid compilers happy
 }
 
 
diff --git a/src/mem/cache/tags/split.cc b/src/mem/cache/tags/split.cc
index e22ccbb96..ae284766d 100644
--- a/src/mem/cache/tags/split.cc
+++ b/src/mem/cache/tags/split.cc
@@ -300,7 +300,7 @@ Split::findBlock(Addr addr) const
 SplitBlk*
 Split::findReplacement(Addr addr, PacketList &writebacks)
 {
-    SplitBlk *blk;
+    SplitBlk *blk = NULL;
 
     assert(0);
 #if 0
-- 
cgit v1.2.3


From 1c2d5f5e64387527efe495a59f6946e7b539a543 Mon Sep 17 00:00:00 2001
From: Steve Reinhardt <stever@eecs.umich.edu>
Date: Sun, 22 Jul 2007 08:09:24 -0700
Subject: Replace DeferredSnoop flag with LowerMSHRPending flag. Turns out
 DeferredSnoop isn't quite the right bit of info we needed... see new comment
 in cache_impl.hh.

--HG--
extra : convert_revision : a38de8c1677a37acafb743b7074ef88b21d3b7be
---
 src/mem/cache/cache.hh      |  3 ++-
 src/mem/cache/cache_impl.hh | 28 ++++++++++++++++++++--------
 src/mem/cache/miss/mshr.cc  |  2 +-
 src/mem/packet.hh           |  6 +++---
 4 files changed, 26 insertions(+), 13 deletions(-)

diff --git a/src/mem/cache/cache.hh b/src/mem/cache/cache.hh
index 57028a05e..7dfe9e8f1 100644
--- a/src/mem/cache/cache.hh
+++ b/src/mem/cache/cache.hh
@@ -190,7 +190,8 @@ class Cache : public BaseCache
      * @param new_state The new coherence state for the block.
      */
     void handleSnoop(PacketPtr ptk, BlkType *blk,
-                     bool is_timing, bool is_deferred);
+                     bool is_timing, bool is_deferred,
+                     bool lower_mshr_pending);
 
     /**
      * Create a writeback request for the given block.
diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh
index c8c1a239c..82410afe1 100644
--- a/src/mem/cache/cache_impl.hh
+++ b/src/mem/cache/cache_impl.hh
@@ -754,7 +754,7 @@ Cache<TagStore>::handleResponse(PacketPtr pkt)
         } else {
             // response to snoop request
             DPRINTF(Cache, "processing deferred snoop...\n");
-            handleSnoop(target->pkt, blk, true, true);
+            handleSnoop(target->pkt, blk, true, true, false);
         }
 
         mshr->popTarget();
@@ -917,7 +917,8 @@ Cache<TagStore>::doTimingSupplyResponse(PacketPtr req_pkt,
 template<class TagStore>
 void
 Cache<TagStore>::handleSnoop(PacketPtr pkt, BlkType *blk,
-                             bool is_timing, bool is_deferred)
+                             bool is_timing, bool is_deferred,
+                             bool lower_mshr_pending)
 {
     assert(pkt->isRequest());
 
@@ -929,8 +930,8 @@ Cache<TagStore>::handleSnoop(PacketPtr pkt, BlkType *blk,
     if (is_timing) {
         Packet *snoopPkt = new Packet(pkt, true);  // clear flags
         snoopPkt->setExpressSnoop();
-        if (is_deferred) {
-            snoopPkt->setDeferredSnoop();
+        if (lower_mshr_pending) {
+            snoopPkt->setLowerMSHRPending();
         }
         snoopPkt->senderState = new ForwardResponseRecord(pkt, this);
         cpuSidePort->sendTiming(snoopPkt);
@@ -1017,8 +1018,19 @@ Cache<TagStore>::snoopTiming(PacketPtr pkt)
 
     Addr blk_addr = pkt->getAddr() & ~(Addr(blkSize-1));
     MSHR *mshr = mshrQueue.findMatch(blk_addr);
-    // better not be snooping a request that conflicts with something
-    // we have outstanding...
+
+    // If a lower cache has an operation on this block pending (not
+    // yet in service) on the MSHR, then the upper caches need to know
+    // about it, as this means that the pending operation logically
+    // succeeds the current snoop.  It's not sufficient to record
+    // whether the MSHR *is* in service, as this misses the window
+    // where the lower cache has completed the request and the
+    // response is on its way back up the hierarchy.
+    bool lower_mshr_pending =
+        (mshr && (!mshr->inService) || pkt->lowerMSHRPending());
+
+    // Let the MSHR itself track the snoop and decide whether we want
+    // to go ahead and do the regular cache snoop
     if (mshr && mshr->handleSnoop(pkt, order++)) {
         DPRINTF(Cache, "Deferring snoop on in-service MSHR to blk %x\n",
                 blk_addr);
@@ -1063,7 +1075,7 @@ Cache<TagStore>::snoopTiming(PacketPtr pkt)
         }
     }
 
-    handleSnoop(pkt, blk, true, false);
+    handleSnoop(pkt, blk, true, false, lower_mshr_pending);
 }
 
 
@@ -1078,7 +1090,7 @@ Cache<TagStore>::snoopAtomic(PacketPtr pkt)
     }
 
     BlkType *blk = tags->findBlock(pkt->getAddr());
-    handleSnoop(pkt, blk, false, false);
+    handleSnoop(pkt, blk, false, false, false);
     return hitLatency;
 }
 
diff --git a/src/mem/cache/miss/mshr.cc b/src/mem/cache/miss/mshr.cc
index 856819c10..b9dfdf729 100644
--- a/src/mem/cache/miss/mshr.cc
+++ b/src/mem/cache/miss/mshr.cc
@@ -164,7 +164,7 @@ MSHR::allocateTarget(PacketPtr pkt, Tick whenReady, Counter _order)
 bool
 MSHR::handleSnoop(PacketPtr pkt, Counter _order)
 {
-    if (!inService || (pkt->isExpressSnoop() && !pkt->isDeferredSnoop())) {
+    if (!inService || (pkt->isExpressSnoop() && pkt->lowerMSHRPending())) {
         // Request has not been issued yet, or it's been issued
         // locally but is buffered unissued at some downstream cache
         // which is forwarding us this snoop.  Either way, the packet
diff --git a/src/mem/packet.hh b/src/mem/packet.hh
index 8063c7ae7..779ea49a2 100644
--- a/src/mem/packet.hh
+++ b/src/mem/packet.hh
@@ -257,7 +257,7 @@ class Packet : public FastAlloc
         Shared,
         // Special control flags
         ExpressSnoop,
-        DeferredSnoop,
+        LowerMSHRPending,  // not yet in service
         NUM_PACKET_FLAGS
     };
 
@@ -323,8 +323,8 @@ class Packet : public FastAlloc
     // Special control flags
     void setExpressSnoop()      { flags[ExpressSnoop] = true; }
     bool isExpressSnoop()       { return flags[ExpressSnoop]; }
-    void setDeferredSnoop()     { flags[DeferredSnoop] = true; }
-    bool isDeferredSnoop()      { return flags[DeferredSnoop]; }
+    void setLowerMSHRPending()  { flags[LowerMSHRPending] = true; }
+    bool lowerMSHRPending()     { return flags[LowerMSHRPending]; }
 
     // Network error conditions... encapsulate them as methods since
     // their encoding keeps changing (from result field to command
-- 
cgit v1.2.3


From 82e2a3557672864f0ea3ae64dad61681546aaf07 Mon Sep 17 00:00:00 2001
From: Steve Reinhardt <stever@eecs.umich.edu>
Date: Sun, 22 Jul 2007 21:43:38 -0700
Subject: Replace lowerMSHRPending flag with more robust scheme based on
 following Packet senderState links.

--HG--
extra : convert_revision : 9027d59bd7242aa0e4275bf94d8b1fb27bd59d79
---
 src/mem/cache/cache.hh           |  3 +--
 src/mem/cache/cache_impl.hh      | 22 ++++--------------
 src/mem/cache/miss/mshr.cc       | 48 ++++++++++++++++++++++++++++++++++++++--
 src/mem/cache/miss/mshr.hh       |  5 +++++
 src/mem/cache/miss/mshr_queue.cc | 16 ++++----------
 src/mem/packet.hh                |  3 ---
 6 files changed, 60 insertions(+), 37 deletions(-)

diff --git a/src/mem/cache/cache.hh b/src/mem/cache/cache.hh
index 7dfe9e8f1..57028a05e 100644
--- a/src/mem/cache/cache.hh
+++ b/src/mem/cache/cache.hh
@@ -190,8 +190,7 @@ class Cache : public BaseCache
      * @param new_state The new coherence state for the block.
      */
     void handleSnoop(PacketPtr ptk, BlkType *blk,
-                     bool is_timing, bool is_deferred,
-                     bool lower_mshr_pending);
+                     bool is_timing, bool is_deferred);
 
     /**
      * Create a writeback request for the given block.
diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh
index 82410afe1..efd7f4588 100644
--- a/src/mem/cache/cache_impl.hh
+++ b/src/mem/cache/cache_impl.hh
@@ -754,7 +754,7 @@ Cache<TagStore>::handleResponse(PacketPtr pkt)
         } else {
             // response to snoop request
             DPRINTF(Cache, "processing deferred snoop...\n");
-            handleSnoop(target->pkt, blk, true, true, false);
+            handleSnoop(target->pkt, blk, true, true);
         }
 
         mshr->popTarget();
@@ -917,8 +917,7 @@ Cache<TagStore>::doTimingSupplyResponse(PacketPtr req_pkt,
 template<class TagStore>
 void
 Cache<TagStore>::handleSnoop(PacketPtr pkt, BlkType *blk,
-                             bool is_timing, bool is_deferred,
-                             bool lower_mshr_pending)
+                             bool is_timing, bool is_deferred)
 {
     assert(pkt->isRequest());
 
@@ -930,9 +929,6 @@ Cache<TagStore>::handleSnoop(PacketPtr pkt, BlkType *blk,
     if (is_timing) {
         Packet *snoopPkt = new Packet(pkt, true);  // clear flags
         snoopPkt->setExpressSnoop();
-        if (lower_mshr_pending) {
-            snoopPkt->setLowerMSHRPending();
-        }
         snoopPkt->senderState = new ForwardResponseRecord(pkt, this);
         cpuSidePort->sendTiming(snoopPkt);
         if (snoopPkt->memInhibitAsserted()) {
@@ -1019,16 +1015,6 @@ Cache<TagStore>::snoopTiming(PacketPtr pkt)
     Addr blk_addr = pkt->getAddr() & ~(Addr(blkSize-1));
     MSHR *mshr = mshrQueue.findMatch(blk_addr);
 
-    // If a lower cache has an operation on this block pending (not
-    // yet in service) on the MSHR, then the upper caches need to know
-    // about it, as this means that the pending operation logically
-    // succeeds the current snoop.  It's not sufficient to record
-    // whether the MSHR *is* in service, as this misses the window
-    // where the lower cache has completed the request and the
-    // response is on its way back up the hierarchy.
-    bool lower_mshr_pending =
-        (mshr && (!mshr->inService) || pkt->lowerMSHRPending());
-
     // Let the MSHR itself track the snoop and decide whether we want
     // to go ahead and do the regular cache snoop
     if (mshr && mshr->handleSnoop(pkt, order++)) {
@@ -1075,7 +1061,7 @@ Cache<TagStore>::snoopTiming(PacketPtr pkt)
         }
     }
 
-    handleSnoop(pkt, blk, true, false, lower_mshr_pending);
+    handleSnoop(pkt, blk, true, false);
 }
 
 
@@ -1090,7 +1076,7 @@ Cache<TagStore>::snoopAtomic(PacketPtr pkt)
     }
 
     BlkType *blk = tags->findBlock(pkt->getAddr());
-    handleSnoop(pkt, blk, false, false, false);
+    handleSnoop(pkt, blk, false, false);
     return hitLatency;
 }
 
diff --git a/src/mem/cache/miss/mshr.cc b/src/mem/cache/miss/mshr.cc
index b9dfdf729..9b05aea3f 100644
--- a/src/mem/cache/miss/mshr.cc
+++ b/src/mem/cache/miss/mshr.cc
@@ -63,7 +63,8 @@ MSHR::TargetList::TargetList()
 
 
 inline void
-MSHR::TargetList::add(PacketPtr pkt, Tick readyTime, Counter order, bool cpuSide)
+MSHR::TargetList::add(PacketPtr pkt, Tick readyTime,
+                      Counter order, bool cpuSide)
 {
     if (cpuSide) {
         if (pkt->needsExclusive()) {
@@ -73,6 +74,12 @@ MSHR::TargetList::add(PacketPtr pkt, Tick readyTime, Counter order, bool cpuSide
         if (pkt->cmd == MemCmd::UpgradeReq) {
             hasUpgrade = true;
         }
+
+        MSHR *mshr = dynamic_cast<MSHR*>(pkt->senderState);
+        if (mshr != NULL) {
+            assert(!mshr->downstreamPending);
+            mshr->downstreamPending = true;
+        }
     }
 
     push_back(Target(pkt, readyTime, order, cpuSide));
@@ -97,6 +104,20 @@ MSHR::TargetList::replaceUpgrades()
 }
 
 
+void
+MSHR::TargetList::clearDownstreamPending()
+{
+    Iterator end_i = end();
+    for (Iterator i = begin(); i != end_i; ++i) {
+        MSHR *mshr = dynamic_cast<MSHR*>(i->pkt->senderState);
+        if (mshr != NULL) {
+            assert(mshr->downstreamPending);
+            mshr->downstreamPending = false;
+        }
+    }
+}
+
+
 void
 MSHR::allocate(Addr _addr, int _size, PacketPtr target,
                Tick whenReady, Counter _order)
@@ -109,6 +130,7 @@ MSHR::allocate(Addr _addr, int _size, PacketPtr target,
     isCacheFill = false;
     _isUncacheable = target->req->isUncacheable();
     inService = false;
+    downstreamPending = false;
     threadNum = 0;
     ntargets = 1;
     // Don't know of a case where we would allocate a new MSHR for a
@@ -121,6 +143,28 @@ MSHR::allocate(Addr _addr, int _size, PacketPtr target,
     data = NULL;
 }
 
+
+bool
+MSHR::markInService()
+{
+    assert(!inService);
+    if (isSimpleForward()) {
+        // we just forwarded the request packet & don't expect a
+        // response, so get rid of it
+        assert(getNumTargets() == 1);
+        popTarget();
+        return true;
+    }
+    inService = true;
+    if (!downstreamPending) {
+        // let upstream caches know that the request has made it to a
+        // level where it's going to get a response
+        targets->clearDownstreamPending();
+    }
+    return false;
+}
+
+
 void
 MSHR::deallocate()
 {
@@ -164,7 +208,7 @@ MSHR::allocateTarget(PacketPtr pkt, Tick whenReady, Counter _order)
 bool
 MSHR::handleSnoop(PacketPtr pkt, Counter _order)
 {
-    if (!inService || (pkt->isExpressSnoop() && pkt->lowerMSHRPending())) {
+    if (!inService || downstreamPending) {
         // Request has not been issued yet, or it's been issued
         // locally but is buffered unissued at some downstream cache
         // which is forwarding us this snoop.  Either way, the packet
diff --git a/src/mem/cache/miss/mshr.hh b/src/mem/cache/miss/mshr.hh
index 06ef6e113..e850a8633 100644
--- a/src/mem/cache/miss/mshr.hh
+++ b/src/mem/cache/miss/mshr.hh
@@ -81,6 +81,7 @@ class MSHR : public Packet::SenderState
         bool isReset()    { return !needsExclusive && !hasUpgrade; }
         void add(PacketPtr pkt, Tick readyTime, Counter order, bool cpuSide);
         void replaceUpgrades();
+        void clearDownstreamPending();
     };
 
     /** A list of MSHRs. */
@@ -117,6 +118,8 @@ class MSHR : public Packet::SenderState
     /** True if the request is uncacheable */
     bool _isUncacheable;
 
+    bool downstreamPending;
+
     bool pendingInvalidate;
     bool pendingShared;
 
@@ -163,6 +166,8 @@ public:
     void allocate(Addr addr, int size, PacketPtr pkt,
                   Tick when, Counter _order);
 
+    bool markInService();
+
     /**
      * Mark this MSHR as free.
      */
diff --git a/src/mem/cache/miss/mshr_queue.cc b/src/mem/cache/miss/mshr_queue.cc
index 4d3cf30e1..50a28fb3c 100644
--- a/src/mem/cache/miss/mshr_queue.cc
+++ b/src/mem/cache/miss/mshr_queue.cc
@@ -179,20 +179,12 @@ MSHRQueue::moveToFront(MSHR *mshr)
 void
 MSHRQueue::markInService(MSHR *mshr)
 {
-    assert(!mshr->inService);
-    if (mshr->isSimpleForward()) {
-        // we just forwarded the request packet & don't expect a
-        // response, so get rid of it
-        assert(mshr->getNumTargets() == 1);
-        mshr->popTarget();
+    if (mshr->markInService()) {
         deallocate(mshr);
-        return;
+    } else {
+        readyList.erase(mshr->readyIter);
+        inServiceEntries += 1;
     }
-    mshr->inService = true;
-    readyList.erase(mshr->readyIter);
-    //mshr->readyIter = NULL;
-    inServiceEntries += 1;
-    //readyList.pop_front();
 }
 
 void
diff --git a/src/mem/packet.hh b/src/mem/packet.hh
index 779ea49a2..036bd3fd7 100644
--- a/src/mem/packet.hh
+++ b/src/mem/packet.hh
@@ -257,7 +257,6 @@ class Packet : public FastAlloc
         Shared,
         // Special control flags
         ExpressSnoop,
-        LowerMSHRPending,  // not yet in service
         NUM_PACKET_FLAGS
     };
 
@@ -323,8 +322,6 @@ class Packet : public FastAlloc
     // Special control flags
     void setExpressSnoop()      { flags[ExpressSnoop] = true; }
     bool isExpressSnoop()       { return flags[ExpressSnoop]; }
-    void setLowerMSHRPending()  { flags[LowerMSHRPending] = true; }
-    bool lowerMSHRPending()     { return flags[LowerMSHRPending]; }
 
     // Network error conditions... encapsulate them as methods since
     // their encoding keeps changing (from result field to command
-- 
cgit v1.2.3


From 97f7ee2e507733eb9dd1802c16900fd14ae6b7f3 Mon Sep 17 00:00:00 2001
From: Steve Reinhardt <stever@eecs.umich.edu>
Date: Mon, 23 Jul 2007 08:18:51 -0700
Subject: Fix WriteReq/StoreCondReq setting in O3.

--HG--
extra : convert_revision : b41571535f3d1f78df3cb6e48c16de5c7549d87f
---
 src/cpu/o3/lsq_unit_impl.hh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/cpu/o3/lsq_unit_impl.hh b/src/cpu/o3/lsq_unit_impl.hh
index 5ae1cc0e4..8b2e82d8e 100644
--- a/src/cpu/o3/lsq_unit_impl.hh
+++ b/src/cpu/o3/lsq_unit_impl.hh
@@ -649,7 +649,7 @@ LSQUnit<Impl>::writebackStores()
 
         MemCmd command =
             req->isSwap() ? MemCmd::SwapReq :
-            (req->isLocked() ? MemCmd::WriteReq : MemCmd::StoreCondReq);
+            (req->isLocked() ? MemCmd::StoreCondReq : MemCmd::WriteReq);
         PacketPtr data_pkt = new Packet(req, command,
                                         Packet::Broadcast);
         data_pkt->dataStatic(inst->memData);
-- 
cgit v1.2.3


From 1f9ea6e122f6a39d936aec2f8f5ce72d267799a8 Mon Sep 17 00:00:00 2001
From: Steve Reinhardt <stever@eecs.umich.edu>
Date: Mon, 23 Jul 2007 22:28:40 -0700
Subject: A couple more minor bug fixes for multilevel coherence.

--HG--
extra : convert_revision : 370f9e34911157765be6fd49e826fa1af589b466
---
 src/mem/cache/cache_impl.hh | 21 ++++++++++++++-------
 src/mem/cache/miss/mshr.cc  | 10 ++++++++--
 2 files changed, 22 insertions(+), 9 deletions(-)

diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh
index efd7f4588..412d10599 100644
--- a/src/mem/cache/cache_impl.hh
+++ b/src/mem/cache/cache_impl.hh
@@ -148,7 +148,13 @@ void
 Cache<TagStore>::satisfyCpuSideRequest(PacketPtr pkt, BlkType *blk)
 {
     assert(blk);
-    assert(pkt->needsExclusive() ? blk->isWritable() : blk->isValid());
+    // Occasionally this is not true... if we are a lower-level cache
+    // satisfying a string of Read and ReadEx requests from
+    // upper-level caches, a Read will mark the block as shared but we
+    // can satisfy a following ReadEx anyway since we can rely on the
+    // Read requester(s) to have buffered the ReadEx snoop and to
+    // invalidate their blocks after receiving them.
+    // assert(pkt->needsExclusive() ? blk->isWritable() : blk->isValid());
     assert(pkt->getOffset(blkSize) + pkt->getSize() <= blkSize);
 
     // Check RMW operations first since both isRead() and
@@ -727,7 +733,7 @@ Cache<TagStore>::handleResponse(PacketPtr pkt)
             Tick completion_time;
             if (blk != NULL) {
                 satisfyCpuSideRequest(target->pkt, blk);
-                // How many bytes pass the first request is this one
+                // How many bytes past the first request is this one
                 int transfer_offset =
                     target->pkt->getOffset(blkSize) - initial_offset;
                 if (transfer_offset < 0) {
@@ -738,10 +744,9 @@ Cache<TagStore>::handleResponse(PacketPtr pkt)
                 completion_time = tags->getHitLatency() +
                     transfer_offset ? pkt->finishTime : pkt->firstWordTime;
 
-                if (!target->pkt->req->isUncacheable()) {
-                    missLatency[target->pkt->cmdToIndex()][0/*pkt->req->getThreadNum()*/] +=
-                        completion_time - target->recvTime;
-                }
+                assert(!target->pkt->req->isUncacheable());
+                missLatency[target->pkt->cmdToIndex()][0/*pkt->req->getThreadNum()*/] +=
+                    completion_time - target->recvTime;
             } else {
                 // not a cache fill, just forwarding response
                 completion_time = tags->getHitLatency() + pkt->finishTime;
@@ -1004,7 +1009,9 @@ template<class TagStore>
 void
 Cache<TagStore>::snoopTiming(PacketPtr pkt)
 {
-    if (pkt->req->isUncacheable()) {
+    // Note that some deferred snoops don't have requests, since the
+    // original access may have already completed
+    if (pkt->req && pkt->req->isUncacheable()) {
         //Can't get a hit on an uncacheable address
         //Revisit this for multi level coherence
         return;
diff --git a/src/mem/cache/miss/mshr.cc b/src/mem/cache/miss/mshr.cc
index 9b05aea3f..7f216ad39 100644
--- a/src/mem/cache/miss/mshr.cc
+++ b/src/mem/cache/miss/mshr.cc
@@ -208,7 +208,7 @@ MSHR::allocateTarget(PacketPtr pkt, Tick whenReady, Counter _order)
 bool
 MSHR::handleSnoop(PacketPtr pkt, Counter _order)
 {
-    if (!inService || downstreamPending) {
+    if (!inService || (pkt->isExpressSnoop() && downstreamPending)) {
         // Request has not been issued yet, or it's been issued
         // locally but is buffered unissued at some downstream cache
         // which is forwarding us this snoop.  Either way, the packet
@@ -249,13 +249,19 @@ MSHR::handleSnoop(PacketPtr pkt, Counter _order)
     if (targets->needsExclusive || pkt->needsExclusive()) {
         // actual target device (typ. PhysicalMemory) will delete the
         // packet on reception, so we need to save a copy here
-        targets->add(new Packet(pkt), curTick, _order, false);
+        PacketPtr cp_pkt = new Packet(pkt);
+        targets->add(cp_pkt, curTick, _order, false);
         ++ntargets;
 
         if (targets->needsExclusive) {
             // We're awaiting an exclusive copy, so ownership is pending.
             // It's up to us to respond once the data arrives.
             pkt->assertMemInhibit();
+        } else {
+            // Someone else may respond before we get around to
+            // processing this snoop, which means the copied request
+            // pointer will no longer be valid
+            cp_pkt->req = NULL;
         }
 
         if (pkt->needsExclusive()) {
-- 
cgit v1.2.3


From d094f9cf277bf08e57057a68ac6beb295bee5ce1 Mon Sep 17 00:00:00 2001
From: Steve Reinhardt <stever@eecs.umich.edu>
Date: Tue, 24 Jul 2007 22:36:10 -0700
Subject: Don't delete request at target... requester still needs it.

--HG--
extra : convert_revision : 76377ca2e4d7ea70d1d54d325a63ce710e260b93
---
 src/mem/tport.cc | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/mem/tport.cc b/src/mem/tport.cc
index a4f791048..e4b8d70e9 100644
--- a/src/mem/tport.cc
+++ b/src/mem/tport.cc
@@ -69,7 +69,6 @@ SimpleTimingPort::recvTiming(PacketPtr pkt)
     if (pkt->memInhibitAsserted()) {
         // snooper will supply based on copy of packet
         // still target's responsibility to delete packet
-        delete pkt->req;
         delete pkt;
         return true;
     }
-- 
cgit v1.2.3


From de52eebd3bbdeebadd5d7e7ce5f16dd68efb8301 Mon Sep 17 00:00:00 2001
From: Steve Reinhardt <stever@eecs.umich.edu>
Date: Tue, 24 Jul 2007 22:37:41 -0700
Subject: Integrate snoop loop functions into their respective call sites. Also
 some additional cleanup of Bus::recvTiming().

--HG--
extra : convert_revision : 156814119f75d04c2e954aec2d7ed6fdc186c26f
---
 src/mem/bus.cc | 156 +++++++++++++++++++++++++--------------------------------
 src/mem/bus.hh |   9 ----
 2 files changed, 67 insertions(+), 98 deletions(-)

diff --git a/src/mem/bus.cc b/src/mem/bus.cc
index da8df06ea..8243d40f1 100644
--- a/src/mem/bus.cc
+++ b/src/mem/bus.cc
@@ -172,78 +172,79 @@ void Bus::occupyBus(PacketPtr pkt)
 bool
 Bus::recvTiming(PacketPtr pkt)
 {
-    int port_id;
+    short src = pkt->getSrc();
     DPRINTF(Bus, "recvTiming: packet src %d dest %d addr 0x%x cmd %s\n",
-            pkt->getSrc(), pkt->getDest(), pkt->getAddr(), pkt->cmdString());
+            src, pkt->getDest(), pkt->getAddr(), pkt->cmdString());
 
-    BusPort *pktPort;
-    if (pkt->getSrc() == defaultId)
-        pktPort = defaultPort;
-    else pktPort = interfaces[pkt->getSrc()];
+    BusPort *src_port = (src == defaultId) ? defaultPort : interfaces[src];
 
     // If the bus is busy, or other devices are in line ahead of the current
     // one, put this device on the retry list.
     if (!(pkt->isResponse() || pkt->isExpressSnoop()) &&
         (tickNextIdle > curTick ||
-         (retryList.size() && (!inRetry || pktPort != retryList.front()))))
+         (retryList.size() && (!inRetry || src_port != retryList.front()))))
     {
-        addToRetryList(pktPort);
+        addToRetryList(src_port);
         DPRINTF(Bus, "recvTiming: Bus is busy, returning false\n");
         return false;
     }
 
+    occupyBus(pkt);
+
     short dest = pkt->getDest();
+    int dest_port_id;
+    Port *dest_port;
 
     if (dest == Packet::Broadcast) {
-        port_id = findPort(pkt->getAddr());
-        timingSnoop(pkt, interfaces[port_id]);
-
-        if (pkt->memInhibitAsserted()) {
-            //Cache-Cache transfer occuring
-            if (inRetry) {
-                retryList.front()->onRetryList(false);
-                retryList.pop_front();
-                inRetry = false;
+        dest_port_id = findPort(pkt->getAddr());
+        dest_port = interfaces[dest_port_id];
+        for (SnoopIter s_iter = snoopPorts.begin();
+             s_iter != snoopPorts.end();
+             s_iter++) {
+            BusPort *p = *s_iter;
+            if (p != dest_port && p != src_port) {
+#ifndef NDEBUG
+                // cache is not allowed to refuse snoop
+                bool success = p->sendTiming(pkt);
+                assert(success);
+#else
+                // avoid unused variable warning
+                p->sendTiming(pkt);
+#endif
             }
-            occupyBus(pkt);
-            DPRINTF(Bus, "recvTiming: Packet sucessfully sent\n");
-            return true;
         }
     } else {
         assert(dest >= 0 && dest < maxId);
-        assert(dest != pkt->getSrc()); // catch infinite loops
-        port_id = dest;
+        assert(dest != src); // catch infinite loops
+        dest_port_id = dest;
+        dest_port = interfaces[dest_port_id];
     }
 
-    occupyBus(pkt);
-
-    if (port_id != pkt->getSrc()) {
-        if (interfaces[port_id]->sendTiming(pkt))  {
-            // Packet was successfully sent. Return true.
-            // Also take care of retries
-            if (inRetry) {
-                DPRINTF(Bus, "Remove retry from list %d\n",
-                        retryList.front()->getId());
-                retryList.front()->onRetryList(false);
-                retryList.pop_front();
-                inRetry = false;
-            }
-            return true;
+    if (dest_port_id == src) {
+        // Must be forwarded snoop up from below...
+        assert(dest == Packet::Broadcast);
+    } else {
+        // send to actual target
+        if (!dest_port->sendTiming(pkt))  {
+            // Packet not successfully sent. Leave or put it on the retry list.
+            // illegal to block responses... can lead to deadlock
+            assert(!pkt->isResponse());
+            DPRINTF(Bus, "Adding2 a retry to RETRY list %d\n", src);
+            addToRetryList(src_port);
+            return false;
         }
-
-        // Packet not successfully sent. Leave or put it on the retry list.
-        // illegal to block responses... can lead to deadlock
-        assert(!pkt->isResponse());
-        DPRINTF(Bus, "Adding2 a retry to RETRY list %d\n",
-                pktPort->getId());
-        addToRetryList(pktPort);
-        return false;
+        // send OK, fall through
     }
-    else {
-        //Forwarding up from responder, just return true;
-        DPRINTF(Bus, "recvTiming: can we be here?\n");
-        return true;
+
+    // Packet was successfully sent.
+    // Also take care of retries
+    if (inRetry) {
+        DPRINTF(Bus, "Remove retry from list %d\n", src);
+        retryList.front()->onRetryList(false);
+        retryList.pop_front();
+        inRetry = false;
     }
+    return true;
 }
 
 void
@@ -314,46 +315,6 @@ Bus::findPort(Addr addr)
     return dest_id;
 }
 
-void
-Bus::functionalSnoop(PacketPtr pkt, Port *responder)
-{
-    // The packet may be changed by another bus on snoops, restore the
-    // id after each
-    int src_id = pkt->getSrc();
-
-    assert(pkt->isRequest()); // hasn't already been satisfied
-
-    for (SnoopIter s_iter = snoopPorts.begin();
-         s_iter != snoopPorts.end();
-         s_iter++) {
-        BusPort *p = *s_iter;
-        if (p != responder && p->getId() != src_id) {
-            p->sendFunctional(pkt);
-        }
-        if (pkt->isResponse()) {
-            break;
-        }
-        pkt->setSrc(src_id);
-    }
-}
-
-bool
-Bus::timingSnoop(PacketPtr pkt, Port* responder)
-{
-    for (SnoopIter s_iter = snoopPorts.begin();
-         s_iter != snoopPorts.end();
-         s_iter++) {
-        BusPort *p = *s_iter;
-        if (p != responder && p->getId() != pkt->getSrc()) {
-            bool success = p->sendTiming(pkt);
-            if (!success)
-                return false;
-        }
-    }
-
-    return true;
-}
-
 
 /** Function called by the port when the bus is receiving a Atomic
  * transaction.*/
@@ -434,7 +395,24 @@ Bus::recvFunctional(PacketPtr pkt)
 
     int port_id = findPort(pkt->getAddr());
     Port *port = interfaces[port_id];
-    functionalSnoop(pkt, port);
+    // The packet may be changed by another bus on snoops, restore the
+    // id after each
+    int src_id = pkt->getSrc();
+
+    assert(pkt->isRequest()); // hasn't already been satisfied
+
+    for (SnoopIter s_iter = snoopPorts.begin();
+         s_iter != snoopPorts.end();
+         s_iter++) {
+        BusPort *p = *s_iter;
+        if (p != port && p->getId() != src_id) {
+            p->sendFunctional(pkt);
+        }
+        if (pkt->isResponse()) {
+            break;
+        }
+        pkt->setSrc(src_id);
+    }
 
     // If the snooping hasn't found what we were looking for, keep going.
     if (!pkt->isResponse() && port_id != pkt->getSrc()) {
diff --git a/src/mem/bus.hh b/src/mem/bus.hh
index a19420244..06ccd4ac0 100644
--- a/src/mem/bus.hh
+++ b/src/mem/bus.hh
@@ -180,15 +180,6 @@ class Bus : public MemObject
      */
     int findPort(Addr addr);
 
-    /** Snoop all relevant ports functionally. */
-    void functionalSnoop(PacketPtr pkt, Port *responder);
-
-    /** Call snoop on caches, be sure to set SNOOP_COMMIT bit if you want
-     * the snoop to happen
-     * @return True if succeds.
-     */
-    bool timingSnoop(PacketPtr pkt, Port *responder);
-
     /** Process address range request.
      * @param resp addresses that we can respond to
      * @param snoop addresses that we would like to snoop
-- 
cgit v1.2.3


From c1097d06f7b27f4dd6ecaa47d1685e015725b5f5 Mon Sep 17 00:00:00 2001
From: Steve Reinhardt <stever@eecs.umich.edu>
Date: Wed, 25 Jul 2007 07:47:37 -0700
Subject: Can't block on memInhibit packets (now that bus no longer filters
 them for us).

--HG--
extra : convert_revision : 34e7eaf5ee1e739f5557a2d417e569ed2ceb14b3
---
 src/mem/cache/cache_impl.hh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh
index 412d10599..fa2f45632 100644
--- a/src/mem/cache/cache_impl.hh
+++ b/src/mem/cache/cache_impl.hh
@@ -1230,7 +1230,7 @@ bool
 Cache<TagStore>::CpuSidePort::recvTiming(PacketPtr pkt)
 {
     // illegal to block responses... can lead to deadlock
-    if (pkt->isRequest() && blocked) {
+    if (pkt->isRequest() && !pkt->memInhibitAsserted() && blocked) {
         DPRINTF(Cache,"Scheduling a retry while blocked\n");
         mustSendRetry = true;
         return false;
-- 
cgit v1.2.3


From 58250b8e5fd8aba9ed99b7aff6ce67b05b379fa0 Mon Sep 17 00:00:00 2001
From: Steve Reinhardt <stever@eecs.umich.edu>
Date: Thu, 26 Jul 2007 17:04:12 -0700
Subject: bus: Fix default port handling.

--HG--
extra : convert_revision : 121b6e31cddff17c51fc4f3df20e7e2bde87d04f
---
 src/mem/bus.cc | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/src/mem/bus.cc b/src/mem/bus.cc
index 8243d40f1..1fad13c5a 100644
--- a/src/mem/bus.cc
+++ b/src/mem/bus.cc
@@ -197,7 +197,8 @@ Bus::recvTiming(PacketPtr pkt)
 
     if (dest == Packet::Broadcast) {
         dest_port_id = findPort(pkt->getAddr());
-        dest_port = interfaces[dest_port_id];
+        dest_port = (dest_port_id == defaultId) ?
+            defaultPort : interfaces[dest_port_id];
         for (SnoopIter s_iter = snoopPorts.begin();
              s_iter != snoopPorts.end();
              s_iter++) {
@@ -217,7 +218,8 @@ Bus::recvTiming(PacketPtr pkt)
         assert(dest >= 0 && dest < maxId);
         assert(dest != src); // catch infinite loops
         dest_port_id = dest;
-        dest_port = interfaces[dest_port_id];
+        dest_port = (dest_port_id == defaultId) ?
+            defaultPort : interfaces[dest_port_id];
     }
 
     if (dest_port_id == src) {
@@ -336,7 +338,8 @@ Bus::recvAtomic(PacketPtr pkt)
     int orig_src = pkt->getSrc();
 
     int target_port_id = findPort(pkt->getAddr());
-    Port *target_port = interfaces[target_port_id];
+    Port *target_port = (target_port_id == defaultId) ?
+        defaultPort : interfaces[target_port_id];
 
     SnoopIter s_end = snoopPorts.end();
     for (SnoopIter s_iter = snoopPorts.begin(); s_iter != s_end; s_iter++) {
@@ -394,7 +397,7 @@ Bus::recvFunctional(PacketPtr pkt)
     assert(pkt->getDest() == Packet::Broadcast);
 
     int port_id = findPort(pkt->getAddr());
-    Port *port = interfaces[port_id];
+    Port *port = (port_id == defaultId) ? defaultPort : interfaces[port_id];
     // The packet may be changed by another bus on snoops, restore the
     // id after each
     int src_id = pkt->getSrc();
-- 
cgit v1.2.3


From f1b5c8fb57ee656df3a1b9d021de723279f66b2f Mon Sep 17 00:00:00 2001
From: Steve Reinhardt <stever@eecs.umich.edu>
Date: Thu, 26 Jul 2007 17:04:16 -0700
Subject: Continue snooping after a writeback is encountered.

--HG--
extra : convert_revision : 8411338a6c0fdd7072dd32bdffacdace62d5de90
---
 src/mem/cache/cache_impl.hh | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh
index fa2f45632..6db40b609 100644
--- a/src/mem/cache/cache_impl.hh
+++ b/src/mem/cache/cache_impl.hh
@@ -1064,7 +1064,13 @@ Cache<TagStore>::snoopTiming(PacketPtr pkt)
                 // Invalidation trumps our writeback... discard here
                 markInService(mshr);
             }
-            return;
+
+            // If this was a shared writeback, there may still be
+            // other shared copies above that require invalidation.
+            // We could be more selective and return here if the
+            // request is non-exclusive or if the writeback is
+            // exclusive.
+            break;
         }
     }
 
-- 
cgit v1.2.3


From c3bf59dcb9dddd64d5ad603f7af6589b87e7afad Mon Sep 17 00:00:00 2001
From: Steve Reinhardt <stever@eecs.umich.edu>
Date: Thu, 26 Jul 2007 17:04:17 -0700
Subject: Add downward express snoops for invalidations.

--HG--
extra : convert_revision : 4916fa9721d727d8416ad8c07df3a8171d02b2b4
---
 src/mem/cache/cache_impl.hh | 15 +++++++++++++++
 src/mem/cache/miss/mshr.cc  |  1 +
 src/mem/packet.hh           |  8 +++++++-
 3 files changed, 23 insertions(+), 1 deletion(-)

diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh
index 6db40b609..2ead54ba6 100644
--- a/src/mem/cache/cache_impl.hh
+++ b/src/mem/cache/cache_impl.hh
@@ -371,6 +371,17 @@ Cache<TagStore>::timingAccess(PacketPtr pkt)
         DPRINTF(Cache, "mem inhibited on 0x%x: not responding\n",
                 pkt->getAddr());
         assert(!pkt->req->isUncacheable());
+        // Special tweak for multilevel coherence: snoop downward here
+        // on invalidates since there may be other caches below here
+        // that have shared copies.  Not necessary if we know that
+        // supplier had exclusive copy to begin with.
+        if (pkt->needsExclusive() && !pkt->isSupplyExclusive()) {
+            Packet *snoopPkt = new Packet(pkt, true);  // clear flags
+            snoopPkt->setExpressSnoop();
+            snoopPkt->assertMemInhibit();
+            memSidePort->sendTiming(snoopPkt);
+            // main memory will delete snoopPkt
+        }
         return true;
     }
 
@@ -966,6 +977,7 @@ Cache<TagStore>::handleSnoop(PacketPtr pkt, BlkType *blk,
     // we respond in atomic mode), so just figure out what to do now
     // and then do it later
     bool supply = blk->isDirty() && pkt->isRead() && !upperSupply;
+    bool have_exclusive = blk->isWritable();
     bool invalidate = pkt->isInvalidate();
 
     if (pkt->isRead() && !pkt->isInvalidate()) {
@@ -985,6 +997,9 @@ Cache<TagStore>::handleSnoop(PacketPtr pkt, BlkType *blk,
     if (supply) {
         assert(!pkt->memInhibitAsserted());
         pkt->assertMemInhibit();
+        if (have_exclusive) {
+            pkt->setSupplyExclusive();
+        }
         if (is_timing) {
             doTimingSupplyResponse(pkt, blk->data, is_deferred);
         } else {
diff --git a/src/mem/cache/miss/mshr.cc b/src/mem/cache/miss/mshr.cc
index 7f216ad39..5ba3d1ec5 100644
--- a/src/mem/cache/miss/mshr.cc
+++ b/src/mem/cache/miss/mshr.cc
@@ -257,6 +257,7 @@ MSHR::handleSnoop(PacketPtr pkt, Counter _order)
             // We're awaiting an exclusive copy, so ownership is pending.
             // It's up to us to respond once the data arrives.
             pkt->assertMemInhibit();
+            pkt->setSupplyExclusive();
         } else {
             // Someone else may respond before we get around to
             // processing this snoop, which means the copied request
diff --git a/src/mem/packet.hh b/src/mem/packet.hh
index 036bd3fd7..c6534d6c9 100644
--- a/src/mem/packet.hh
+++ b/src/mem/packet.hh
@@ -256,7 +256,11 @@ class Packet : public FastAlloc
         MemInhibit,
         Shared,
         // Special control flags
+        /// Special timing-mode atomic snoop for multi-level coherence.
         ExpressSnoop,
+        /// Does supplier have exclusive copy?
+        /// Useful for multi-level coherence.
+        SupplyExclusive,
         NUM_PACKET_FLAGS
     };
 
@@ -315,13 +319,15 @@ class Packet : public FastAlloc
 
     // Snoop flags
     void assertMemInhibit()     { flags[MemInhibit] = true; }
-    void assertShared()         { flags[Shared] = true; }
     bool memInhibitAsserted()   { return flags[MemInhibit]; }
+    void assertShared()         { flags[Shared] = true; }
     bool sharedAsserted()       { return flags[Shared]; }
 
     // Special control flags
     void setExpressSnoop()      { flags[ExpressSnoop] = true; }
     bool isExpressSnoop()       { return flags[ExpressSnoop]; }
+    void setSupplyExclusive()   { flags[SupplyExclusive] = true; }
+    bool isSupplyExclusive()    { return flags[SupplyExclusive]; }
 
     // Network error conditions... encapsulate them as methods since
     // their encoding keeps changing (from result field to command
-- 
cgit v1.2.3


From 6b73ff43ff58502c80050c7aeff5a08a4ce61f87 Mon Sep 17 00:00:00 2001
From: Steve Reinhardt <stever@eecs.umich.edu>
Date: Thu, 26 Jul 2007 17:04:17 -0700
Subject: Have owner respond to UpgradeReq to avoid race.

--HG--
extra : convert_revision : 30916fca6978c73d8a14558f2d7288c1eab54ad4
---
 src/mem/cache/cache_impl.hh | 46 +++++++++++++++++++++++----------------------
 1 file changed, 24 insertions(+), 22 deletions(-)

diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh
index 2ead54ba6..a35d7b2a6 100644
--- a/src/mem/cache/cache_impl.hh
+++ b/src/mem/cache/cache_impl.hh
@@ -926,7 +926,9 @@ Cache<TagStore>::doTimingSupplyResponse(PacketPtr req_pkt,
     }
     pkt->allocate();
     pkt->makeTimingResponse();
-    pkt->setDataFromBlock(blk_data, blkSize);
+    if (pkt->isRead()) {
+        pkt->setDataFromBlock(blk_data, blkSize);
+    }
     memSidePort->respond(pkt, curTick + hitLatency);
 }
 
@@ -940,8 +942,8 @@ Cache<TagStore>::handleSnoop(PacketPtr pkt, BlkType *blk,
     // first propagate snoop upward to see if anyone above us wants to
     // handle it.  save & restore packet src since it will get
     // rewritten to be relative to cpu-side bus (if any)
-    bool alreadySupplied = pkt->memInhibitAsserted();
-    bool upperSupply = false;
+    bool alreadyResponded = pkt->memInhibitAsserted();
+    bool upperResponse = false;
     if (is_timing) {
         Packet *snoopPkt = new Packet(pkt, true);  // clear flags
         snoopPkt->setExpressSnoop();
@@ -949,7 +951,7 @@ Cache<TagStore>::handleSnoop(PacketPtr pkt, BlkType *blk,
         cpuSidePort->sendTiming(snoopPkt);
         if (snoopPkt->memInhibitAsserted()) {
             // cache-to-cache response from some upper cache
-            assert(!alreadySupplied);
+            assert(!alreadyResponded);
             pkt->assertMemInhibit();
         } else {
             delete snoopPkt->senderState;
@@ -961,7 +963,7 @@ Cache<TagStore>::handleSnoop(PacketPtr pkt, BlkType *blk,
     } else {
         int origSrc = pkt->getSrc();
         cpuSidePort->sendAtomic(pkt);
-        if (!alreadySupplied && pkt->memInhibitAsserted()) {
+        if (!alreadyResponded && pkt->memInhibitAsserted()) {
             // cache-to-cache response from some upper cache:
             // forward response to original requester
             assert(pkt->isResponse());
@@ -976,7 +978,8 @@ Cache<TagStore>::handleSnoop(PacketPtr pkt, BlkType *blk,
     // we may end up modifying both the block state and the packet (if
     // we respond in atomic mode), so just figure out what to do now
     // and then do it later
-    bool supply = blk->isDirty() && pkt->isRead() && !upperSupply;
+    assert(!(blk->isDirty() && upperResponse));
+    bool respond = blk->isDirty() && pkt->needsResponse();
     bool have_exclusive = blk->isWritable();
     bool invalidate = pkt->isInvalidate();
 
@@ -994,7 +997,7 @@ Cache<TagStore>::handleSnoop(PacketPtr pkt, BlkType *blk,
         blk->status &= ~bits_to_clear;
     }
 
-    if (supply) {
+    if (respond) {
         assert(!pkt->memInhibitAsserted());
         pkt->assertMemInhibit();
         if (have_exclusive) {
@@ -1016,7 +1019,7 @@ Cache<TagStore>::handleSnoop(PacketPtr pkt, BlkType *blk,
 
     DPRINTF(Cache, "snooped a %s request for addr %x, %snew state is %i\n",
             pkt->cmdString(), blockAlign(pkt->getAddr()),
-            supply ? "supplying data, " : "", blk->status);
+            respond ? "responding, " : "", blk->status);
 }
 
 
@@ -1026,7 +1029,8 @@ Cache<TagStore>::snoopTiming(PacketPtr pkt)
 {
     // Note that some deferred snoops don't have requests, since the
     // original access may have already completed
-    if (pkt->req && pkt->req->isUncacheable()) {
+    if ((pkt->req && pkt->req->isUncacheable()) ||
+        pkt->cmd == MemCmd::Writeback) {
         //Can't get a hit on an uncacheable address
         //Revisit this for multi level coherence
         return;
@@ -1061,19 +1065,17 @@ Cache<TagStore>::snoopTiming(PacketPtr pkt)
             PacketPtr wb_pkt = mshr->getTarget()->pkt;
             assert(wb_pkt->cmd == MemCmd::Writeback);
 
-            if (pkt->isRead()) {
-                assert(!pkt->memInhibitAsserted());
-                pkt->assertMemInhibit();
-                if (!pkt->needsExclusive()) {
-                    pkt->assertShared();
-                } else {
-                    // if we're not asserting the shared line, we need to
-                    // invalidate our copy.  we'll do that below as long as
-                    // the packet's invalidate flag is set...
-                    assert(pkt->isInvalidate());
-                }
-                doTimingSupplyResponse(pkt, wb_pkt->getPtr<uint8_t>(), false);
+            assert(!pkt->memInhibitAsserted());
+            pkt->assertMemInhibit();
+            if (!pkt->needsExclusive()) {
+                pkt->assertShared();
+            } else {
+                // if we're not asserting the shared line, we need to
+                // invalidate our copy.  we'll do that below as long as
+                // the packet's invalidate flag is set...
+                assert(pkt->isInvalidate());
             }
+            doTimingSupplyResponse(pkt, wb_pkt->getPtr<uint8_t>(), false);
 
             if (pkt->isInvalidate()) {
                 // Invalidation trumps our writeback... discard here
@@ -1097,7 +1099,7 @@ template<class TagStore>
 Tick
 Cache<TagStore>::snoopAtomic(PacketPtr pkt)
 {
-    if (pkt->req->isUncacheable()) {
+    if (pkt->req->isUncacheable() || pkt->cmd == MemCmd::Writeback) {
         // Can't get a hit on an uncacheable address
         // Revisit this for multi level coherence
         return hitLatency;
-- 
cgit v1.2.3


From 01c9d34a0b4bcef3d8cca12eaeb7753e376378a8 Mon Sep 17 00:00:00 2001
From: Steve Reinhardt <stever@eecs.umich.edu>
Date: Fri, 27 Jul 2007 03:51:15 -0400
Subject: cache: Get rid of unused variable.

--HG--
extra : convert_revision : 394adc12fbd7ea10280a1b8d6bc3cb15ee019f27
---
 src/mem/cache/cache_impl.hh | 2 --
 1 file changed, 2 deletions(-)

diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh
index a35d7b2a6..150cf80b7 100644
--- a/src/mem/cache/cache_impl.hh
+++ b/src/mem/cache/cache_impl.hh
@@ -943,7 +943,6 @@ Cache<TagStore>::handleSnoop(PacketPtr pkt, BlkType *blk,
     // handle it.  save & restore packet src since it will get
     // rewritten to be relative to cpu-side bus (if any)
     bool alreadyResponded = pkt->memInhibitAsserted();
-    bool upperResponse = false;
     if (is_timing) {
         Packet *snoopPkt = new Packet(pkt, true);  // clear flags
         snoopPkt->setExpressSnoop();
@@ -978,7 +977,6 @@ Cache<TagStore>::handleSnoop(PacketPtr pkt, BlkType *blk,
     // we may end up modifying both the block state and the packet (if
     // we respond in atomic mode), so just figure out what to do now
     // and then do it later
-    assert(!(blk->isDirty() && upperResponse));
     bool respond = blk->isDirty() && pkt->needsResponse();
     bool have_exclusive = blk->isWritable();
     bool invalidate = pkt->isInvalidate();
-- 
cgit v1.2.3


From 0cbcb715e0f6f2f7b1338d37e641ef931247748f Mon Sep 17 00:00:00 2001
From: Steve Reinhardt <stever@eecs.umich.edu>
Date: Fri, 27 Jul 2007 12:46:45 -0700
Subject: cache/memtest: fixes for functional accesses.

--HG--
extra : convert_revision : 688ba4d882cad2c96cf44c9e46999f74266e02ee
---
 src/cpu/memtest/memtest.cc       |  2 --
 src/mem/cache/cache_impl.hh      | 31 +++++--------------------------
 src/mem/cache/miss/mshr.cc       | 14 ++++++++++++++
 src/mem/cache/miss/mshr.hh       |  6 ++++++
 src/mem/cache/miss/mshr_queue.cc | 15 +++++++++++++++
 src/mem/cache/miss/mshr_queue.hh |  2 ++
 6 files changed, 42 insertions(+), 28 deletions(-)

diff --git a/src/cpu/memtest/memtest.cc b/src/cpu/memtest/memtest.cc
index 86a33f44b..83417c514 100644
--- a/src/cpu/memtest/memtest.cc
+++ b/src/cpu/memtest/memtest.cc
@@ -359,7 +359,6 @@ MemTest::tick()
 
         if (probe) {
             cachePort.sendFunctional(pkt);
-            pkt->makeAtomicResponse();
             completeRequest(pkt);
         } else {
             sendPkt(pkt);
@@ -393,7 +392,6 @@ MemTest::tick()
 
         if (probe) {
             cachePort.sendFunctional(pkt);
-            pkt->makeAtomicResponse();
             completeRequest(pkt);
         } else {
             sendPkt(pkt);
diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh
index 150cf80b7..c1b01d676 100644
--- a/src/mem/cache/cache_impl.hh
+++ b/src/mem/cache/cache_impl.hh
@@ -641,33 +641,12 @@ Cache<TagStore>::functionalAccess(PacketPtr pkt,
         return;
     }
 
-    // Need to check for outstanding misses and writes
-
-    // There can only be one matching outstanding miss.
-    MSHR *mshr = mshrQueue.findMatch(blk_addr);
-    if (mshr) {
-        MSHR::TargetList *targets = mshr->getTargetList();
-        MSHR::TargetList::iterator i = targets->begin();
-        MSHR::TargetList::iterator end = targets->end();
-        for (; i != end; ++i) {
-            PacketPtr targetPkt = i->pkt;
-            if (pkt->checkFunctional(targetPkt))
-                return;
-        }
+    // Need to check for outstanding misses and writes; if neither one
+    // satisfies, then forward to other side of cache.
+    if (!(mshrQueue.checkFunctional(pkt, blk_addr) ||
+          writeBuffer.checkFunctional(pkt, blk_addr))) {
+        otherSidePort->checkAndSendFunctional(pkt);
     }
-
-    // There can be many matching outstanding writes.
-    std::vector<MSHR*> writes;
-    assert(!writeBuffer.findMatches(blk_addr, writes));
-/*  Need to change this to iterate through targets in mshr??
-    for (int i = 0; i < writes.size(); ++i) {
-        MSHR *mshr = writes[i];
-        if (pkt->checkFunctional(mshr->addr, mshr->size, mshr->writeData))
-            return;
-    }
-*/
-
-    otherSidePort->checkAndSendFunctional(pkt);
 }
 
 
diff --git a/src/mem/cache/miss/mshr.cc b/src/mem/cache/miss/mshr.cc
index 5ba3d1ec5..7796773a3 100644
--- a/src/mem/cache/miss/mshr.cc
+++ b/src/mem/cache/miss/mshr.cc
@@ -118,6 +118,20 @@ MSHR::TargetList::clearDownstreamPending()
 }
 
 
+bool
+MSHR::TargetList::checkFunctional(PacketPtr pkt)
+{
+    Iterator end_i = end();
+    for (Iterator i = begin(); i != end_i; ++i) {
+        if (pkt->checkFunctional(i->pkt)) {
+            return true;
+        }
+    }
+
+    return false;
+}
+
+
 void
 MSHR::allocate(Addr _addr, int _size, PacketPtr target,
                Tick whenReady, Counter _order)
diff --git a/src/mem/cache/miss/mshr.hh b/src/mem/cache/miss/mshr.hh
index e850a8633..c865ca3ac 100644
--- a/src/mem/cache/miss/mshr.hh
+++ b/src/mem/cache/miss/mshr.hh
@@ -82,6 +82,7 @@ class MSHR : public Packet::SenderState
         void add(PacketPtr pkt, Tick readyTime, Counter order, bool cpuSide);
         void replaceUpgrades();
         void clearDownstreamPending();
+        bool checkFunctional(PacketPtr pkt);
     };
 
     /** A list of MSHRs. */
@@ -230,6 +231,11 @@ public:
 
     void handleFill(Packet *pkt, CacheBlk *blk);
 
+    bool checkFunctional(PacketPtr pkt) {
+        return (targets->checkFunctional(pkt) ||
+                deferredTargets->checkFunctional(pkt));
+    }
+
     /**
      * Prints the contents of this MSHR to stderr.
      */
diff --git a/src/mem/cache/miss/mshr_queue.cc b/src/mem/cache/miss/mshr_queue.cc
index 50a28fb3c..911329e0c 100644
--- a/src/mem/cache/miss/mshr_queue.cc
+++ b/src/mem/cache/miss/mshr_queue.cc
@@ -84,9 +84,24 @@ MSHRQueue::findMatches(Addr addr, vector<MSHR*>& matches) const
         }
     }
     return retval;
+}
+
 
+bool
+MSHRQueue::checkFunctional(PacketPtr pkt, Addr blk_addr)
+{
+    MSHR::ConstIterator i = allocatedList.begin();
+    MSHR::ConstIterator end = allocatedList.end();
+    for (; i != end; ++i) {
+        MSHR *mshr = *i;
+        if (mshr->addr == blk_addr && mshr->checkFunctional(pkt)) {
+            return true;
+        }
+    }
+    return false;
 }
 
+
 MSHR *
 MSHRQueue::findPending(Addr addr, int size) const
 {
diff --git a/src/mem/cache/miss/mshr_queue.hh b/src/mem/cache/miss/mshr_queue.hh
index 1f1d59e98..447ebfc5a 100644
--- a/src/mem/cache/miss/mshr_queue.hh
+++ b/src/mem/cache/miss/mshr_queue.hh
@@ -115,6 +115,8 @@ class MSHRQueue
      */
     MSHR *findPending(Addr addr, int size) const;
 
+    bool checkFunctional(PacketPtr pkt, Addr blk_addr);
+
     /**
      * Allocates a new MSHR for the request and size. This places the request
      * as the first target in the MSHR.
-- 
cgit v1.2.3


From 8705b0799bddef95d9957a03ee7ffb8fbb1bdec7 Mon Sep 17 00:00:00 2001
From: Steve Reinhardt <stever@eecs.umich.edu>
Date: Fri, 27 Jul 2007 12:46:55 -0700
Subject: packet: get rid of unused intersect() function.

--HG--
extra : convert_revision : f0a2947ccc49e0d18bc17a59371fa396d9ebd6c0
---
 src/mem/packet.cc | 12 ------------
 src/mem/packet.hh |  3 ---
 2 files changed, 15 deletions(-)

diff --git a/src/mem/packet.cc b/src/mem/packet.cc
index 8cd356768..c7c6ec083 100644
--- a/src/mem/packet.cc
+++ b/src/mem/packet.cc
@@ -152,18 +152,6 @@ Packet::allocate()
     data = new uint8_t[getSize()];
 }
 
-/** Do the packet modify the same addresses. */
-bool
-Packet::intersect(PacketPtr p)
-{
-    Addr s1 = getAddr();
-    Addr e1 = getAddr() + getSize() - 1;
-    Addr s2 = p->getAddr();
-    Addr e2 = p->getAddr() + p->getSize() - 1;
-
-    return !(s1 > e2 || e1 < s2);
-}
-
 
 bool
 Packet::checkFunctional(Addr addr, int size, uint8_t *data)
diff --git a/src/mem/packet.hh b/src/mem/packet.hh
index c6534d6c9..2b650a51e 100644
--- a/src/mem/packet.hh
+++ b/src/mem/packet.hh
@@ -564,9 +564,6 @@ class Packet : public FastAlloc
     /** If there isn't data in the packet, allocate some. */
     void allocate();
 
-    /** Do the packet modify the same addresses. */
-    bool intersect(PacketPtr p);
-
     /**
      * Check a functional request against a memory value represented
      * by a base/size pair and an associated data array.  If the
-- 
cgit v1.2.3

-- 
cgit v1.2.3


From aaf59949e58ceb617aa4efd04597a63c88638b9d Mon Sep 17 00:00:00 2001
From: Steve Reinhardt <stever@eecs.umich.edu>
Date: Sat, 28 Jul 2007 18:00:05 -0700
Subject: AtomicSimpleCPU: fix inadvertent loss of endian conversion on read.

--HG--
extra : convert_revision : 367bf2431bf4f4eb7c4d5723816e5db6f7233aed
---
 src/cpu/simple/atomic.cc | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/cpu/simple/atomic.cc b/src/cpu/simple/atomic.cc
index 054f67d69..b830cbf3a 100644
--- a/src/cpu/simple/atomic.cc
+++ b/src/cpu/simple/atomic.cc
@@ -293,6 +293,8 @@ AtomicSimpleCPU::read(Addr addr, T &data, unsigned flags)
         dcache_access = true;
         assert(!pkt.isError());
 
+        data = gtoh(data);
+
         if (req->isLocked()) {
             TheISA::handleLockedRead(thread, req);
         }
-- 
cgit v1.2.3


From 4a7d0c4b79450e05b87da4cfc48c2361758127c1 Mon Sep 17 00:00:00 2001
From: Steve Reinhardt <stever@eecs.umich.edu>
Date: Sun, 29 Jul 2007 13:24:48 -0700
Subject: bus: take out response prioritization (timing was messed up). Also
 make express snoops not occupy bus (since they're magic).

--HG--
extra : convert_revision : 75aa5211a59380026d1e3f122778425e48e2edcd
---
 src/mem/bus.cc | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/mem/bus.cc b/src/mem/bus.cc
index 518c9dbfa..cb359734b 100644
--- a/src/mem/bus.cc
+++ b/src/mem/bus.cc
@@ -180,7 +180,7 @@ Bus::recvTiming(PacketPtr pkt)
 
     // If the bus is busy, or other devices are in line ahead of the current
     // one, put this device on the retry list.
-    if (!(pkt->isResponse() || pkt->isExpressSnoop()) &&
+    if (!pkt->isExpressSnoop() &&
         (tickNextIdle > curTick ||
          (retryList.size() && (!inRetry || src_port != retryList.front()))))
     {
@@ -189,7 +189,9 @@ Bus::recvTiming(PacketPtr pkt)
         return false;
     }
 
-    occupyBus(pkt);
+    if (!pkt->isExpressSnoop()) {
+        occupyBus(pkt);
+    }
 
     short dest = pkt->getDest();
     int dest_port_id;
-- 
cgit v1.2.3


From 2f93db6f95b02d2bedf9571330a3185ac3fa7fa9 Mon Sep 17 00:00:00 2001
From: Steve Reinhardt <stever@gmail.com>
Date: Sun, 29 Jul 2007 20:17:03 -0700
Subject: memory system: fix functional access bug. Make sure not to keep
 processing functional accesses after they've been responded to. Also use
 checkFunctional() return value instead of checking packet command field where
 possible, mostly just for consistency.

--HG--
extra : convert_revision : 29fc76bc18731bd93a4ed05a281297827028ef75
---
 src/mem/cache/base_cache.cc |  4 ++--
 src/mem/cache/cache_impl.hh |  8 ++++----
 src/mem/physical.cc         | 12 ++++++------
 src/mem/tport.cc            | 13 +++++++------
 src/mem/tport.hh            |  2 +-
 5 files changed, 20 insertions(+), 19 deletions(-)

diff --git a/src/mem/cache/base_cache.cc b/src/mem/cache/base_cache.cc
index ec9e1cf9b..b44468486 100644
--- a/src/mem/cache/base_cache.cc
+++ b/src/mem/cache/base_cache.cc
@@ -81,9 +81,9 @@ BaseCache::CachePort::deviceBlockSize()
 void
 BaseCache::CachePort::checkAndSendFunctional(PacketPtr pkt)
 {
-    checkFunctional(pkt);
-    if (!pkt->isResponse())
+    if (!checkFunctional(pkt)) {
         sendFunctional(pkt);
+    }
 }
 
 
diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh
index c1b01d676..d144266ed 100644
--- a/src/mem/cache/cache_impl.hh
+++ b/src/mem/cache/cache_impl.hh
@@ -1253,9 +1253,9 @@ template<class TagStore>
 void
 Cache<TagStore>::CpuSidePort::recvFunctional(PacketPtr pkt)
 {
-    checkFunctional(pkt);
-    if (!pkt->isResponse())
+    if (!checkFunctional(pkt)) {
         myCache()->functionalAccess(pkt, cache->memSidePort);
+    }
 }
 
 
@@ -1327,9 +1327,9 @@ template<class TagStore>
 void
 Cache<TagStore>::MemSidePort::recvFunctional(PacketPtr pkt)
 {
-    checkFunctional(pkt);
-    if (!pkt->isResponse())
+    if (!checkFunctional(pkt)) {
         myCache()->functionalAccess(pkt, cache->cpuSidePort);
+    }
 }
 
 
diff --git a/src/mem/physical.cc b/src/mem/physical.cc
index b96fb8a56..2f358daf2 100644
--- a/src/mem/physical.cc
+++ b/src/mem/physical.cc
@@ -400,12 +400,12 @@ PhysicalMemory::MemoryPort::recvAtomic(PacketPtr pkt)
 void
 PhysicalMemory::MemoryPort::recvFunctional(PacketPtr pkt)
 {
-    checkFunctional(pkt);
-
-    // Default implementation of SimpleTimingPort::recvFunctional()
-    // calls recvAtomic() and throws away the latency; we can save a
-    // little here by just not calculating the latency.
-    memory->doFunctionalAccess(pkt);
+    if (!checkFunctional(pkt)) {
+        // Default implementation of SimpleTimingPort::recvFunctional()
+        // calls recvAtomic() and throws away the latency; we can save a
+        // little here by just not calculating the latency.
+        memory->doFunctionalAccess(pkt);
+    }
 }
 
 unsigned int
diff --git a/src/mem/tport.cc b/src/mem/tport.cc
index e4b8d70e9..b1a6a4813 100644
--- a/src/mem/tport.cc
+++ b/src/mem/tport.cc
@@ -30,7 +30,7 @@
 
 #include "mem/tport.hh"
 
-void
+bool
 SimpleTimingPort::checkFunctional(PacketPtr pkt)
 {
     DeferredPacketIterator i = transmitList.begin();
@@ -41,19 +41,20 @@ SimpleTimingPort::checkFunctional(PacketPtr pkt)
         // If the target contains data, and it overlaps the
         // probed request, need to update data
         if (pkt->checkFunctional(target)) {
-            return;
+            return true;
         }
     }
+
+    return false;
 }
 
 void
 SimpleTimingPort::recvFunctional(PacketPtr pkt)
 {
-    checkFunctional(pkt);
-
-    // Just do an atomic access and throw away the returned latency
-    if (!pkt->isResponse())
+    if (!checkFunctional(pkt)) {
+        // Just do an atomic access and throw away the returned latency
         recvAtomic(pkt);
+    }
 }
 
 bool
diff --git a/src/mem/tport.hh b/src/mem/tport.hh
index bc9da6c44..d0f1be425 100644
--- a/src/mem/tport.hh
+++ b/src/mem/tport.hh
@@ -99,7 +99,7 @@ class SimpleTimingPort : public Port
 
     /** Check the list of buffered packets against the supplied
      * functional request. */
-    void checkFunctional(PacketPtr funcPkt);
+    bool checkFunctional(PacketPtr funcPkt);
 
     /** Check whether we have a packet ready to go on the transmit list. */
     bool deferredPacketReady()
-- 
cgit v1.2.3


From 62aa1d7f559622dcb04b1b2fe1e2ecec375883a3 Mon Sep 17 00:00:00 2001
From: Steve Reinhardt <stever@gmail.com>
Date: Fri, 3 Aug 2007 03:51:13 -0400
Subject: cache: get rid of obsolete params from python.

--HG--
extra : convert_revision : cd40e0ef938ef6da1cccedf7be01c3ac5b4883fb
---
 src/mem/cache/BaseCache.py | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/src/mem/cache/BaseCache.py b/src/mem/cache/BaseCache.py
index 86148f821..2bf44cdf9 100644
--- a/src/mem/cache/BaseCache.py
+++ b/src/mem/cache/BaseCache.py
@@ -34,15 +34,9 @@ class Prefetch(Enum): vals = ['none', 'tagged', 'stride', 'ghb']
 
 class BaseCache(MemObject):
     type = 'BaseCache'
-    adaptive_compression = Param.Bool(False,
-        "Use an adaptive compression scheme")
     assoc = Param.Int("associativity")
     block_size = Param.Int("block size in bytes")
     latency = Param.Latency("Latency")
-    compressed_bus = Param.Bool(False,
-        "This cache connects to a compressed memory")
-    compression_latency = Param.Latency('0ns',
-        "Latency in cycles of compression algorithm")
     hash_delay = Param.Int(1, "time in cycles of hash access")
     lifo = Param.Bool(False,
         "whether this NIC partition should use LIFO repl. policy")
@@ -56,8 +50,6 @@ class BaseCache(MemObject):
     split = Param.Bool(False, "whether or not this cache is split")
     split_size = Param.Int(0,
         "How many ways of the cache belong to CPU/LRU partition")
-    store_compressed = Param.Bool(False,
-        "Store compressed data in the cache")
     subblock_size = Param.Int(0,
         "Size of subblock in IIC used for compression")
     tgts_per_mshr = Param.Int("max number of accesses per MSHR")
-- 
cgit v1.2.3


From a0bf2535a80c59329c8c14bba724b90fba05da4b Mon Sep 17 00:00:00 2001
From: Steve Reinhardt <stever@gmail.com>
Date: Fri, 3 Aug 2007 18:03:59 -0400
Subject: tests: config.out no longer exists, eliminate ref copy.

--HG--
extra : convert_revision : e2d6aa61aa2ffd1a9d16260244512eeb1fe4d5a3
---
 tests/SConscript                                   |    2 +-
 .../00.gzip/ref/alpha/tru64/o3-timing/config.out   |  366 ------
 .../ref/alpha/tru64/simple-atomic/config.out       |   58 -
 .../ref/alpha/tru64/simple-timing/config.out       |  177 ---
 .../00.gzip/ref/sparc/linux/o3-timing/config.out   |  366 ------
 .../ref/sparc/linux/simple-atomic/config.out       |   58 -
 .../ref/sparc/linux/simple-timing/config.out       |  177 ---
 .../ref/sparc/linux/simple-atomic/config.out       |   58 -
 .../ref/sparc/linux/simple-timing/config.out       |  177 ---
 .../30.eon/ref/alpha/tru64/o3-timing/config.out    |  366 ------
 .../ref/alpha/tru64/simple-atomic/config.out       |   58 -
 .../ref/alpha/tru64/simple-timing/config.out       |  177 ---
 .../ref/alpha/tru64/simple-atomic/config.out       |   58 -
 .../ref/alpha/tru64/simple-timing/config.out       |  177 ---
 .../50.vortex/ref/alpha/tru64/o3-timing/config.out |  366 ------
 .../ref/alpha/tru64/simple-atomic/config.out       |   58 -
 .../ref/alpha/tru64/simple-timing/config.out       |  177 ---
 .../ref/sparc/linux/simple-atomic/config.out       |   58 -
 .../ref/sparc/linux/simple-timing/config.out       |  177 ---
 .../60.bzip2/ref/alpha/tru64/o3-timing/config.out  |  366 ------
 .../ref/alpha/tru64/simple-atomic/config.out       |   58 -
 .../ref/alpha/tru64/simple-timing/config.out       |  177 ---
 .../70.twolf/ref/alpha/tru64/o3-timing/config.out  |  366 ------
 .../ref/alpha/tru64/simple-atomic/config.out       |   58 -
 .../ref/alpha/tru64/simple-timing/config.out       |  177 ---
 .../ref/sparc/linux/simple-atomic/config.out       |   58 -
 .../ref/sparc/linux/simple-timing/config.out       |  177 ---
 .../sparc/solaris/t1000-simple-atomic/config.out   |  417 -------
 .../00.hello/ref/alpha/linux/o3-timing/config.out  |  366 ------
 .../ref/alpha/linux/simple-atomic/config.out       |   58 -
 .../ref/alpha/linux/simple-timing/config.out       |  177 ---
 .../00.hello/ref/alpha/tru64/o3-timing/config.out  |  366 ------
 .../ref/alpha/tru64/simple-atomic/config.out       |   58 -
 .../ref/alpha/tru64/simple-timing/config.out       |  177 ---
 .../ref/mips/linux/simple-atomic/config.out        |   58 -
 .../ref/mips/linux/simple-timing/config.out        |  177 ---
 .../ref/sparc/linux/simple-atomic/config.out       |   58 -
 .../ref/sparc/linux/simple-timing/config.out       |  177 ---
 .../ref/alpha/linux/o3-timing/config.out           |  382 ------
 .../ref/sparc/linux/o3-timing/config.out           |  366 ------
 .../ref/sparc/linux/simple-atomic/config.out       |   58 -
 .../ref/sparc/linux/simple-timing/config.out       |  177 ---
 .../linux/tsunami-simple-atomic-dual/config.out    |  888 --------------
 .../alpha/linux/tsunami-simple-atomic/config.out   |  773 ------------
 .../linux/tsunami-simple-timing-dual/config.out    |  888 --------------
 .../alpha/linux/tsunami-simple-timing/config.out   |  773 ------------
 .../20.eio-short/ref/alpha/eio/detailed/config.out |  279 -----
 .../ref/alpha/eio/simple-atomic/config.out         |   49 -
 .../ref/alpha/eio/simple-timing/config.out         |  168 ---
 .../50.memtest/ref/alpha/linux/memtest/config.out  |  516 --------
 .../linux/twosys-tsunami-simple-atomic/config.out  | 1268 --------------------
 51 files changed, 1 insertion(+), 13221 deletions(-)
 delete mode 100644 tests/long/00.gzip/ref/alpha/tru64/o3-timing/config.out
 delete mode 100644 tests/long/00.gzip/ref/alpha/tru64/simple-atomic/config.out
 delete mode 100644 tests/long/00.gzip/ref/alpha/tru64/simple-timing/config.out
 delete mode 100644 tests/long/00.gzip/ref/sparc/linux/o3-timing/config.out
 delete mode 100644 tests/long/00.gzip/ref/sparc/linux/simple-atomic/config.out
 delete mode 100644 tests/long/00.gzip/ref/sparc/linux/simple-timing/config.out
 delete mode 100644 tests/long/10.mcf/ref/sparc/linux/simple-atomic/config.out
 delete mode 100644 tests/long/10.mcf/ref/sparc/linux/simple-timing/config.out
 delete mode 100644 tests/long/30.eon/ref/alpha/tru64/o3-timing/config.out
 delete mode 100644 tests/long/30.eon/ref/alpha/tru64/simple-atomic/config.out
 delete mode 100644 tests/long/30.eon/ref/alpha/tru64/simple-timing/config.out
 delete mode 100644 tests/long/40.perlbmk/ref/alpha/tru64/simple-atomic/config.out
 delete mode 100644 tests/long/40.perlbmk/ref/alpha/tru64/simple-timing/config.out
 delete mode 100644 tests/long/50.vortex/ref/alpha/tru64/o3-timing/config.out
 delete mode 100644 tests/long/50.vortex/ref/alpha/tru64/simple-atomic/config.out
 delete mode 100644 tests/long/50.vortex/ref/alpha/tru64/simple-timing/config.out
 delete mode 100644 tests/long/50.vortex/ref/sparc/linux/simple-atomic/config.out
 delete mode 100644 tests/long/50.vortex/ref/sparc/linux/simple-timing/config.out
 delete mode 100644 tests/long/60.bzip2/ref/alpha/tru64/o3-timing/config.out
 delete mode 100644 tests/long/60.bzip2/ref/alpha/tru64/simple-atomic/config.out
 delete mode 100644 tests/long/60.bzip2/ref/alpha/tru64/simple-timing/config.out
 delete mode 100644 tests/long/70.twolf/ref/alpha/tru64/o3-timing/config.out
 delete mode 100644 tests/long/70.twolf/ref/alpha/tru64/simple-atomic/config.out
 delete mode 100644 tests/long/70.twolf/ref/alpha/tru64/simple-timing/config.out
 delete mode 100644 tests/long/70.twolf/ref/sparc/linux/simple-atomic/config.out
 delete mode 100644 tests/long/70.twolf/ref/sparc/linux/simple-timing/config.out
 delete mode 100644 tests/long/80.solaris-boot/ref/sparc/solaris/t1000-simple-atomic/config.out
 delete mode 100644 tests/quick/00.hello/ref/alpha/linux/o3-timing/config.out
 delete mode 100644 tests/quick/00.hello/ref/alpha/linux/simple-atomic/config.out
 delete mode 100644 tests/quick/00.hello/ref/alpha/linux/simple-timing/config.out
 delete mode 100644 tests/quick/00.hello/ref/alpha/tru64/o3-timing/config.out
 delete mode 100644 tests/quick/00.hello/ref/alpha/tru64/simple-atomic/config.out
 delete mode 100644 tests/quick/00.hello/ref/alpha/tru64/simple-timing/config.out
 delete mode 100644 tests/quick/00.hello/ref/mips/linux/simple-atomic/config.out
 delete mode 100644 tests/quick/00.hello/ref/mips/linux/simple-timing/config.out
 delete mode 100644 tests/quick/00.hello/ref/sparc/linux/simple-atomic/config.out
 delete mode 100644 tests/quick/00.hello/ref/sparc/linux/simple-timing/config.out
 delete mode 100644 tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/config.out
 delete mode 100644 tests/quick/02.insttest/ref/sparc/linux/o3-timing/config.out
 delete mode 100644 tests/quick/02.insttest/ref/sparc/linux/simple-atomic/config.out
 delete mode 100644 tests/quick/02.insttest/ref/sparc/linux/simple-timing/config.out
 delete mode 100644 tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-atomic-dual/config.out
 delete mode 100644 tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-atomic/config.out
 delete mode 100644 tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-timing-dual/config.out
 delete mode 100644 tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-timing/config.out
 delete mode 100644 tests/quick/20.eio-short/ref/alpha/eio/detailed/config.out
 delete mode 100644 tests/quick/20.eio-short/ref/alpha/eio/simple-atomic/config.out
 delete mode 100644 tests/quick/20.eio-short/ref/alpha/eio/simple-timing/config.out
 delete mode 100644 tests/quick/50.memtest/ref/alpha/linux/memtest/config.out
 delete mode 100644 tests/quick/80.netperf-stream/ref/alpha/linux/twosys-tsunami-simple-atomic/config.out

diff --git a/tests/SConscript b/tests/SConscript
index 472fa3f4c..812ce8c11 100644
--- a/tests/SConscript
+++ b/tests/SConscript
@@ -114,7 +114,7 @@ def update_test(target, source, env):
     src_dir = str(source[1].get_dir())
     dest_files = os.listdir(dest_dir)
     src_files = os.listdir(src_dir)
-    for f in ('stdout', 'stderr', 'm5stats.txt', 'config.ini', 'config.out'):
+    for f in ('stdout', 'stderr', 'm5stats.txt', 'config.ini'):
         if f in dest_files:
             print "  Replacing file", f
             dest_files.remove(f)
diff --git a/tests/long/00.gzip/ref/alpha/tru64/o3-timing/config.out b/tests/long/00.gzip/ref/alpha/tru64/o3-timing/config.out
deleted file mode 100644
index 24d41aaa7..000000000
--- a/tests/long/00.gzip/ref/alpha/tru64/o3-timing/config.out
+++ /dev/null
@@ -1,366 +0,0 @@
-[root]
-type=Root
-dummy=0
-
-[system.physmem]
-type=PhysicalMemory
-file=
-range=[0,134217727]
-latency=1
-zero=false
-
-[system]
-type=System
-physmem=system.physmem
-mem_mode=atomic
-
-[system.membus]
-type=Bus
-bus_id=0
-clock=1000
-width=64
-responder_set=false
-block_size=64
-
-[system.cpu.workload]
-type=LiveProcess
-cmd=gzip input.log 1
-executable=/dist/m5/cpu2000/binaries/alpha/tru64/gzip
-input=cin
-output=cout
-env=
-cwd=build/ALPHA_SE/tests/fast/long/00.gzip/alpha/tru64/o3-timing
-system=system
-uid=100
-euid=100
-gid=100
-egid=100
-pid=100
-ppid=99
-
-[system.cpu.fuPool.FUList0.opList0]
-type=OpDesc
-opClass=IntAlu
-opLat=1
-issueLat=1
-
-[system.cpu.fuPool.FUList0]
-type=FUDesc
-opList=system.cpu.fuPool.FUList0.opList0
-count=6
-
-[system.cpu.fuPool.FUList1.opList0]
-type=OpDesc
-opClass=IntMult
-opLat=3
-issueLat=1
-
-[system.cpu.fuPool.FUList1.opList1]
-type=OpDesc
-opClass=IntDiv
-opLat=20
-issueLat=19
-
-[system.cpu.fuPool.FUList1]
-type=FUDesc
-opList=system.cpu.fuPool.FUList1.opList0 system.cpu.fuPool.FUList1.opList1
-count=2
-
-[system.cpu.fuPool.FUList2.opList0]
-type=OpDesc
-opClass=FloatAdd
-opLat=2
-issueLat=1
-
-[system.cpu.fuPool.FUList2.opList1]
-type=OpDesc
-opClass=FloatCmp
-opLat=2
-issueLat=1
-
-[system.cpu.fuPool.FUList2.opList2]
-type=OpDesc
-opClass=FloatCvt
-opLat=2
-issueLat=1
-
-[system.cpu.fuPool.FUList2]
-type=FUDesc
-opList=system.cpu.fuPool.FUList2.opList0 system.cpu.fuPool.FUList2.opList1 system.cpu.fuPool.FUList2.opList2
-count=4
-
-[system.cpu.fuPool.FUList3.opList0]
-type=OpDesc
-opClass=FloatMult
-opLat=4
-issueLat=1
-
-[system.cpu.fuPool.FUList3.opList1]
-type=OpDesc
-opClass=FloatDiv
-opLat=12
-issueLat=12
-
-[system.cpu.fuPool.FUList3.opList2]
-type=OpDesc
-opClass=FloatSqrt
-opLat=24
-issueLat=24
-
-[system.cpu.fuPool.FUList3]
-type=FUDesc
-opList=system.cpu.fuPool.FUList3.opList0 system.cpu.fuPool.FUList3.opList1 system.cpu.fuPool.FUList3.opList2
-count=2
-
-[system.cpu.fuPool.FUList4.opList0]
-type=OpDesc
-opClass=MemRead
-opLat=1
-issueLat=1
-
-[system.cpu.fuPool.FUList4]
-type=FUDesc
-opList=system.cpu.fuPool.FUList4.opList0
-count=0
-
-[system.cpu.fuPool.FUList5.opList0]
-type=OpDesc
-opClass=MemWrite
-opLat=1
-issueLat=1
-
-[system.cpu.fuPool.FUList5]
-type=FUDesc
-opList=system.cpu.fuPool.FUList5.opList0
-count=0
-
-[system.cpu.fuPool.FUList6.opList0]
-type=OpDesc
-opClass=MemRead
-opLat=1
-issueLat=1
-
-[system.cpu.fuPool.FUList6.opList1]
-type=OpDesc
-opClass=MemWrite
-opLat=1
-issueLat=1
-
-[system.cpu.fuPool.FUList6]
-type=FUDesc
-opList=system.cpu.fuPool.FUList6.opList0 system.cpu.fuPool.FUList6.opList1
-count=4
-
-[system.cpu.fuPool.FUList7.opList0]
-type=OpDesc
-opClass=IprAccess
-opLat=3
-issueLat=3
-
-[system.cpu.fuPool.FUList7]
-type=FUDesc
-opList=system.cpu.fuPool.FUList7.opList0
-count=1
-
-[system.cpu.fuPool]
-type=FUPool
-FUList=system.cpu.fuPool.FUList0 system.cpu.fuPool.FUList1 system.cpu.fuPool.FUList2 system.cpu.fuPool.FUList3 system.cpu.fuPool.FUList4 system.cpu.fuPool.FUList5 system.cpu.fuPool.FUList6 system.cpu.fuPool.FUList7
-
-[system.cpu]
-type=DerivO3CPU
-clock=500
-phase=0
-numThreads=1
-cpu_id=0
-activity=0
-workload=system.cpu.workload
-checker=null
-max_insts_any_thread=0
-max_insts_all_threads=0
-max_loads_any_thread=0
-max_loads_all_threads=0
-progress_interval=0
-cachePorts=200
-decodeToFetchDelay=1
-renameToFetchDelay=1
-iewToFetchDelay=1
-commitToFetchDelay=1
-fetchWidth=8
-renameToDecodeDelay=1
-iewToDecodeDelay=1
-commitToDecodeDelay=1
-fetchToDecodeDelay=1
-decodeWidth=8
-iewToRenameDelay=1
-commitToRenameDelay=1
-decodeToRenameDelay=1
-renameWidth=8
-commitToIEWDelay=1
-renameToIEWDelay=2
-issueToExecuteDelay=1
-dispatchWidth=8
-issueWidth=8
-wbWidth=8
-wbDepth=1
-fuPool=system.cpu.fuPool
-iewToCommitDelay=1
-renameToROBDelay=1
-commitWidth=8
-squashWidth=8
-trapLatency=13
-backComSize=5
-forwardComSize=5
-predType=tournament
-localPredictorSize=2048
-localCtrBits=2
-localHistoryTableSize=2048
-localHistoryBits=11
-globalPredictorSize=8192
-globalCtrBits=2
-globalHistoryBits=13
-choicePredictorSize=8192
-choiceCtrBits=2
-BTBEntries=4096
-BTBTagSize=16
-RASSize=16
-LQEntries=32
-SQEntries=32
-LFSTSize=1024
-SSITSize=1024
-numPhysIntRegs=256
-numPhysFloatRegs=256
-numIQEntries=64
-numROBEntries=192
-smtNumFetchingThreads=1
-smtFetchPolicy=SingleThread
-smtLSQPolicy=Partitioned
-smtLSQThreshold=100
-smtIQPolicy=Partitioned
-smtIQThreshold=100
-smtROBPolicy=Partitioned
-smtROBThreshold=100
-smtCommitPolicy=RoundRobin
-instShiftAmt=2
-defer_registration=false
-function_trace=false
-function_trace_start=0
-
-[system.cpu.icache]
-type=BaseCache
-size=131072
-assoc=2
-block_size=64
-latency=1000
-mshrs=10
-tgts_per_mshr=20
-write_buffers=8
-prioritizeRequests=false
-protocol=null
-trace_addr=0
-hash_delay=1
-repl=null
-compressed_bus=false
-store_compressed=false
-adaptive_compression=false
-compression_latency=0
-block_size=64
-max_miss_count=0
-addr_range=[0,18446744073709551615]
-split=false
-split_size=0
-lifo=false
-two_queue=false
-prefetch_miss=false
-prefetch_access=false
-prefetcher_size=100
-prefetch_past_page=false
-prefetch_serial_squash=false
-prefetch_latency=10
-prefetch_degree=1
-prefetch_policy=none
-prefetch_cache_check_push=true
-prefetch_use_cpu_id=true
-prefetch_data_accesses_only=false
-
-[system.cpu.dcache]
-type=BaseCache
-size=262144
-assoc=2
-block_size=64
-latency=1000
-mshrs=10
-tgts_per_mshr=20
-write_buffers=8
-prioritizeRequests=false
-protocol=null
-trace_addr=0
-hash_delay=1
-repl=null
-compressed_bus=false
-store_compressed=false
-adaptive_compression=false
-compression_latency=0
-block_size=64
-max_miss_count=0
-addr_range=[0,18446744073709551615]
-split=false
-split_size=0
-lifo=false
-two_queue=false
-prefetch_miss=false
-prefetch_access=false
-prefetcher_size=100
-prefetch_past_page=false
-prefetch_serial_squash=false
-prefetch_latency=10
-prefetch_degree=1
-prefetch_policy=none
-prefetch_cache_check_push=true
-prefetch_use_cpu_id=true
-prefetch_data_accesses_only=false
-
-[system.cpu.l2cache]
-type=BaseCache
-size=2097152
-assoc=2
-block_size=64
-latency=1000
-mshrs=10
-tgts_per_mshr=5
-write_buffers=8
-prioritizeRequests=false
-protocol=null
-trace_addr=0
-hash_delay=1
-repl=null
-compressed_bus=false
-store_compressed=false
-adaptive_compression=false
-compression_latency=0
-block_size=64
-max_miss_count=0
-addr_range=[0,18446744073709551615]
-split=false
-split_size=0
-lifo=false
-two_queue=false
-prefetch_miss=false
-prefetch_access=false
-prefetcher_size=100
-prefetch_past_page=false
-prefetch_serial_squash=false
-prefetch_latency=10
-prefetch_degree=1
-prefetch_policy=none
-prefetch_cache_check_push=true
-prefetch_use_cpu_id=true
-prefetch_data_accesses_only=false
-
-[system.cpu.toL2Bus]
-type=Bus
-bus_id=0
-clock=1000
-width=64
-responder_set=false
-block_size=64
-
diff --git a/tests/long/00.gzip/ref/alpha/tru64/simple-atomic/config.out b/tests/long/00.gzip/ref/alpha/tru64/simple-atomic/config.out
deleted file mode 100644
index 589507187..000000000
--- a/tests/long/00.gzip/ref/alpha/tru64/simple-atomic/config.out
+++ /dev/null
@@ -1,58 +0,0 @@
-[root]
-type=Root
-dummy=0
-
-[system.physmem]
-type=PhysicalMemory
-file=
-range=[0,134217727]
-latency=1
-zero=false
-
-[system]
-type=System
-physmem=system.physmem
-mem_mode=atomic
-
-[system.membus]
-type=Bus
-bus_id=0
-clock=1000
-width=64
-responder_set=false
-block_size=64
-
-[system.cpu.workload]
-type=LiveProcess
-cmd=gzip input.log 1
-executable=/dist/m5/cpu2000/binaries/alpha/tru64/gzip
-input=cin
-output=cout
-env=
-cwd=build/ALPHA_SE/tests/fast/long/00.gzip/alpha/tru64/simple-atomic
-system=system
-uid=100
-euid=100
-gid=100
-egid=100
-pid=100
-ppid=99
-
-[system.cpu]
-type=AtomicSimpleCPU
-max_insts_any_thread=0
-max_insts_all_threads=0
-max_loads_any_thread=0
-max_loads_all_threads=0
-progress_interval=0
-system=system
-cpu_id=0
-workload=system.cpu.workload
-clock=500
-phase=0
-defer_registration=false
-width=1
-function_trace=false
-function_trace_start=0
-simulate_stalls=false
-
diff --git a/tests/long/00.gzip/ref/alpha/tru64/simple-timing/config.out b/tests/long/00.gzip/ref/alpha/tru64/simple-timing/config.out
deleted file mode 100644
index 5cab10662..000000000
--- a/tests/long/00.gzip/ref/alpha/tru64/simple-timing/config.out
+++ /dev/null
@@ -1,177 +0,0 @@
-[root]
-type=Root
-dummy=0
-
-[system.physmem]
-type=PhysicalMemory
-file=
-range=[0,134217727]
-latency=1
-zero=false
-
-[system]
-type=System
-physmem=system.physmem
-mem_mode=atomic
-
-[system.membus]
-type=Bus
-bus_id=0
-clock=1000
-width=64
-responder_set=false
-block_size=64
-
-[system.cpu.workload]
-type=LiveProcess
-cmd=gzip input.log 1
-executable=/dist/m5/cpu2000/binaries/alpha/tru64/gzip
-input=cin
-output=cout
-env=
-cwd=build/ALPHA_SE/tests/fast/long/00.gzip/alpha/tru64/simple-timing
-system=system
-uid=100
-euid=100
-gid=100
-egid=100
-pid=100
-ppid=99
-
-[system.cpu]
-type=TimingSimpleCPU
-max_insts_any_thread=0
-max_insts_all_threads=0
-max_loads_any_thread=0
-max_loads_all_threads=0
-progress_interval=0
-system=system
-cpu_id=0
-workload=system.cpu.workload
-clock=500
-phase=0
-defer_registration=false
-// width not specified
-function_trace=false
-function_trace_start=0
-// simulate_stalls not specified
-
-[system.cpu.toL2Bus]
-type=Bus
-bus_id=0
-clock=1000
-width=64
-responder_set=false
-block_size=64
-
-[system.cpu.icache]
-type=BaseCache
-size=131072
-assoc=2
-block_size=64
-latency=1000
-mshrs=10
-tgts_per_mshr=5
-write_buffers=8
-prioritizeRequests=false
-protocol=null
-trace_addr=0
-hash_delay=1
-repl=null
-compressed_bus=false
-store_compressed=false
-adaptive_compression=false
-compression_latency=0
-block_size=64
-max_miss_count=0
-addr_range=[0,18446744073709551615]
-split=false
-split_size=0
-lifo=false
-two_queue=false
-prefetch_miss=false
-prefetch_access=false
-prefetcher_size=100
-prefetch_past_page=false
-prefetch_serial_squash=false
-prefetch_latency=10
-prefetch_degree=1
-prefetch_policy=none
-prefetch_cache_check_push=true
-prefetch_use_cpu_id=true
-prefetch_data_accesses_only=false
-
-[system.cpu.dcache]
-type=BaseCache
-size=262144
-assoc=2
-block_size=64
-latency=1000
-mshrs=10
-tgts_per_mshr=5
-write_buffers=8
-prioritizeRequests=false
-protocol=null
-trace_addr=0
-hash_delay=1
-repl=null
-compressed_bus=false
-store_compressed=false
-adaptive_compression=false
-compression_latency=0
-block_size=64
-max_miss_count=0
-addr_range=[0,18446744073709551615]
-split=false
-split_size=0
-lifo=false
-two_queue=false
-prefetch_miss=false
-prefetch_access=false
-prefetcher_size=100
-prefetch_past_page=false
-prefetch_serial_squash=false
-prefetch_latency=10
-prefetch_degree=1
-prefetch_policy=none
-prefetch_cache_check_push=true
-prefetch_use_cpu_id=true
-prefetch_data_accesses_only=false
-
-[system.cpu.l2cache]
-type=BaseCache
-size=2097152
-assoc=2
-block_size=64
-latency=10000
-mshrs=10
-tgts_per_mshr=5
-write_buffers=8
-prioritizeRequests=false
-protocol=null
-trace_addr=0
-hash_delay=1
-repl=null
-compressed_bus=false
-store_compressed=false
-adaptive_compression=false
-compression_latency=0
-block_size=64
-max_miss_count=0
-addr_range=[0,18446744073709551615]
-split=false
-split_size=0
-lifo=false
-two_queue=false
-prefetch_miss=false
-prefetch_access=false
-prefetcher_size=100
-prefetch_past_page=false
-prefetch_serial_squash=false
-prefetch_latency=10
-prefetch_degree=1
-prefetch_policy=none
-prefetch_cache_check_push=true
-prefetch_use_cpu_id=true
-prefetch_data_accesses_only=false
-
diff --git a/tests/long/00.gzip/ref/sparc/linux/o3-timing/config.out b/tests/long/00.gzip/ref/sparc/linux/o3-timing/config.out
deleted file mode 100644
index b8a2728b3..000000000
--- a/tests/long/00.gzip/ref/sparc/linux/o3-timing/config.out
+++ /dev/null
@@ -1,366 +0,0 @@
-[root]
-type=Root
-dummy=0
-
-[system.physmem]
-type=PhysicalMemory
-file=
-range=[0,134217727]
-latency=1
-zero=false
-
-[system]
-type=System
-physmem=system.physmem
-mem_mode=atomic
-
-[system.membus]
-type=Bus
-bus_id=0
-clock=1000
-width=64
-responder_set=false
-block_size=64
-
-[system.cpu.workload]
-type=LiveProcess
-cmd=gzip input.log 1
-executable=/dist/m5/cpu2000/binaries/sparc/linux/gzip
-input=cin
-output=cout
-env=
-cwd=build/SPARC_SE/tests/fast/long/00.gzip/sparc/linux/o3-timing
-system=system
-uid=100
-euid=100
-gid=100
-egid=100
-pid=100
-ppid=99
-
-[system.cpu.fuPool.FUList0.opList0]
-type=OpDesc
-opClass=IntAlu
-opLat=1
-issueLat=1
-
-[system.cpu.fuPool.FUList0]
-type=FUDesc
-opList=system.cpu.fuPool.FUList0.opList0
-count=6
-
-[system.cpu.fuPool.FUList1.opList0]
-type=OpDesc
-opClass=IntMult
-opLat=3
-issueLat=1
-
-[system.cpu.fuPool.FUList1.opList1]
-type=OpDesc
-opClass=IntDiv
-opLat=20
-issueLat=19
-
-[system.cpu.fuPool.FUList1]
-type=FUDesc
-opList=system.cpu.fuPool.FUList1.opList0 system.cpu.fuPool.FUList1.opList1
-count=2
-
-[system.cpu.fuPool.FUList2.opList0]
-type=OpDesc
-opClass=FloatAdd
-opLat=2
-issueLat=1
-
-[system.cpu.fuPool.FUList2.opList1]
-type=OpDesc
-opClass=FloatCmp
-opLat=2
-issueLat=1
-
-[system.cpu.fuPool.FUList2.opList2]
-type=OpDesc
-opClass=FloatCvt
-opLat=2
-issueLat=1
-
-[system.cpu.fuPool.FUList2]
-type=FUDesc
-opList=system.cpu.fuPool.FUList2.opList0 system.cpu.fuPool.FUList2.opList1 system.cpu.fuPool.FUList2.opList2
-count=4
-
-[system.cpu.fuPool.FUList3.opList0]
-type=OpDesc
-opClass=FloatMult
-opLat=4
-issueLat=1
-
-[system.cpu.fuPool.FUList3.opList1]
-type=OpDesc
-opClass=FloatDiv
-opLat=12
-issueLat=12
-
-[system.cpu.fuPool.FUList3.opList2]
-type=OpDesc
-opClass=FloatSqrt
-opLat=24
-issueLat=24
-
-[system.cpu.fuPool.FUList3]
-type=FUDesc
-opList=system.cpu.fuPool.FUList3.opList0 system.cpu.fuPool.FUList3.opList1 system.cpu.fuPool.FUList3.opList2
-count=2
-
-[system.cpu.fuPool.FUList4.opList0]
-type=OpDesc
-opClass=MemRead
-opLat=1
-issueLat=1
-
-[system.cpu.fuPool.FUList4]
-type=FUDesc
-opList=system.cpu.fuPool.FUList4.opList0
-count=0
-
-[system.cpu.fuPool.FUList5.opList0]
-type=OpDesc
-opClass=MemWrite
-opLat=1
-issueLat=1
-
-[system.cpu.fuPool.FUList5]
-type=FUDesc
-opList=system.cpu.fuPool.FUList5.opList0
-count=0
-
-[system.cpu.fuPool.FUList6.opList0]
-type=OpDesc
-opClass=MemRead
-opLat=1
-issueLat=1
-
-[system.cpu.fuPool.FUList6.opList1]
-type=OpDesc
-opClass=MemWrite
-opLat=1
-issueLat=1
-
-[system.cpu.fuPool.FUList6]
-type=FUDesc
-opList=system.cpu.fuPool.FUList6.opList0 system.cpu.fuPool.FUList6.opList1
-count=4
-
-[system.cpu.fuPool.FUList7.opList0]
-type=OpDesc
-opClass=IprAccess
-opLat=3
-issueLat=3
-
-[system.cpu.fuPool.FUList7]
-type=FUDesc
-opList=system.cpu.fuPool.FUList7.opList0
-count=1
-
-[system.cpu.fuPool]
-type=FUPool
-FUList=system.cpu.fuPool.FUList0 system.cpu.fuPool.FUList1 system.cpu.fuPool.FUList2 system.cpu.fuPool.FUList3 system.cpu.fuPool.FUList4 system.cpu.fuPool.FUList5 system.cpu.fuPool.FUList6 system.cpu.fuPool.FUList7
-
-[system.cpu]
-type=DerivO3CPU
-clock=500
-phase=0
-numThreads=1
-cpu_id=0
-activity=0
-workload=system.cpu.workload
-checker=null
-max_insts_any_thread=0
-max_insts_all_threads=0
-max_loads_any_thread=0
-max_loads_all_threads=0
-progress_interval=0
-cachePorts=200
-decodeToFetchDelay=1
-renameToFetchDelay=1
-iewToFetchDelay=1
-commitToFetchDelay=1
-fetchWidth=8
-renameToDecodeDelay=1
-iewToDecodeDelay=1
-commitToDecodeDelay=1
-fetchToDecodeDelay=1
-decodeWidth=8
-iewToRenameDelay=1
-commitToRenameDelay=1
-decodeToRenameDelay=1
-renameWidth=8
-commitToIEWDelay=1
-renameToIEWDelay=2
-issueToExecuteDelay=1
-dispatchWidth=8
-issueWidth=8
-wbWidth=8
-wbDepth=1
-fuPool=system.cpu.fuPool
-iewToCommitDelay=1
-renameToROBDelay=1
-commitWidth=8
-squashWidth=8
-trapLatency=13
-backComSize=5
-forwardComSize=5
-predType=tournament
-localPredictorSize=2048
-localCtrBits=2
-localHistoryTableSize=2048
-localHistoryBits=11
-globalPredictorSize=8192
-globalCtrBits=2
-globalHistoryBits=13
-choicePredictorSize=8192
-choiceCtrBits=2
-BTBEntries=4096
-BTBTagSize=16
-RASSize=16
-LQEntries=32
-SQEntries=32
-LFSTSize=1024
-SSITSize=1024
-numPhysIntRegs=256
-numPhysFloatRegs=256
-numIQEntries=64
-numROBEntries=192
-smtNumFetchingThreads=1
-smtFetchPolicy=SingleThread
-smtLSQPolicy=Partitioned
-smtLSQThreshold=100
-smtIQPolicy=Partitioned
-smtIQThreshold=100
-smtROBPolicy=Partitioned
-smtROBThreshold=100
-smtCommitPolicy=RoundRobin
-instShiftAmt=2
-defer_registration=false
-function_trace=false
-function_trace_start=0
-
-[system.cpu.icache]
-type=BaseCache
-size=131072
-assoc=2
-block_size=64
-latency=1000
-mshrs=10
-tgts_per_mshr=20
-write_buffers=8
-prioritizeRequests=false
-protocol=null
-trace_addr=0
-hash_delay=1
-repl=null
-compressed_bus=false
-store_compressed=false
-adaptive_compression=false
-compression_latency=0
-block_size=64
-max_miss_count=0
-addr_range=[0,18446744073709551615]
-split=false
-split_size=0
-lifo=false
-two_queue=false
-prefetch_miss=false
-prefetch_access=false
-prefetcher_size=100
-prefetch_past_page=false
-prefetch_serial_squash=false
-prefetch_latency=10000
-prefetch_degree=1
-prefetch_policy=none
-prefetch_cache_check_push=true
-prefetch_use_cpu_id=true
-prefetch_data_accesses_only=false
-
-[system.cpu.dcache]
-type=BaseCache
-size=262144
-assoc=2
-block_size=64
-latency=1000
-mshrs=10
-tgts_per_mshr=20
-write_buffers=8
-prioritizeRequests=false
-protocol=null
-trace_addr=0
-hash_delay=1
-repl=null
-compressed_bus=false
-store_compressed=false
-adaptive_compression=false
-compression_latency=0
-block_size=64
-max_miss_count=0
-addr_range=[0,18446744073709551615]
-split=false
-split_size=0
-lifo=false
-two_queue=false
-prefetch_miss=false
-prefetch_access=false
-prefetcher_size=100
-prefetch_past_page=false
-prefetch_serial_squash=false
-prefetch_latency=10000
-prefetch_degree=1
-prefetch_policy=none
-prefetch_cache_check_push=true
-prefetch_use_cpu_id=true
-prefetch_data_accesses_only=false
-
-[system.cpu.l2cache]
-type=BaseCache
-size=2097152
-assoc=2
-block_size=64
-latency=1000
-mshrs=10
-tgts_per_mshr=5
-write_buffers=8
-prioritizeRequests=false
-protocol=null
-trace_addr=0
-hash_delay=1
-repl=null
-compressed_bus=false
-store_compressed=false
-adaptive_compression=false
-compression_latency=0
-block_size=64
-max_miss_count=0
-addr_range=[0,18446744073709551615]
-split=false
-split_size=0
-lifo=false
-two_queue=false
-prefetch_miss=false
-prefetch_access=false
-prefetcher_size=100
-prefetch_past_page=false
-prefetch_serial_squash=false
-prefetch_latency=10000
-prefetch_degree=1
-prefetch_policy=none
-prefetch_cache_check_push=true
-prefetch_use_cpu_id=true
-prefetch_data_accesses_only=false
-
-[system.cpu.toL2Bus]
-type=Bus
-bus_id=0
-clock=1000
-width=64
-responder_set=false
-block_size=64
-
diff --git a/tests/long/00.gzip/ref/sparc/linux/simple-atomic/config.out b/tests/long/00.gzip/ref/sparc/linux/simple-atomic/config.out
deleted file mode 100644
index 9c608a7e6..000000000
--- a/tests/long/00.gzip/ref/sparc/linux/simple-atomic/config.out
+++ /dev/null
@@ -1,58 +0,0 @@
-[root]
-type=Root
-dummy=0
-
-[system.physmem]
-type=PhysicalMemory
-file=
-range=[0,134217727]
-latency=1
-zero=false
-
-[system]
-type=System
-physmem=system.physmem
-mem_mode=atomic
-
-[system.membus]
-type=Bus
-bus_id=0
-clock=1000
-width=64
-responder_set=false
-block_size=64
-
-[system.cpu.workload]
-type=LiveProcess
-cmd=gzip input.log 1
-executable=/dist/m5/cpu2000/binaries/sparc/linux/gzip
-input=cin
-output=cout
-env=
-cwd=build/SPARC_SE/tests/fast/long/00.gzip/sparc/linux/simple-atomic
-system=system
-uid=100
-euid=100
-gid=100
-egid=100
-pid=100
-ppid=99
-
-[system.cpu]
-type=AtomicSimpleCPU
-max_insts_any_thread=0
-max_insts_all_threads=0
-max_loads_any_thread=0
-max_loads_all_threads=0
-progress_interval=0
-system=system
-cpu_id=0
-workload=system.cpu.workload
-clock=500
-phase=0
-defer_registration=false
-width=1
-function_trace=false
-function_trace_start=0
-simulate_stalls=false
-
diff --git a/tests/long/00.gzip/ref/sparc/linux/simple-timing/config.out b/tests/long/00.gzip/ref/sparc/linux/simple-timing/config.out
deleted file mode 100644
index d8a055b90..000000000
--- a/tests/long/00.gzip/ref/sparc/linux/simple-timing/config.out
+++ /dev/null
@@ -1,177 +0,0 @@
-[root]
-type=Root
-dummy=0
-
-[system.physmem]
-type=PhysicalMemory
-file=
-range=[0,134217727]
-latency=1
-zero=false
-
-[system]
-type=System
-physmem=system.physmem
-mem_mode=atomic
-
-[system.membus]
-type=Bus
-bus_id=0
-clock=1000
-width=64
-responder_set=false
-block_size=64
-
-[system.cpu.workload]
-type=LiveProcess
-cmd=gzip input.log 1
-executable=/dist/m5/cpu2000/binaries/sparc/linux/gzip
-input=cin
-output=cout
-env=
-cwd=build/SPARC_SE/tests/fast/long/00.gzip/sparc/linux/simple-timing
-system=system
-uid=100
-euid=100
-gid=100
-egid=100
-pid=100
-ppid=99
-
-[system.cpu]
-type=TimingSimpleCPU
-max_insts_any_thread=0
-max_insts_all_threads=0
-max_loads_any_thread=0
-max_loads_all_threads=0
-progress_interval=0
-system=system
-cpu_id=0
-workload=system.cpu.workload
-clock=500
-phase=0
-defer_registration=false
-// width not specified
-function_trace=false
-function_trace_start=0
-// simulate_stalls not specified
-
-[system.cpu.toL2Bus]
-type=Bus
-bus_id=0
-clock=1000
-width=64
-responder_set=false
-block_size=64
-
-[system.cpu.icache]
-type=BaseCache
-size=131072
-assoc=2
-block_size=64
-latency=1000
-mshrs=10
-tgts_per_mshr=5
-write_buffers=8
-prioritizeRequests=false
-protocol=null
-trace_addr=0
-hash_delay=1
-repl=null
-compressed_bus=false
-store_compressed=false
-adaptive_compression=false
-compression_latency=0
-block_size=64
-max_miss_count=0
-addr_range=[0,18446744073709551615]
-split=false
-split_size=0
-lifo=false
-two_queue=false
-prefetch_miss=false
-prefetch_access=false
-prefetcher_size=100
-prefetch_past_page=false
-prefetch_serial_squash=false
-prefetch_latency=10
-prefetch_degree=1
-prefetch_policy=none
-prefetch_cache_check_push=true
-prefetch_use_cpu_id=true
-prefetch_data_accesses_only=false
-
-[system.cpu.dcache]
-type=BaseCache
-size=262144
-assoc=2
-block_size=64
-latency=1000
-mshrs=10
-tgts_per_mshr=5
-write_buffers=8
-prioritizeRequests=false
-protocol=null
-trace_addr=0
-hash_delay=1
-repl=null
-compressed_bus=false
-store_compressed=false
-adaptive_compression=false
-compression_latency=0
-block_size=64
-max_miss_count=0
-addr_range=[0,18446744073709551615]
-split=false
-split_size=0
-lifo=false
-two_queue=false
-prefetch_miss=false
-prefetch_access=false
-prefetcher_size=100
-prefetch_past_page=false
-prefetch_serial_squash=false
-prefetch_latency=10
-prefetch_degree=1
-prefetch_policy=none
-prefetch_cache_check_push=true
-prefetch_use_cpu_id=true
-prefetch_data_accesses_only=false
-
-[system.cpu.l2cache]
-type=BaseCache
-size=2097152
-assoc=2
-block_size=64
-latency=10000
-mshrs=10
-tgts_per_mshr=5
-write_buffers=8
-prioritizeRequests=false
-protocol=null
-trace_addr=0
-hash_delay=1
-repl=null
-compressed_bus=false
-store_compressed=false
-adaptive_compression=false
-compression_latency=0
-block_size=64
-max_miss_count=0
-addr_range=[0,18446744073709551615]
-split=false
-split_size=0
-lifo=false
-two_queue=false
-prefetch_miss=false
-prefetch_access=false
-prefetcher_size=100
-prefetch_past_page=false
-prefetch_serial_squash=false
-prefetch_latency=10
-prefetch_degree=1
-prefetch_policy=none
-prefetch_cache_check_push=true
-prefetch_use_cpu_id=true
-prefetch_data_accesses_only=false
-
diff --git a/tests/long/10.mcf/ref/sparc/linux/simple-atomic/config.out b/tests/long/10.mcf/ref/sparc/linux/simple-atomic/config.out
deleted file mode 100644
index b84a9d780..000000000
--- a/tests/long/10.mcf/ref/sparc/linux/simple-atomic/config.out
+++ /dev/null
@@ -1,58 +0,0 @@
-[root]
-type=Root
-dummy=0
-
-[system.physmem]
-type=PhysicalMemory
-file=
-range=[0,134217727]
-latency=1
-zero=false
-
-[system]
-type=System
-physmem=system.physmem
-mem_mode=atomic
-
-[system.membus]
-type=Bus
-bus_id=0
-clock=1000
-width=64
-responder_set=false
-block_size=64
-
-[system.cpu.workload]
-type=LiveProcess
-cmd=mcf mcf.in
-executable=/dist/m5/cpu2000/binaries/sparc/linux/mcf
-input=/dist/m5/cpu2000/data/mcf/smred/input/mcf.in
-output=cout
-env=
-cwd=build/SPARC_SE/tests/fast/long/10.mcf/sparc/linux/simple-atomic
-system=system
-uid=100
-euid=100
-gid=100
-egid=100
-pid=100
-ppid=99
-
-[system.cpu]
-type=AtomicSimpleCPU
-max_insts_any_thread=0
-max_insts_all_threads=0
-max_loads_any_thread=0
-max_loads_all_threads=0
-progress_interval=0
-system=system
-cpu_id=0
-workload=system.cpu.workload
-clock=500
-phase=0
-defer_registration=false
-width=1
-function_trace=false
-function_trace_start=0
-simulate_stalls=false
-
diff --git a/tests/long/10.mcf/ref/sparc/linux/simple-timing/config.out b/tests/long/10.mcf/ref/sparc/linux/simple-timing/config.out
deleted file mode 100644
index 81e06c995..000000000
--- a/tests/long/10.mcf/ref/sparc/linux/simple-timing/config.out
+++ /dev/null
@@ -1,177 +0,0 @@
-[root]
-type=Root
-dummy=0
-
-[system.physmem]
-type=PhysicalMemory
-file=
-range=[0,134217727]
-latency=1
-zero=false
-
-[system]
-type=System
-physmem=system.physmem
-mem_mode=atomic
-
-[system.membus]
-type=Bus
-bus_id=0
-clock=1000
-width=64
-responder_set=false
-block_size=64
-
-[system.cpu.workload]
-type=LiveProcess
-cmd=mcf mcf.in
-executable=/dist/m5/cpu2000/binaries/sparc/linux/mcf
-input=/dist/m5/cpu2000/data/mcf/smred/input/mcf.in
-output=cout
-env=
-cwd=build/SPARC_SE/tests/fast/long/10.mcf/sparc/linux/simple-timing
-system=system
-uid=100
-euid=100
-gid=100
-egid=100
-pid=100
-ppid=99
-
-[system.cpu]
-type=TimingSimpleCPU
-max_insts_any_thread=0
-max_insts_all_threads=0
-max_loads_any_thread=0
-max_loads_all_threads=0
-progress_interval=0
-system=system
-cpu_id=0
-workload=system.cpu.workload
-clock=500
-phase=0
-defer_registration=false
-// width not specified
-function_trace=false
-function_trace_start=0
-// simulate_stalls not specified
-
-[system.cpu.toL2Bus]
-type=Bus
-bus_id=0
-clock=1000
-width=64
-responder_set=false
-block_size=64
-
-[system.cpu.icache]
-type=BaseCache
-size=131072
-assoc=2
-block_size=64
-latency=1000
-mshrs=10
-tgts_per_mshr=5
-write_buffers=8
-prioritizeRequests=false
-protocol=null
-trace_addr=0
-hash_delay=1
-repl=null
-compressed_bus=false
-store_compressed=false
-adaptive_compression=false
-compression_latency=0
-block_size=64
-max_miss_count=0
-addr_range=[0,18446744073709551615]
-split=false
-split_size=0
-lifo=false
-two_queue=false
-prefetch_miss=false
-prefetch_access=false
-prefetcher_size=100
-prefetch_past_page=false
-prefetch_serial_squash=false
-prefetch_latency=10000
-prefetch_degree=1
-prefetch_policy=none
-prefetch_cache_check_push=true
-prefetch_use_cpu_id=true
-prefetch_data_accesses_only=false
-
-[system.cpu.dcache]
-type=BaseCache
-size=262144
-assoc=2
-block_size=64
-latency=1000
-mshrs=10
-tgts_per_mshr=5
-write_buffers=8
-prioritizeRequests=false
-protocol=null
-trace_addr=0
-hash_delay=1
-repl=null
-compressed_bus=false
-store_compressed=false
-adaptive_compression=false
-compression_latency=0
-block_size=64
-max_miss_count=0
-addr_range=[0,18446744073709551615]
-split=false
-split_size=0
-lifo=false
-two_queue=false
-prefetch_miss=false
-prefetch_access=false
-prefetcher_size=100
-prefetch_past_page=false
-prefetch_serial_squash=false
-prefetch_latency=10000
-prefetch_degree=1
-prefetch_policy=none
-prefetch_cache_check_push=true
-prefetch_use_cpu_id=true
-prefetch_data_accesses_only=false
-
-[system.cpu.l2cache]
-type=BaseCache
-size=2097152
-assoc=2
-block_size=64
-latency=10000
-mshrs=10
-tgts_per_mshr=5
-write_buffers=8
-prioritizeRequests=false
-protocol=null
-trace_addr=0
-hash_delay=1
-repl=null
-compressed_bus=false
-store_compressed=false
-adaptive_compression=false
-compression_latency=0
-block_size=64
-max_miss_count=0
-addr_range=[0,18446744073709551615]
-split=false
-split_size=0
-lifo=false
-two_queue=false
-prefetch_miss=false
-prefetch_access=false
-prefetcher_size=100
-prefetch_past_page=false
-prefetch_serial_squash=false
-prefetch_latency=100000
-prefetch_degree=1
-prefetch_policy=none
-prefetch_cache_check_push=true
-prefetch_use_cpu_id=true
-prefetch_data_accesses_only=false
-
diff --git a/tests/long/30.eon/ref/alpha/tru64/o3-timing/config.out b/tests/long/30.eon/ref/alpha/tru64/o3-timing/config.out
deleted file mode 100644
index cea0c0402..000000000
--- a/tests/long/30.eon/ref/alpha/tru64/o3-timing/config.out
+++ /dev/null
@@ -1,366 +0,0 @@
-[root]
-type=Root
-dummy=0
-
-[system.physmem]
-type=PhysicalMemory
-file=
-range=[0,134217727]
-latency=1
-zero=false
-
-[system]
-type=System
-physmem=system.physmem
-mem_mode=atomic
-
-[system.membus]
-type=Bus
-bus_id=0
-clock=1000
-width=64
-responder_set=false
-block_size=64
-
-[system.cpu.workload]
-type=LiveProcess
-cmd=eon chair.control.cook chair.camera chair.surfaces chair.cook.ppm ppm pixels_out.cook
-executable=/dist/m5/cpu2000/binaries/alpha/tru64/eon
-input=cin
-output=cout
-env=
-cwd=build/ALPHA_SE/tests/fast/long/30.eon/alpha/tru64/o3-timing
-system=system
-uid=100
-euid=100
-gid=100
-egid=100
-pid=100
-ppid=99
-
-[system.cpu.fuPool.FUList0.opList0]
-type=OpDesc
-opClass=IntAlu
-opLat=1
-issueLat=1
-
-[system.cpu.fuPool.FUList0]
-type=FUDesc
-opList=system.cpu.fuPool.FUList0.opList0
-count=6
-
-[system.cpu.fuPool.FUList1.opList0]
-type=OpDesc
-opClass=IntMult
-opLat=3
-issueLat=1
-
-[system.cpu.fuPool.FUList1.opList1]
-type=OpDesc
-opClass=IntDiv
-opLat=20
-issueLat=19
-
-[system.cpu.fuPool.FUList1]
-type=FUDesc
-opList=system.cpu.fuPool.FUList1.opList0 system.cpu.fuPool.FUList1.opList1
-count=2
-
-[system.cpu.fuPool.FUList2.opList0]
-type=OpDesc
-opClass=FloatAdd
-opLat=2
-issueLat=1
-
-[system.cpu.fuPool.FUList2.opList1]
-type=OpDesc
-opClass=FloatCmp
-opLat=2
-issueLat=1
-
-[system.cpu.fuPool.FUList2.opList2]
-type=OpDesc
-opClass=FloatCvt
-opLat=2
-issueLat=1
-
-[system.cpu.fuPool.FUList2]
-type=FUDesc
-opList=system.cpu.fuPool.FUList2.opList0 system.cpu.fuPool.FUList2.opList1 system.cpu.fuPool.FUList2.opList2
-count=4
-
-[system.cpu.fuPool.FUList3.opList0]
-type=OpDesc
-opClass=FloatMult
-opLat=4
-issueLat=1
-
-[system.cpu.fuPool.FUList3.opList1]
-type=OpDesc
-opClass=FloatDiv
-opLat=12
-issueLat=12
-
-[system.cpu.fuPool.FUList3.opList2]
-type=OpDesc
-opClass=FloatSqrt
-opLat=24
-issueLat=24
-
-[system.cpu.fuPool.FUList3]
-type=FUDesc
-opList=system.cpu.fuPool.FUList3.opList0 system.cpu.fuPool.FUList3.opList1 system.cpu.fuPool.FUList3.opList2
-count=2
-
-[system.cpu.fuPool.FUList4.opList0]
-type=OpDesc
-opClass=MemRead
-opLat=1
-issueLat=1
-
-[system.cpu.fuPool.FUList4]
-type=FUDesc
-opList=system.cpu.fuPool.FUList4.opList0
-count=0
-
-[system.cpu.fuPool.FUList5.opList0]
-type=OpDesc
-opClass=MemWrite
-opLat=1
-issueLat=1
-
-[system.cpu.fuPool.FUList5]
-type=FUDesc
-opList=system.cpu.fuPool.FUList5.opList0
-count=0
-
-[system.cpu.fuPool.FUList6.opList0]
-type=OpDesc
-opClass=MemRead
-opLat=1
-issueLat=1
-
-[system.cpu.fuPool.FUList6.opList1]
-type=OpDesc
-opClass=MemWrite
-opLat=1
-issueLat=1
-
-[system.cpu.fuPool.FUList6]
-type=FUDesc
-opList=system.cpu.fuPool.FUList6.opList0 system.cpu.fuPool.FUList6.opList1
-count=4
-
-[system.cpu.fuPool.FUList7.opList0]
-type=OpDesc
-opClass=IprAccess
-opLat=3
-issueLat=3
-
-[system.cpu.fuPool.FUList7]
-type=FUDesc
-opList=system.cpu.fuPool.FUList7.opList0
-count=1
-
-[system.cpu.fuPool]
-type=FUPool
-FUList=system.cpu.fuPool.FUList0 system.cpu.fuPool.FUList1 system.cpu.fuPool.FUList2 system.cpu.fuPool.FUList3 system.cpu.fuPool.FUList4 system.cpu.fuPool.FUList5 system.cpu.fuPool.FUList6 system.cpu.fuPool.FUList7
-
-[system.cpu]
-type=DerivO3CPU
-clock=500
-phase=0
-numThreads=1
-cpu_id=0
-activity=0
-workload=system.cpu.workload
-checker=null
-max_insts_any_thread=0
-max_insts_all_threads=0
-max_loads_any_thread=0
-max_loads_all_threads=0
-progress_interval=0
-cachePorts=200
-decodeToFetchDelay=1
-renameToFetchDelay=1
-iewToFetchDelay=1
-commitToFetchDelay=1
-fetchWidth=8
-renameToDecodeDelay=1
-iewToDecodeDelay=1
-commitToDecodeDelay=1
-fetchToDecodeDelay=1
-decodeWidth=8
-iewToRenameDelay=1
-commitToRenameDelay=1
-decodeToRenameDelay=1
-renameWidth=8
-commitToIEWDelay=1
-renameToIEWDelay=2
-issueToExecuteDelay=1
-dispatchWidth=8
-issueWidth=8
-wbWidth=8
-wbDepth=1
-fuPool=system.cpu.fuPool
-iewToCommitDelay=1
-renameToROBDelay=1
-commitWidth=8
-squashWidth=8
-trapLatency=13
-backComSize=5
-forwardComSize=5
-predType=tournament
-localPredictorSize=2048
-localCtrBits=2
-localHistoryTableSize=2048
-localHistoryBits=11
-globalPredictorSize=8192
-globalCtrBits=2
-globalHistoryBits=13
-choicePredictorSize=8192
-choiceCtrBits=2
-BTBEntries=4096
-BTBTagSize=16
-RASSize=16
-LQEntries=32
-SQEntries=32
-LFSTSize=1024
-SSITSize=1024
-numPhysIntRegs=256
-numPhysFloatRegs=256
-numIQEntries=64
-numROBEntries=192
-smtNumFetchingThreads=1
-smtFetchPolicy=SingleThread
-smtLSQPolicy=Partitioned
-smtLSQThreshold=100
-smtIQPolicy=Partitioned
-smtIQThreshold=100
-smtROBPolicy=Partitioned
-smtROBThreshold=100
-smtCommitPolicy=RoundRobin
-instShiftAmt=2
-defer_registration=false
-function_trace=false
-function_trace_start=0
-
-[system.cpu.icache]
-type=BaseCache
-size=131072
-assoc=2
-block_size=64
-latency=1000
-mshrs=10
-tgts_per_mshr=20
-write_buffers=8
-prioritizeRequests=false
-protocol=null
-trace_addr=0
-hash_delay=1
-repl=null
-compressed_bus=false
-store_compressed=false
-adaptive_compression=false
-compression_latency=0
-block_size=64
-max_miss_count=0
-addr_range=[0,18446744073709551615]
-split=false
-split_size=0
-lifo=false
-two_queue=false
-prefetch_miss=false
-prefetch_access=false
-prefetcher_size=100
-prefetch_past_page=false
-prefetch_serial_squash=false
-prefetch_latency=10
-prefetch_degree=1
-prefetch_policy=none
-prefetch_cache_check_push=true
-prefetch_use_cpu_id=true
-prefetch_data_accesses_only=false
-
-[system.cpu.dcache]
-type=BaseCache
-size=262144
-assoc=2
-block_size=64
-latency=1000
-mshrs=10
-tgts_per_mshr=20
-write_buffers=8
-prioritizeRequests=false
-protocol=null
-trace_addr=0
-hash_delay=1
-repl=null
-compressed_bus=false
-store_compressed=false
-adaptive_compression=false
-compression_latency=0
-block_size=64
-max_miss_count=0
-addr_range=[0,18446744073709551615]
-split=false
-split_size=0
-lifo=false
-two_queue=false
-prefetch_miss=false
-prefetch_access=false
-prefetcher_size=100
-prefetch_past_page=false
-prefetch_serial_squash=false
-prefetch_latency=10
-prefetch_degree=1
-prefetch_policy=none
-prefetch_cache_check_push=true
-prefetch_use_cpu_id=true
-prefetch_data_accesses_only=false
-
-[system.cpu.l2cache]
-type=BaseCache
-size=2097152
-assoc=2
-block_size=64
-latency=1000
-mshrs=10
-tgts_per_mshr=5
-write_buffers=8
-prioritizeRequests=false
-protocol=null
-trace_addr=0
-hash_delay=1
-repl=null
-compressed_bus=false
-store_compressed=false
-adaptive_compression=false
-compression_latency=0
-block_size=64
-max_miss_count=0
-addr_range=[0,18446744073709551615]
-split=false
-split_size=0
-lifo=false
-two_queue=false
-prefetch_miss=false
-prefetch_access=false
-prefetcher_size=100
-prefetch_past_page=false
-prefetch_serial_squash=false
-prefetch_latency=10
-prefetch_degree=1
-prefetch_policy=none
-prefetch_cache_check_push=true
-prefetch_use_cpu_id=true
-prefetch_data_accesses_only=false
-
-[system.cpu.toL2Bus]
-type=Bus
-bus_id=0
-clock=1000
-width=64
-responder_set=false
-block_size=64
-
diff --git a/tests/long/30.eon/ref/alpha/tru64/simple-atomic/config.out b/tests/long/30.eon/ref/alpha/tru64/simple-atomic/config.out
deleted file mode 100644
index b7319250f..000000000
--- a/tests/long/30.eon/ref/alpha/tru64/simple-atomic/config.out
+++ /dev/null
@@ -1,58 +0,0 @@
-[root]
-type=Root
-dummy=0
-
-[system.physmem]
-type=PhysicalMemory
-file=
-range=[0,134217727]
-latency=1
-zero=false
-
-[system]
-type=System
-physmem=system.physmem
-mem_mode=atomic
-
-[system.membus]
-type=Bus
-bus_id=0
-clock=1000
-width=64
-responder_set=false
-block_size=64
-
-[system.cpu.workload]
-type=LiveProcess
-cmd=eon chair.control.cook chair.camera chair.surfaces chair.cook.ppm ppm pixels_out.cook
-executable=/dist/m5/cpu2000/binaries/alpha/tru64/eon
-input=cin
-output=cout
-env=
-cwd=build/ALPHA_SE/tests/fast/long/30.eon/alpha/tru64/simple-atomic
-system=system
-uid=100
-euid=100
-gid=100
-egid=100
-pid=100
-ppid=99
-
-[system.cpu]
-type=AtomicSimpleCPU
-max_insts_any_thread=0
-max_insts_all_threads=0
-max_loads_any_thread=0
-max_loads_all_threads=0
-progress_interval=0
-system=system
-cpu_id=0
-workload=system.cpu.workload
-clock=500
-phase=0
-defer_registration=false
-width=1
-function_trace=false
-function_trace_start=0
-simulate_stalls=false
-
diff --git a/tests/long/30.eon/ref/alpha/tru64/simple-timing/config.out b/tests/long/30.eon/ref/alpha/tru64/simple-timing/config.out
deleted file mode 100644
index c3af4f4b3..000000000
--- a/tests/long/30.eon/ref/alpha/tru64/simple-timing/config.out
+++ /dev/null
@@ -1,177 +0,0 @@
-[root]
-type=Root
-dummy=0
-
-[system.physmem]
-type=PhysicalMemory
-file=
-range=[0,134217727]
-latency=1
-zero=false
-
-[system]
-type=System
-physmem=system.physmem
-mem_mode=atomic
-
-[system.membus]
-type=Bus
-bus_id=0
-clock=1000
-width=64
-responder_set=false
-block_size=64
-
-[system.cpu.workload]
-type=LiveProcess
-cmd=eon chair.control.cook chair.camera chair.surfaces chair.cook.ppm ppm pixels_out.cook
-executable=/dist/m5/cpu2000/binaries/alpha/tru64/eon
-input=cin
-output=cout
-env=
-cwd=build/ALPHA_SE/tests/fast/long/30.eon/alpha/tru64/simple-timing
-system=system
-uid=100
-euid=100
-gid=100
-egid=100
-pid=100
-ppid=99
-
-[system.cpu]
-type=TimingSimpleCPU
-max_insts_any_thread=0
-max_insts_all_threads=0
-max_loads_any_thread=0
-max_loads_all_threads=0
-progress_interval=0
-system=system
-cpu_id=0
-workload=system.cpu.workload
-clock=500
-phase=0
-defer_registration=false
-// width not specified
-function_trace=false
-function_trace_start=0
-// simulate_stalls not specified
-
-[system.cpu.toL2Bus]
-type=Bus
-bus_id=0
-clock=1000
-width=64
-responder_set=false
-block_size=64
-
-[system.cpu.icache]
-type=BaseCache
-size=131072
-assoc=2
-block_size=64
-latency=1000
-mshrs=10
-tgts_per_mshr=5
-write_buffers=8
-prioritizeRequests=false
-protocol=null
-trace_addr=0
-hash_delay=1
-repl=null
-compressed_bus=false
-store_compressed=false
-adaptive_compression=false
-compression_latency=0
-block_size=64
-max_miss_count=0
-addr_range=[0,18446744073709551615]
-split=false
-split_size=0
-lifo=false
-two_queue=false
-prefetch_miss=false
-prefetch_access=false
-prefetcher_size=100
-prefetch_past_page=false
-prefetch_serial_squash=false
-prefetch_latency=10
-prefetch_degree=1
-prefetch_policy=none
-prefetch_cache_check_push=true
-prefetch_use_cpu_id=true
-prefetch_data_accesses_only=false
-
-[system.cpu.dcache]
-type=BaseCache
-size=262144
-assoc=2
-block_size=64
-latency=1000
-mshrs=10
-tgts_per_mshr=5
-write_buffers=8
-prioritizeRequests=false
-protocol=null
-trace_addr=0
-hash_delay=1
-repl=null
-compressed_bus=false
-store_compressed=false
-adaptive_compression=false
-compression_latency=0
-block_size=64
-max_miss_count=0
-addr_range=[0,18446744073709551615]
-split=false
-split_size=0
-lifo=false
-two_queue=false
-prefetch_miss=false
-prefetch_access=false
-prefetcher_size=100
-prefetch_past_page=false
-prefetch_serial_squash=false
-prefetch_latency=10
-prefetch_degree=1
-prefetch_policy=none
-prefetch_cache_check_push=true
-prefetch_use_cpu_id=true
-prefetch_data_accesses_only=false
-
-[system.cpu.l2cache]
-type=BaseCache
-size=2097152
-assoc=2
-block_size=64
-latency=10000
-mshrs=10
-tgts_per_mshr=5
-write_buffers=8
-prioritizeRequests=false
-protocol=null
-trace_addr=0
-hash_delay=1
-repl=null
-compressed_bus=false
-store_compressed=false
-adaptive_compression=false
-compression_latency=0
-block_size=64
-max_miss_count=0
-addr_range=[0,18446744073709551615]
-split=false
-split_size=0
-lifo=false
-two_queue=false
-prefetch_miss=false
-prefetch_access=false
-prefetcher_size=100
-prefetch_past_page=false
-prefetch_serial_squash=false
-prefetch_latency=10
-prefetch_degree=1
-prefetch_policy=none
-prefetch_cache_check_push=true
-prefetch_use_cpu_id=true
-prefetch_data_accesses_only=false
-
diff --git a/tests/long/40.perlbmk/ref/alpha/tru64/simple-atomic/config.out b/tests/long/40.perlbmk/ref/alpha/tru64/simple-atomic/config.out
deleted file mode 100644
index 0e4ea1cb5..000000000
--- a/tests/long/40.perlbmk/ref/alpha/tru64/simple-atomic/config.out
+++ /dev/null
@@ -1,58 +0,0 @@
-[root]
-type=Root
-dummy=0
-
-[system.physmem]
-type=PhysicalMemory
-file=
-range=[0,134217727]
-latency=1
-zero=false
-
-[system]
-type=System
-physmem=system.physmem
-mem_mode=atomic
-
-[system.membus]
-type=Bus
-bus_id=0
-clock=1000
-width=64
-responder_set=false
-block_size=64
-
-[system.cpu.workload]
-type=LiveProcess
-cmd=perlbmk -I. -I lib lgred.makerand.pl
-executable=/dist/m5/cpu2000/binaries/alpha/tru64/perlbmk
-input=cin
-output=cout
-env=
-cwd=build/ALPHA_SE/tests/fast/long/40.perlbmk/alpha/tru64/simple-atomic
-system=system
-uid=100
-euid=100
-gid=100
-egid=100
-pid=100
-ppid=99
-
-[system.cpu]
-type=AtomicSimpleCPU
-max_insts_any_thread=0
-max_insts_all_threads=0
-max_loads_any_thread=0
-max_loads_all_threads=0
-progress_interval=0
-system=system
-cpu_id=0
-workload=system.cpu.workload
-clock=500
-phase=0
-defer_registration=false
-width=1
-function_trace=false
-function_trace_start=0
-simulate_stalls=false
-
diff --git a/tests/long/40.perlbmk/ref/alpha/tru64/simple-timing/config.out b/tests/long/40.perlbmk/ref/alpha/tru64/simple-timing/config.out
deleted file mode 100644
index 676b65128..000000000
--- a/tests/long/40.perlbmk/ref/alpha/tru64/simple-timing/config.out
+++ /dev/null
@@ -1,177 +0,0 @@
-[root]
-type=Root
-dummy=0
-
-[system.physmem]
-type=PhysicalMemory
-file=
-range=[0,134217727]
-latency=1
-zero=false
-
-[system]
-type=System
-physmem=system.physmem
-mem_mode=atomic
-
-[system.membus]
-type=Bus
-bus_id=0
-clock=1000
-width=64
-responder_set=false
-block_size=64
-
-[system.cpu.workload]
-type=LiveProcess
-cmd=perlbmk -I. -I lib lgred.makerand.pl
-executable=/dist/m5/cpu2000/binaries/alpha/tru64/perlbmk
-input=cin
-output=cout
-env=
-cwd=build/ALPHA_SE/tests/fast/long/40.perlbmk/alpha/tru64/simple-timing
-system=system
-uid=100
-euid=100
-gid=100
-egid=100
-pid=100
-ppid=99
-
-[system.cpu]
-type=TimingSimpleCPU
-max_insts_any_thread=0
-max_insts_all_threads=0
-max_loads_any_thread=0
-max_loads_all_threads=0
-progress_interval=0
-system=system
-cpu_id=0
-workload=system.cpu.workload
-clock=500
-phase=0
-defer_registration=false
-// width not specified
-function_trace=false
-function_trace_start=0
-// simulate_stalls not specified
-
-[system.cpu.toL2Bus]
-type=Bus
-bus_id=0
-clock=1000
-width=64
-responder_set=false
-block_size=64
-
-[system.cpu.icache]
-type=BaseCache
-size=131072
-assoc=2
-block_size=64
-latency=1000
-mshrs=10
-tgts_per_mshr=5
-write_buffers=8
-prioritizeRequests=false
-protocol=null
-trace_addr=0
-hash_delay=1
-repl=null
-compressed_bus=false
-store_compressed=false
-adaptive_compression=false
-compression_latency=0
-block_size=64
-max_miss_count=0
-addr_range=[0,18446744073709551615]
-split=false
-split_size=0
-lifo=false
-two_queue=false
-prefetch_miss=false
-prefetch_access=false
-prefetcher_size=100
-prefetch_past_page=false
-prefetch_serial_squash=false
-prefetch_latency=10
-prefetch_degree=1
-prefetch_policy=none
-prefetch_cache_check_push=true
-prefetch_use_cpu_id=true
-prefetch_data_accesses_only=false
-
-[system.cpu.dcache]
-type=BaseCache
-size=262144
-assoc=2
-block_size=64
-latency=1000
-mshrs=10
-tgts_per_mshr=5
-write_buffers=8
-prioritizeRequests=false
-protocol=null
-trace_addr=0
-hash_delay=1
-repl=null
-compressed_bus=false
-store_compressed=false
-adaptive_compression=false
-compression_latency=0
-block_size=64
-max_miss_count=0
-addr_range=[0,18446744073709551615]
-split=false
-split_size=0
-lifo=false
-two_queue=false
-prefetch_miss=false
-prefetch_access=false
-prefetcher_size=100
-prefetch_past_page=false
-prefetch_serial_squash=false
-prefetch_latency=10
-prefetch_degree=1
-prefetch_policy=none
-prefetch_cache_check_push=true
-prefetch_use_cpu_id=true
-prefetch_data_accesses_only=false
-
-[system.cpu.l2cache]
-type=BaseCache
-size=2097152
-assoc=2
-block_size=64
-latency=10000
-mshrs=10
-tgts_per_mshr=5
-write_buffers=8
-prioritizeRequests=false
-protocol=null
-trace_addr=0
-hash_delay=1
-repl=null
-compressed_bus=false
-store_compressed=false
-adaptive_compression=false
-compression_latency=0
-block_size=64
-max_miss_count=0
-addr_range=[0,18446744073709551615]
-split=false
-split_size=0
-lifo=false
-two_queue=false
-prefetch_miss=false
-prefetch_access=false
-prefetcher_size=100
-prefetch_past_page=false
-prefetch_serial_squash=false
-prefetch_latency=10
-prefetch_degree=1
-prefetch_policy=none
-prefetch_cache_check_push=true
-prefetch_use_cpu_id=true
-prefetch_data_accesses_only=false
-
diff --git a/tests/long/50.vortex/ref/alpha/tru64/o3-timing/config.out b/tests/long/50.vortex/ref/alpha/tru64/o3-timing/config.out
deleted file mode 100644
index 071b401c0..000000000
--- a/tests/long/50.vortex/ref/alpha/tru64/o3-timing/config.out
+++ /dev/null
@@ -1,366 +0,0 @@
-[root]
-type=Root
-dummy=0
-
-[system.physmem]
-type=PhysicalMemory
-file=
-range=[0,134217727]
-latency=1
-zero=false
-
-[system]
-type=System
-physmem=system.physmem
-mem_mode=atomic
-
-[system.membus]
-type=Bus
-bus_id=0
-clock=1000
-width=64
-responder_set=false
-block_size=64
-
-[system.cpu.workload]
-type=LiveProcess
-cmd=vortex lendian.raw
-executable=/dist/m5/cpu2000/binaries/alpha/tru64/vortex
-input=cin
-output=cout
-env=
-cwd=build/ALPHA_SE/tests/fast/long/50.vortex/alpha/tru64/o3-timing
-system=system
-uid=100
-euid=100
-gid=100
-egid=100
-pid=100
-ppid=99
-
-[system.cpu.fuPool.FUList0.opList0]
-type=OpDesc
-opClass=IntAlu
-opLat=1
-issueLat=1
-
-[system.cpu.fuPool.FUList0]
-type=FUDesc
-opList=system.cpu.fuPool.FUList0.opList0
-count=6
-
-[system.cpu.fuPool.FUList1.opList0]
-type=OpDesc
-opClass=IntMult
-opLat=3
-issueLat=1
-
-[system.cpu.fuPool.FUList1.opList1]
-type=OpDesc
-opClass=IntDiv
-opLat=20
-issueLat=19
-
-[system.cpu.fuPool.FUList1]
-type=FUDesc
-opList=system.cpu.fuPool.FUList1.opList0 system.cpu.fuPool.FUList1.opList1
-count=2
-
-[system.cpu.fuPool.FUList2.opList0]
-type=OpDesc
-opClass=FloatAdd
-opLat=2
-issueLat=1
-
-[system.cpu.fuPool.FUList2.opList1]
-type=OpDesc
-opClass=FloatCmp
-opLat=2
-issueLat=1
-
-[system.cpu.fuPool.FUList2.opList2]
-type=OpDesc
-opClass=FloatCvt
-opLat=2
-issueLat=1
-
-[system.cpu.fuPool.FUList2]
-type=FUDesc
-opList=system.cpu.fuPool.FUList2.opList0 system.cpu.fuPool.FUList2.opList1 system.cpu.fuPool.FUList2.opList2
-count=4
-
-[system.cpu.fuPool.FUList3.opList0]
-type=OpDesc
-opClass=FloatMult
-opLat=4
-issueLat=1
-
-[system.cpu.fuPool.FUList3.opList1]
-type=OpDesc
-opClass=FloatDiv
-opLat=12
-issueLat=12
-
-[system.cpu.fuPool.FUList3.opList2]
-type=OpDesc
-opClass=FloatSqrt
-opLat=24
-issueLat=24
-
-[system.cpu.fuPool.FUList3]
-type=FUDesc
-opList=system.cpu.fuPool.FUList3.opList0 system.cpu.fuPool.FUList3.opList1 system.cpu.fuPool.FUList3.opList2
-count=2
-
-[system.cpu.fuPool.FUList4.opList0]
-type=OpDesc
-opClass=MemRead
-opLat=1
-issueLat=1
-
-[system.cpu.fuPool.FUList4]
-type=FUDesc
-opList=system.cpu.fuPool.FUList4.opList0
-count=0
-
-[system.cpu.fuPool.FUList5.opList0]
-type=OpDesc
-opClass=MemWrite
-opLat=1
-issueLat=1
-
-[system.cpu.fuPool.FUList5]
-type=FUDesc
-opList=system.cpu.fuPool.FUList5.opList0
-count=0
-
-[system.cpu.fuPool.FUList6.opList0]
-type=OpDesc
-opClass=MemRead
-opLat=1
-issueLat=1
-
-[system.cpu.fuPool.FUList6.opList1]
-type=OpDesc
-opClass=MemWrite
-opLat=1
-issueLat=1
-
-[system.cpu.fuPool.FUList6]
-type=FUDesc
-opList=system.cpu.fuPool.FUList6.opList0 system.cpu.fuPool.FUList6.opList1
-count=4
-
-[system.cpu.fuPool.FUList7.opList0]
-type=OpDesc
-opClass=IprAccess
-opLat=3
-issueLat=3
-
-[system.cpu.fuPool.FUList7]
-type=FUDesc
-opList=system.cpu.fuPool.FUList7.opList0
-count=1
-
-[system.cpu.fuPool]
-type=FUPool
-FUList=system.cpu.fuPool.FUList0 system.cpu.fuPool.FUList1 system.cpu.fuPool.FUList2 system.cpu.fuPool.FUList3 system.cpu.fuPool.FUList4 system.cpu.fuPool.FUList5 system.cpu.fuPool.FUList6 system.cpu.fuPool.FUList7
-
-[system.cpu]
-type=DerivO3CPU
-clock=500
-phase=0
-numThreads=1
-cpu_id=0
-activity=0
-workload=system.cpu.workload
-checker=null
-max_insts_any_thread=0
-max_insts_all_threads=0
-max_loads_any_thread=0
-max_loads_all_threads=0
-progress_interval=0
-cachePorts=200
-decodeToFetchDelay=1
-renameToFetchDelay=1
-iewToFetchDelay=1
-commitToFetchDelay=1
-fetchWidth=8
-renameToDecodeDelay=1
-iewToDecodeDelay=1
-commitToDecodeDelay=1
-fetchToDecodeDelay=1
-decodeWidth=8
-iewToRenameDelay=1
-commitToRenameDelay=1
-decodeToRenameDelay=1
-renameWidth=8
-commitToIEWDelay=1
-renameToIEWDelay=2
-issueToExecuteDelay=1
-dispatchWidth=8
-issueWidth=8
-wbWidth=8
-wbDepth=1
-fuPool=system.cpu.fuPool
-iewToCommitDelay=1
-renameToROBDelay=1
-commitWidth=8
-squashWidth=8
-trapLatency=13
-backComSize=5
-forwardComSize=5
-predType=tournament
-localPredictorSize=2048
-localCtrBits=2
-localHistoryTableSize=2048
-localHistoryBits=11
-globalPredictorSize=8192
-globalCtrBits=2
-globalHistoryBits=13
-choicePredictorSize=8192
-choiceCtrBits=2
-BTBEntries=4096
-BTBTagSize=16
-RASSize=16
-LQEntries=32
-SQEntries=32
-LFSTSize=1024
-SSITSize=1024
-numPhysIntRegs=256
-numPhysFloatRegs=256
-numIQEntries=64
-numROBEntries=192
-smtNumFetchingThreads=1
-smtFetchPolicy=SingleThread
-smtLSQPolicy=Partitioned
-smtLSQThreshold=100
-smtIQPolicy=Partitioned
-smtIQThreshold=100
-smtROBPolicy=Partitioned
-smtROBThreshold=100
-smtCommitPolicy=RoundRobin
-instShiftAmt=2
-defer_registration=false
-function_trace=false
-function_trace_start=0
-
-[system.cpu.icache]
-type=BaseCache
-size=131072
-assoc=2
-block_size=64
-latency=1000
-mshrs=10
-tgts_per_mshr=20
-write_buffers=8
-prioritizeRequests=false
-protocol=null
-trace_addr=0
-hash_delay=1
-repl=null
-compressed_bus=false
-store_compressed=false
-adaptive_compression=false
-compression_latency=0
-block_size=64
-max_miss_count=0
-addr_range=[0,18446744073709551615]
-split=false
-split_size=0
-lifo=false
-two_queue=false
-prefetch_miss=false
-prefetch_access=false
-prefetcher_size=100
-prefetch_past_page=false
-prefetch_serial_squash=false
-prefetch_latency=10
-prefetch_degree=1
-prefetch_policy=none
-prefetch_cache_check_push=true
-prefetch_use_cpu_id=true
-prefetch_data_accesses_only=false
-
-[system.cpu.dcache]
-type=BaseCache
-size=262144
-assoc=2
-block_size=64
-latency=1000
-mshrs=10
-tgts_per_mshr=20
-write_buffers=8
-prioritizeRequests=false
-protocol=null
-trace_addr=0
-hash_delay=1
-repl=null
-compressed_bus=false
-store_compressed=false
-adaptive_compression=false
-compression_latency=0
-block_size=64
-max_miss_count=0
-addr_range=[0,18446744073709551615]
-split=false
-split_size=0
-lifo=false
-two_queue=false
-prefetch_miss=false
-prefetch_access=false
-prefetcher_size=100
-prefetch_past_page=false
-prefetch_serial_squash=false
-prefetch_latency=10
-prefetch_degree=1
-prefetch_policy=none
-prefetch_cache_check_push=true
-prefetch_use_cpu_id=true
-prefetch_data_accesses_only=false
-
-[system.cpu.l2cache]
-type=BaseCache
-size=2097152
-assoc=2
-block_size=64
-latency=1000
-mshrs=10
-tgts_per_mshr=5
-write_buffers=8
-prioritizeRequests=false
-protocol=null
-trace_addr=0
-hash_delay=1
-repl=null
-compressed_bus=false
-store_compressed=false
-adaptive_compression=false
-compression_latency=0
-block_size=64
-max_miss_count=0
-addr_range=[0,18446744073709551615]
-split=false
-split_size=0
-lifo=false
-two_queue=false
-prefetch_miss=false
-prefetch_access=false
-prefetcher_size=100
-prefetch_past_page=false
-prefetch_serial_squash=false
-prefetch_latency=10
-prefetch_degree=1
-prefetch_policy=none
-prefetch_cache_check_push=true
-prefetch_use_cpu_id=true
-prefetch_data_accesses_only=false
-
-[system.cpu.toL2Bus]
-type=Bus
-bus_id=0
-clock=1000
-width=64
-responder_set=false
-block_size=64
-
diff --git a/tests/long/50.vortex/ref/alpha/tru64/simple-atomic/config.out b/tests/long/50.vortex/ref/alpha/tru64/simple-atomic/config.out
deleted file mode 100644
index bf2c5c795..000000000
--- a/tests/long/50.vortex/ref/alpha/tru64/simple-atomic/config.out
+++ /dev/null
@@ -1,58 +0,0 @@
-[root]
-type=Root
-dummy=0
-
-[system.physmem]
-type=PhysicalMemory
-file=
-range=[0,134217727]
-latency=1
-zero=false
-
-[system]
-type=System
-physmem=system.physmem
-mem_mode=atomic
-
-[system.membus]
-type=Bus
-bus_id=0
-clock=1000
-width=64
-responder_set=false
-block_size=64
-
-[system.cpu.workload]
-type=LiveProcess
-cmd=vortex lendian.raw
-executable=/dist/m5/cpu2000/binaries/alpha/tru64/vortex
-input=cin
-output=cout
-env=
-cwd=build/ALPHA_SE/tests/fast/long/50.vortex/alpha/tru64/simple-atomic
-system=system
-uid=100
-euid=100
-gid=100
-egid=100
-pid=100
-ppid=99
-
-[system.cpu]
-type=AtomicSimpleCPU
-max_insts_any_thread=0
-max_insts_all_threads=0
-max_loads_any_thread=0
-max_loads_all_threads=0
-progress_interval=0
-system=system
-cpu_id=0
-workload=system.cpu.workload
-clock=500
-phase=0
-defer_registration=false
-width=1
-function_trace=false
-function_trace_start=0
-simulate_stalls=false
-
diff --git a/tests/long/50.vortex/ref/alpha/tru64/simple-timing/config.out b/tests/long/50.vortex/ref/alpha/tru64/simple-timing/config.out
deleted file mode 100644
index c0cb264bc..000000000
--- a/tests/long/50.vortex/ref/alpha/tru64/simple-timing/config.out
+++ /dev/null
@@ -1,177 +0,0 @@
-[root]
-type=Root
-dummy=0
-
-[system.physmem]
-type=PhysicalMemory
-file=
-range=[0,134217727]
-latency=1
-zero=false
-
-[system]
-type=System
-physmem=system.physmem
-mem_mode=atomic
-
-[system.membus]
-type=Bus
-bus_id=0
-clock=1000
-width=64
-responder_set=false
-block_size=64
-
-[system.cpu.workload]
-type=LiveProcess
-cmd=vortex lendian.raw
-executable=/dist/m5/cpu2000/binaries/alpha/tru64/vortex
-input=cin
-output=cout
-env=
-cwd=build/ALPHA_SE/tests/fast/long/50.vortex/alpha/tru64/simple-timing
-system=system
-uid=100
-euid=100
-gid=100
-egid=100
-pid=100
-ppid=99
-
-[system.cpu]
-type=TimingSimpleCPU
-max_insts_any_thread=0
-max_insts_all_threads=0
-max_loads_any_thread=0
-max_loads_all_threads=0
-progress_interval=0
-system=system
-cpu_id=0
-workload=system.cpu.workload
-clock=500
-phase=0
-defer_registration=false
-// width not specified
-function_trace=false
-function_trace_start=0
-// simulate_stalls not specified
-
-[system.cpu.toL2Bus]
-type=Bus
-bus_id=0
-clock=1000
-width=64
-responder_set=false
-block_size=64
-
-[system.cpu.icache]
-type=BaseCache
-size=131072
-assoc=2
-block_size=64
-latency=1000
-mshrs=10
-tgts_per_mshr=5
-write_buffers=8
-prioritizeRequests=false
-protocol=null
-trace_addr=0
-hash_delay=1
-repl=null
-compressed_bus=false
-store_compressed=false
-adaptive_compression=false
-compression_latency=0
-block_size=64
-max_miss_count=0
-addr_range=[0,18446744073709551615]
-split=false
-split_size=0
-lifo=false
-two_queue=false
-prefetch_miss=false
-prefetch_access=false
-prefetcher_size=100
-prefetch_past_page=false
-prefetch_serial_squash=false
-prefetch_latency=10
-prefetch_degree=1
-prefetch_policy=none
-prefetch_cache_check_push=true
-prefetch_use_cpu_id=true
-prefetch_data_accesses_only=false
-
-[system.cpu.dcache]
-type=BaseCache
-size=262144
-assoc=2
-block_size=64
-latency=1000
-mshrs=10
-tgts_per_mshr=5
-write_buffers=8
-prioritizeRequests=false
-protocol=null
-trace_addr=0
-hash_delay=1
-repl=null
-compressed_bus=false
-store_compressed=false
-adaptive_compression=false
-compression_latency=0
-block_size=64
-max_miss_count=0
-addr_range=[0,18446744073709551615]
-split=false
-split_size=0
-lifo=false
-two_queue=false
-prefetch_miss=false
-prefetch_access=false
-prefetcher_size=100
-prefetch_past_page=false
-prefetch_serial_squash=false
-prefetch_latency=10
-prefetch_degree=1
-prefetch_policy=none
-prefetch_cache_check_push=true
-prefetch_use_cpu_id=true
-prefetch_data_accesses_only=false
-
-[system.cpu.l2cache]
-type=BaseCache
-size=2097152
-assoc=2
-block_size=64
-latency=10000
-mshrs=10
-tgts_per_mshr=5
-write_buffers=8
-prioritizeRequests=false
-protocol=null
-trace_addr=0
-hash_delay=1
-repl=null
-compressed_bus=false
-store_compressed=false
-adaptive_compression=false
-compression_latency=0
-block_size=64
-max_miss_count=0
-addr_range=[0,18446744073709551615]
-split=false
-split_size=0
-lifo=false
-two_queue=false
-prefetch_miss=false
-prefetch_access=false
-prefetcher_size=100
-prefetch_past_page=false
-prefetch_serial_squash=false
-prefetch_latency=10
-prefetch_degree=1
-prefetch_policy=none
-prefetch_cache_check_push=true
-prefetch_use_cpu_id=true
-prefetch_data_accesses_only=false
-
diff --git a/tests/long/50.vortex/ref/sparc/linux/simple-atomic/config.out b/tests/long/50.vortex/ref/sparc/linux/simple-atomic/config.out
deleted file mode 100644
index 4d97fe26f..000000000
--- a/tests/long/50.vortex/ref/sparc/linux/simple-atomic/config.out
+++ /dev/null
@@ -1,58 +0,0 @@
-[root]
-type=Root
-dummy=0
-
-[system.physmem]
-type=PhysicalMemory
-file=
-range=[0,134217727]
-latency=1
-zero=false
-
-[system]
-type=System
-physmem=system.physmem
-mem_mode=atomic
-
-[system.membus]
-type=Bus
-bus_id=0
-clock=1000
-width=64
-responder_set=false
-block_size=64
-
-[system.cpu.workload]
-type=LiveProcess
-cmd=vortex bendian.raw
-executable=/dist/m5/cpu2000/binaries/sparc/linux/vortex
-input=cin
-output=cout
-env=
-cwd=build/SPARC_SE/tests/fast/long/50.vortex/sparc/linux/simple-atomic
-system=system
-uid=100
-euid=100
-gid=100
-egid=100
-pid=100
-ppid=99
-
-[system.cpu]
-type=AtomicSimpleCPU
-max_insts_any_thread=0
-max_insts_all_threads=0
-max_loads_any_thread=0
-max_loads_all_threads=0
-progress_interval=0
-system=system
-cpu_id=0
-workload=system.cpu.workload
-clock=500
-phase=0
-defer_registration=false
-width=1
-function_trace=false
-function_trace_start=0
-simulate_stalls=false
-
diff --git a/tests/long/50.vortex/ref/sparc/linux/simple-timing/config.out b/tests/long/50.vortex/ref/sparc/linux/simple-timing/config.out
deleted file mode 100644
index c2fb507ae..000000000
--- a/tests/long/50.vortex/ref/sparc/linux/simple-timing/config.out
+++ /dev/null
@@ -1,177 +0,0 @@
-[root]
-type=Root
-dummy=0
-
-[system.physmem]
-type=PhysicalMemory
-file=
-range=[0,134217727]
-latency=1
-zero=false
-
-[system]
-type=System
-physmem=system.physmem
-mem_mode=atomic
-
-[system.membus]
-type=Bus
-bus_id=0
-clock=1000
-width=64
-responder_set=false
-block_size=64
-
-[system.cpu.workload]
-type=LiveProcess
-cmd=vortex bendian.raw
-executable=/dist/m5/cpu2000/binaries/sparc/linux/vortex
-input=cin
-output=cout
-env=
-cwd=build/SPARC_SE/tests/fast/long/50.vortex/sparc/linux/simple-timing
-system=system
-uid=100
-euid=100
-gid=100
-egid=100
-pid=100
-ppid=99
-
-[system.cpu]
-type=TimingSimpleCPU
-max_insts_any_thread=0
-max_insts_all_threads=0
-max_loads_any_thread=0
-max_loads_all_threads=0
-progress_interval=0
-system=system
-cpu_id=0
-workload=system.cpu.workload
-clock=500
-phase=0
-defer_registration=false
-// width not specified
-function_trace=false
-function_trace_start=0
-// simulate_stalls not specified
-
-[system.cpu.toL2Bus]
-type=Bus
-bus_id=0
-clock=1000
-width=64
-responder_set=false
-block_size=64
-
-[system.cpu.icache]
-type=BaseCache
-size=131072
-assoc=2
-block_size=64
-latency=1000
-mshrs=10
-tgts_per_mshr=5
-write_buffers=8
-prioritizeRequests=false
-protocol=null
-trace_addr=0
-hash_delay=1
-repl=null
-compressed_bus=false
-store_compressed=false
-adaptive_compression=false
-compression_latency=0
-block_size=64
-max_miss_count=0
-addr_range=[0,18446744073709551615]
-split=false
-split_size=0
-lifo=false
-two_queue=false
-prefetch_miss=false
-prefetch_access=false
-prefetcher_size=100
-prefetch_past_page=false
-prefetch_serial_squash=false
-prefetch_latency=10
-prefetch_degree=1
-prefetch_policy=none
-prefetch_cache_check_push=true
-prefetch_use_cpu_id=true
-prefetch_data_accesses_only=false
-
-[system.cpu.dcache]
-type=BaseCache
-size=262144
-assoc=2
-block_size=64
-latency=1000
-mshrs=10
-tgts_per_mshr=5
-write_buffers=8
-prioritizeRequests=false
-protocol=null
-trace_addr=0
-hash_delay=1
-repl=null
-compressed_bus=false
-store_compressed=false
-adaptive_compression=false
-compression_latency=0
-block_size=64
-max_miss_count=0
-addr_range=[0,18446744073709551615]
-split=false
-split_size=0
-lifo=false
-two_queue=false
-prefetch_miss=false
-prefetch_access=false
-prefetcher_size=100
-prefetch_past_page=false
-prefetch_serial_squash=false
-prefetch_latency=10
-prefetch_degree=1
-prefetch_policy=none
-prefetch_cache_check_push=true
-prefetch_use_cpu_id=true
-prefetch_data_accesses_only=false
-
-[system.cpu.l2cache]
-type=BaseCache
-size=2097152
-assoc=2
-block_size=64
-latency=10000
-mshrs=10
-tgts_per_mshr=5
-write_buffers=8
-prioritizeRequests=false
-protocol=null
-trace_addr=0
-hash_delay=1
-repl=null
-compressed_bus=false
-store_compressed=false
-adaptive_compression=false
-compression_latency=0
-block_size=64
-max_miss_count=0
-addr_range=[0,18446744073709551615]
-split=false
-split_size=0
-lifo=false
-two_queue=false
-prefetch_miss=false
-prefetch_access=false
-prefetcher_size=100
-prefetch_past_page=false
-prefetch_serial_squash=false
-prefetch_latency=10
-prefetch_degree=1
-prefetch_policy=none
-prefetch_cache_check_push=true
-prefetch_use_cpu_id=true
-prefetch_data_accesses_only=false
-
diff --git a/tests/long/60.bzip2/ref/alpha/tru64/o3-timing/config.out b/tests/long/60.bzip2/ref/alpha/tru64/o3-timing/config.out
deleted file mode 100644
index ea4848b9b..000000000
--- a/tests/long/60.bzip2/ref/alpha/tru64/o3-timing/config.out
+++ /dev/null
@@ -1,366 +0,0 @@
-[root]
-type=Root
-dummy=0
-
-[system.physmem]
-type=PhysicalMemory
-file=
-range=[0,134217727]
-latency=1
-zero=false
-
-[system]
-type=System
-physmem=system.physmem
-mem_mode=atomic
-
-[system.membus]
-type=Bus
-bus_id=0
-clock=1000
-width=64
-responder_set=false
-block_size=64
-
-[system.cpu.workload]
-type=LiveProcess
-cmd=bzip2 input.source 1
-executable=/dist/m5/cpu2000/binaries/alpha/tru64/bzip2
-input=cin
-output=cout
-env=
-cwd=build/ALPHA_SE/tests/fast/long/60.bzip2/alpha/tru64/o3-timing
-system=system
-uid=100
-euid=100
-gid=100
-egid=100
-pid=100
-ppid=99
-
-[system.cpu.fuPool.FUList0.opList0]
-type=OpDesc
-opClass=IntAlu
-opLat=1
-issueLat=1
-
-[system.cpu.fuPool.FUList0]
-type=FUDesc
-opList=system.cpu.fuPool.FUList0.opList0
-count=6
-
-[system.cpu.fuPool.FUList1.opList0]
-type=OpDesc
-opClass=IntMult
-opLat=3
-issueLat=1
-
-[system.cpu.fuPool.FUList1.opList1]
-type=OpDesc
-opClass=IntDiv
-opLat=20
-issueLat=19
-
-[system.cpu.fuPool.FUList1]
-type=FUDesc
-opList=system.cpu.fuPool.FUList1.opList0 system.cpu.fuPool.FUList1.opList1
-count=2
-
-[system.cpu.fuPool.FUList2.opList0]
-type=OpDesc
-opClass=FloatAdd
-opLat=2
-issueLat=1
-
-[system.cpu.fuPool.FUList2.opList1]
-type=OpDesc
-opClass=FloatCmp
-opLat=2
-issueLat=1
-
-[system.cpu.fuPool.FUList2.opList2]
-type=OpDesc
-opClass=FloatCvt
-opLat=2
-issueLat=1
-
-[system.cpu.fuPool.FUList2]
-type=FUDesc
-opList=system.cpu.fuPool.FUList2.opList0 system.cpu.fuPool.FUList2.opList1 system.cpu.fuPool.FUList2.opList2
-count=4
-
-[system.cpu.fuPool.FUList3.opList0]
-type=OpDesc
-opClass=FloatMult
-opLat=4
-issueLat=1
-
-[system.cpu.fuPool.FUList3.opList1]
-type=OpDesc
-opClass=FloatDiv
-opLat=12
-issueLat=12
-
-[system.cpu.fuPool.FUList3.opList2]
-type=OpDesc
-opClass=FloatSqrt
-opLat=24
-issueLat=24
-
-[system.cpu.fuPool.FUList3]
-type=FUDesc
-opList=system.cpu.fuPool.FUList3.opList0 system.cpu.fuPool.FUList3.opList1 system.cpu.fuPool.FUList3.opList2
-count=2
-
-[system.cpu.fuPool.FUList4.opList0]
-type=OpDesc
-opClass=MemRead
-opLat=1
-issueLat=1
-
-[system.cpu.fuPool.FUList4]
-type=FUDesc
-opList=system.cpu.fuPool.FUList4.opList0
-count=0
-
-[system.cpu.fuPool.FUList5.opList0]
-type=OpDesc
-opClass=MemWrite
-opLat=1
-issueLat=1
-
-[system.cpu.fuPool.FUList5]
-type=FUDesc
-opList=system.cpu.fuPool.FUList5.opList0
-count=0
-
-[system.cpu.fuPool.FUList6.opList0]
-type=OpDesc
-opClass=MemRead
-opLat=1
-issueLat=1
-
-[system.cpu.fuPool.FUList6.opList1]
-type=OpDesc
-opClass=MemWrite
-opLat=1
-issueLat=1
-
-[system.cpu.fuPool.FUList6]
-type=FUDesc
-opList=system.cpu.fuPool.FUList6.opList0 system.cpu.fuPool.FUList6.opList1
-count=4
-
-[system.cpu.fuPool.FUList7.opList0]
-type=OpDesc
-opClass=IprAccess
-opLat=3
-issueLat=3
-
-[system.cpu.fuPool.FUList7]
-type=FUDesc
-opList=system.cpu.fuPool.FUList7.opList0
-count=1
-
-[system.cpu.fuPool]
-type=FUPool
-FUList=system.cpu.fuPool.FUList0 system.cpu.fuPool.FUList1 system.cpu.fuPool.FUList2 system.cpu.fuPool.FUList3 system.cpu.fuPool.FUList4 system.cpu.fuPool.FUList5 system.cpu.fuPool.FUList6 system.cpu.fuPool.FUList7
-
-[system.cpu]
-type=DerivO3CPU
-clock=500
-phase=0
-numThreads=1
-cpu_id=0
-activity=0
-workload=system.cpu.workload
-checker=null
-max_insts_any_thread=0
-max_insts_all_threads=0
-max_loads_any_thread=0
-max_loads_all_threads=0
-progress_interval=0
-cachePorts=200
-decodeToFetchDelay=1
-renameToFetchDelay=1
-iewToFetchDelay=1
-commitToFetchDelay=1
-fetchWidth=8
-renameToDecodeDelay=1
-iewToDecodeDelay=1
-commitToDecodeDelay=1
-fetchToDecodeDelay=1
-decodeWidth=8
-iewToRenameDelay=1
-commitToRenameDelay=1
-decodeToRenameDelay=1
-renameWidth=8
-commitToIEWDelay=1
-renameToIEWDelay=2
-issueToExecuteDelay=1
-dispatchWidth=8
-issueWidth=8
-wbWidth=8
-wbDepth=1
-fuPool=system.cpu.fuPool
-iewToCommitDelay=1
-renameToROBDelay=1
-commitWidth=8
-squashWidth=8
-trapLatency=13
-backComSize=5
-forwardComSize=5
-predType=tournament
-localPredictorSize=2048
-localCtrBits=2
-localHistoryTableSize=2048
-localHistoryBits=11
-globalPredictorSize=8192
-globalCtrBits=2
-globalHistoryBits=13
-choicePredictorSize=8192
-choiceCtrBits=2
-BTBEntries=4096
-BTBTagSize=16
-RASSize=16
-LQEntries=32
-SQEntries=32
-LFSTSize=1024
-SSITSize=1024
-numPhysIntRegs=256
-numPhysFloatRegs=256
-numIQEntries=64
-numROBEntries=192
-smtNumFetchingThreads=1
-smtFetchPolicy=SingleThread
-smtLSQPolicy=Partitioned
-smtLSQThreshold=100
-smtIQPolicy=Partitioned
-smtIQThreshold=100
-smtROBPolicy=Partitioned
-smtROBThreshold=100
-smtCommitPolicy=RoundRobin
-instShiftAmt=2
-defer_registration=false
-function_trace=false
-function_trace_start=0
-
-[system.cpu.icache]
-type=BaseCache
-size=131072
-assoc=2
-block_size=64
-latency=1000
-mshrs=10
-tgts_per_mshr=20
-write_buffers=8
-prioritizeRequests=false
-protocol=null
-trace_addr=0
-hash_delay=1
-repl=null
-compressed_bus=false
-store_compressed=false
-adaptive_compression=false
-compression_latency=0
-block_size=64
-max_miss_count=0
-addr_range=[0,18446744073709551615]
-split=false
-split_size=0
-lifo=false
-two_queue=false
-prefetch_miss=false
-prefetch_access=false
-prefetcher_size=100
-prefetch_past_page=false
-prefetch_serial_squash=false
-prefetch_latency=10
-prefetch_degree=1
-prefetch_policy=none
-prefetch_cache_check_push=true
-prefetch_use_cpu_id=true
-prefetch_data_accesses_only=false
-
-[system.cpu.dcache]
-type=BaseCache
-size=262144
-assoc=2
-block_size=64
-latency=1000
-mshrs=10
-tgts_per_mshr=20
-write_buffers=8
-prioritizeRequests=false
-protocol=null
-trace_addr=0
-hash_delay=1
-repl=null
-compressed_bus=false
-store_compressed=false
-adaptive_compression=false
-compression_latency=0
-block_size=64
-max_miss_count=0
-addr_range=[0,18446744073709551615]
-split=false
-split_size=0
-lifo=false
-two_queue=false
-prefetch_miss=false
-prefetch_access=false
-prefetcher_size=100
-prefetch_past_page=false
-prefetch_serial_squash=false
-prefetch_latency=10
-prefetch_degree=1
-prefetch_policy=none
-prefetch_cache_check_push=true
-prefetch_use_cpu_id=true
-prefetch_data_accesses_only=false
-
-[system.cpu.l2cache]
-type=BaseCache
-size=2097152
-assoc=2
-block_size=64
-latency=1000
-mshrs=10
-tgts_per_mshr=5
-write_buffers=8
-prioritizeRequests=false
-protocol=null
-trace_addr=0
-hash_delay=1
-repl=null
-compressed_bus=false
-store_compressed=false
-adaptive_compression=false
-compression_latency=0
-block_size=64
-max_miss_count=0
-addr_range=[0,18446744073709551615]
-split=false
-split_size=0
-lifo=false
-two_queue=false
-prefetch_miss=false
-prefetch_access=false
-prefetcher_size=100
-prefetch_past_page=false
-prefetch_serial_squash=false
-prefetch_latency=10
-prefetch_degree=1
-prefetch_policy=none
-prefetch_cache_check_push=true
-prefetch_use_cpu_id=true
-prefetch_data_accesses_only=false
-
-[system.cpu.toL2Bus]
-type=Bus
-bus_id=0
-clock=1000
-width=64
-responder_set=false
-block_size=64
-
diff --git a/tests/long/60.bzip2/ref/alpha/tru64/simple-atomic/config.out b/tests/long/60.bzip2/ref/alpha/tru64/simple-atomic/config.out
deleted file mode 100644
index fc081bf5e..000000000
--- a/tests/long/60.bzip2/ref/alpha/tru64/simple-atomic/config.out
+++ /dev/null
@@ -1,58 +0,0 @@
-[root]
-type=Root
-dummy=0
-
-[system.physmem]
-type=PhysicalMemory
-file=
-range=[0,134217727]
-latency=1
-zero=false
-
-[system]
-type=System
-physmem=system.physmem
-mem_mode=atomic
-
-[system.membus]
-type=Bus
-bus_id=0
-clock=1000
-width=64
-responder_set=false
-block_size=64
-
-[system.cpu.workload]
-type=LiveProcess
-cmd=bzip2 input.source 1
-executable=/dist/m5/cpu2000/binaries/alpha/tru64/bzip2
-input=cin
-output=cout
-env=
-cwd=build/ALPHA_SE/tests/fast/long/60.bzip2/alpha/tru64/simple-atomic
-system=system
-uid=100
-euid=100
-gid=100
-egid=100
-pid=100
-ppid=99
-
-[system.cpu]
-type=AtomicSimpleCPU
-max_insts_any_thread=0
-max_insts_all_threads=0
-max_loads_any_thread=0
-max_loads_all_threads=0
-progress_interval=0
-system=system
-cpu_id=0
-workload=system.cpu.workload
-clock=500
-phase=0
-defer_registration=false
-width=1
-function_trace=false
-function_trace_start=0
-simulate_stalls=false
-
diff --git a/tests/long/60.bzip2/ref/alpha/tru64/simple-timing/config.out b/tests/long/60.bzip2/ref/alpha/tru64/simple-timing/config.out
deleted file mode 100644
index 55a09db2b..000000000
--- a/tests/long/60.bzip2/ref/alpha/tru64/simple-timing/config.out
+++ /dev/null
@@ -1,177 +0,0 @@
-[root]
-type=Root
-dummy=0
-
-[system.physmem]
-type=PhysicalMemory
-file=
-range=[0,134217727]
-latency=1
-zero=false
-
-[system]
-type=System
-physmem=system.physmem
-mem_mode=atomic
-
-[system.membus]
-type=Bus
-bus_id=0
-clock=1000
-width=64
-responder_set=false
-block_size=64
-
-[system.cpu.workload]
-type=LiveProcess
-cmd=bzip2 input.source 1
-executable=/dist/m5/cpu2000/binaries/alpha/tru64/bzip2
-input=cin
-output=cout
-env=
-cwd=build/ALPHA_SE/tests/fast/long/60.bzip2/alpha/tru64/simple-timing
-system=system
-uid=100
-euid=100
-gid=100
-egid=100
-pid=100
-ppid=99
-
-[system.cpu]
-type=TimingSimpleCPU
-max_insts_any_thread=0
-max_insts_all_threads=0
-max_loads_any_thread=0
-max_loads_all_threads=0
-progress_interval=0
-system=system
-cpu_id=0
-workload=system.cpu.workload
-clock=500
-phase=0
-defer_registration=false
-// width not specified
-function_trace=false
-function_trace_start=0
-// simulate_stalls not specified
-
-[system.cpu.toL2Bus]
-type=Bus
-bus_id=0
-clock=1000
-width=64
-responder_set=false
-block_size=64
-
-[system.cpu.icache]
-type=BaseCache
-size=131072
-assoc=2
-block_size=64
-latency=1000
-mshrs=10
-tgts_per_mshr=5
-write_buffers=8
-prioritizeRequests=false
-protocol=null
-trace_addr=0
-hash_delay=1
-repl=null
-compressed_bus=false
-store_compressed=false
-adaptive_compression=false
-compression_latency=0
-block_size=64
-max_miss_count=0
-addr_range=[0,18446744073709551615]
-split=false
-split_size=0
-lifo=false
-two_queue=false
-prefetch_miss=false
-prefetch_access=false
-prefetcher_size=100
-prefetch_past_page=false
-prefetch_serial_squash=false
-prefetch_latency=10
-prefetch_degree=1
-prefetch_policy=none
-prefetch_cache_check_push=true
-prefetch_use_cpu_id=true
-prefetch_data_accesses_only=false
-
-[system.cpu.dcache]
-type=BaseCache
-size=262144
-assoc=2
-block_size=64
-latency=1000
-mshrs=10
-tgts_per_mshr=5
-write_buffers=8
-prioritizeRequests=false
-protocol=null
-trace_addr=0
-hash_delay=1
-repl=null
-compressed_bus=false
-store_compressed=false
-adaptive_compression=false
-compression_latency=0
-block_size=64
-max_miss_count=0
-addr_range=[0,18446744073709551615]
-split=false
-split_size=0
-lifo=false
-two_queue=false
-prefetch_miss=false
-prefetch_access=false
-prefetcher_size=100
-prefetch_past_page=false
-prefetch_serial_squash=false
-prefetch_latency=10
-prefetch_degree=1
-prefetch_policy=none
-prefetch_cache_check_push=true
-prefetch_use_cpu_id=true
-prefetch_data_accesses_only=false
-
-[system.cpu.l2cache]
-type=BaseCache
-size=2097152
-assoc=2
-block_size=64
-latency=10000
-mshrs=10
-tgts_per_mshr=5
-write_buffers=8
-prioritizeRequests=false
-protocol=null
-trace_addr=0
-hash_delay=1
-repl=null
-compressed_bus=false
-store_compressed=false
-adaptive_compression=false
-compression_latency=0
-block_size=64
-max_miss_count=0
-addr_range=[0,18446744073709551615]
-split=false
-split_size=0
-lifo=false
-two_queue=false
-prefetch_miss=false
-prefetch_access=false
-prefetcher_size=100
-prefetch_past_page=false
-prefetch_serial_squash=false
-prefetch_latency=10
-prefetch_degree=1
-prefetch_policy=none
-prefetch_cache_check_push=true
-prefetch_use_cpu_id=true
-prefetch_data_accesses_only=false
-
diff --git a/tests/long/70.twolf/ref/alpha/tru64/o3-timing/config.out b/tests/long/70.twolf/ref/alpha/tru64/o3-timing/config.out
deleted file mode 100644
index e3bf50f10..000000000
--- a/tests/long/70.twolf/ref/alpha/tru64/o3-timing/config.out
+++ /dev/null
@@ -1,366 +0,0 @@
-[root]
-type=Root
-dummy=0
-
-[system.physmem]
-type=PhysicalMemory
-file=
-range=[0,134217727]
-latency=1
-zero=false
-
-[system]
-type=System
-physmem=system.physmem
-mem_mode=atomic
-
-[system.membus]
-type=Bus
-bus_id=0
-clock=1000
-width=64
-responder_set=false
-block_size=64
-
-[system.cpu.workload]
-type=LiveProcess
-cmd=twolf smred
-executable=/dist/m5/cpu2000/binaries/alpha/tru64/twolf
-input=cin
-output=cout
-env=
-cwd=build/ALPHA_SE/tests/fast/long/70.twolf/alpha/tru64/o3-timing
-system=system
-uid=100
-euid=100
-gid=100
-egid=100
-pid=100
-ppid=99
-
-[system.cpu.fuPool.FUList0.opList0]
-type=OpDesc
-opClass=IntAlu
-opLat=1
-issueLat=1
-
-[system.cpu.fuPool.FUList0]
-type=FUDesc
-opList=system.cpu.fuPool.FUList0.opList0
-count=6
-
-[system.cpu.fuPool.FUList1.opList0]
-type=OpDesc
-opClass=IntMult
-opLat=3
-issueLat=1
-
-[system.cpu.fuPool.FUList1.opList1]
-type=OpDesc
-opClass=IntDiv
-opLat=20
-issueLat=19
-
-[system.cpu.fuPool.FUList1]
-type=FUDesc
-opList=system.cpu.fuPool.FUList1.opList0 system.cpu.fuPool.FUList1.opList1
-count=2
-
-[system.cpu.fuPool.FUList2.opList0]
-type=OpDesc
-opClass=FloatAdd
-opLat=2
-issueLat=1
-
-[system.cpu.fuPool.FUList2.opList1]
-type=OpDesc
-opClass=FloatCmp
-opLat=2
-issueLat=1
-
-[system.cpu.fuPool.FUList2.opList2]
-type=OpDesc
-opClass=FloatCvt
-opLat=2
-issueLat=1
-
-[system.cpu.fuPool.FUList2]
-type=FUDesc
-opList=system.cpu.fuPool.FUList2.opList0 system.cpu.fuPool.FUList2.opList1 system.cpu.fuPool.FUList2.opList2
-count=4
-
-[system.cpu.fuPool.FUList3.opList0]
-type=OpDesc
-opClass=FloatMult
-opLat=4
-issueLat=1
-
-[system.cpu.fuPool.FUList3.opList1]
-type=OpDesc
-opClass=FloatDiv
-opLat=12
-issueLat=12
-
-[system.cpu.fuPool.FUList3.opList2]
-type=OpDesc
-opClass=FloatSqrt
-opLat=24
-issueLat=24
-
-[system.cpu.fuPool.FUList3]
-type=FUDesc
-opList=system.cpu.fuPool.FUList3.opList0 system.cpu.fuPool.FUList3.opList1 system.cpu.fuPool.FUList3.opList2
-count=2
-
-[system.cpu.fuPool.FUList4.opList0]
-type=OpDesc
-opClass=MemRead
-opLat=1
-issueLat=1
-
-[system.cpu.fuPool.FUList4]
-type=FUDesc
-opList=system.cpu.fuPool.FUList4.opList0
-count=0
-
-[system.cpu.fuPool.FUList5.opList0]
-type=OpDesc
-opClass=MemWrite
-opLat=1
-issueLat=1
-
-[system.cpu.fuPool.FUList5]
-type=FUDesc
-opList=system.cpu.fuPool.FUList5.opList0
-count=0
-
-[system.cpu.fuPool.FUList6.opList0]
-type=OpDesc
-opClass=MemRead
-opLat=1
-issueLat=1
-
-[system.cpu.fuPool.FUList6.opList1]
-type=OpDesc
-opClass=MemWrite
-opLat=1
-issueLat=1
-
-[system.cpu.fuPool.FUList6]
-type=FUDesc
-opList=system.cpu.fuPool.FUList6.opList0 system.cpu.fuPool.FUList6.opList1
-count=4
-
-[system.cpu.fuPool.FUList7.opList0]
-type=OpDesc
-opClass=IprAccess
-opLat=3
-issueLat=3
-
-[system.cpu.fuPool.FUList7]
-type=FUDesc
-opList=system.cpu.fuPool.FUList7.opList0
-count=1
-
-[system.cpu.fuPool]
-type=FUPool
-FUList=system.cpu.fuPool.FUList0 system.cpu.fuPool.FUList1 system.cpu.fuPool.FUList2 system.cpu.fuPool.FUList3 system.cpu.fuPool.FUList4 system.cpu.fuPool.FUList5 system.cpu.fuPool.FUList6 system.cpu.fuPool.FUList7
-
-[system.cpu]
-type=DerivO3CPU
-clock=500
-phase=0
-numThreads=1
-cpu_id=0
-activity=0
-workload=system.cpu.workload
-checker=null
-max_insts_any_thread=0
-max_insts_all_threads=0
-max_loads_any_thread=0
-max_loads_all_threads=0
-progress_interval=0
-cachePorts=200
-decodeToFetchDelay=1
-renameToFetchDelay=1
-iewToFetchDelay=1
-commitToFetchDelay=1
-fetchWidth=8
-renameToDecodeDelay=1
-iewToDecodeDelay=1
-commitToDecodeDelay=1
-fetchToDecodeDelay=1
-decodeWidth=8
-iewToRenameDelay=1
-commitToRenameDelay=1
-decodeToRenameDelay=1
-renameWidth=8
-commitToIEWDelay=1
-renameToIEWDelay=2
-issueToExecuteDelay=1
-dispatchWidth=8
-issueWidth=8
-wbWidth=8
-wbDepth=1
-fuPool=system.cpu.fuPool
-iewToCommitDelay=1
-renameToROBDelay=1
-commitWidth=8
-squashWidth=8
-trapLatency=13
-backComSize=5
-forwardComSize=5
-predType=tournament
-localPredictorSize=2048
-localCtrBits=2
-localHistoryTableSize=2048
-localHistoryBits=11
-globalPredictorSize=8192
-globalCtrBits=2
-globalHistoryBits=13
-choicePredictorSize=8192
-choiceCtrBits=2
-BTBEntries=4096
-BTBTagSize=16
-RASSize=16
-LQEntries=32
-SQEntries=32
-LFSTSize=1024
-SSITSize=1024
-numPhysIntRegs=256
-numPhysFloatRegs=256
-numIQEntries=64
-numROBEntries=192
-smtNumFetchingThreads=1
-smtFetchPolicy=SingleThread
-smtLSQPolicy=Partitioned
-smtLSQThreshold=100
-smtIQPolicy=Partitioned
-smtIQThreshold=100
-smtROBPolicy=Partitioned
-smtROBThreshold=100
-smtCommitPolicy=RoundRobin
-instShiftAmt=2
-defer_registration=false
-function_trace=false
-function_trace_start=0
-
-[system.cpu.icache]
-type=BaseCache
-size=131072
-assoc=2
-block_size=64
-latency=1000
-mshrs=10
-tgts_per_mshr=20
-write_buffers=8
-prioritizeRequests=false
-protocol=null
-trace_addr=0
-hash_delay=1
-repl=null
-compressed_bus=false
-store_compressed=false
-adaptive_compression=false
-compression_latency=0
-block_size=64
-max_miss_count=0
-addr_range=[0,18446744073709551615]
-split=false
-split_size=0
-lifo=false
-two_queue=false
-prefetch_miss=false
-prefetch_access=false
-prefetcher_size=100
-prefetch_past_page=false
-prefetch_serial_squash=false
-prefetch_latency=10
-prefetch_degree=1
-prefetch_policy=none
-prefetch_cache_check_push=true
-prefetch_use_cpu_id=true
-prefetch_data_accesses_only=false
-
-[system.cpu.dcache]
-type=BaseCache
-size=262144
-assoc=2
-block_size=64
-latency=1000
-mshrs=10
-tgts_per_mshr=20
-write_buffers=8
-prioritizeRequests=false
-protocol=null
-trace_addr=0
-hash_delay=1
-repl=null
-compressed_bus=false
-store_compressed=false
-adaptive_compression=false
-compression_latency=0
-block_size=64
-max_miss_count=0
-addr_range=[0,18446744073709551615]
-split=false
-split_size=0
-lifo=false
-two_queue=false
-prefetch_miss=false
-prefetch_access=false
-prefetcher_size=100
-prefetch_past_page=false
-prefetch_serial_squash=false
-prefetch_latency=10
-prefetch_degree=1
-prefetch_policy=none
-prefetch_cache_check_push=true
-prefetch_use_cpu_id=true
-prefetch_data_accesses_only=false
-
-[system.cpu.l2cache]
-type=BaseCache
-size=2097152
-assoc=2
-block_size=64
-latency=1000
-mshrs=10
-tgts_per_mshr=5
-write_buffers=8
-prioritizeRequests=false
-protocol=null
-trace_addr=0
-hash_delay=1
-repl=null
-compressed_bus=false
-store_compressed=false
-adaptive_compression=false
-compression_latency=0
-block_size=64
-max_miss_count=0
-addr_range=[0,18446744073709551615]
-split=false
-split_size=0
-lifo=false
-two_queue=false
-prefetch_miss=false
-prefetch_access=false
-prefetcher_size=100
-prefetch_past_page=false
-prefetch_serial_squash=false
-prefetch_latency=10
-prefetch_degree=1
-prefetch_policy=none
-prefetch_cache_check_push=true
-prefetch_use_cpu_id=true
-prefetch_data_accesses_only=false
-
-[system.cpu.toL2Bus]
-type=Bus
-bus_id=0
-clock=1000
-width=64
-responder_set=false
-block_size=64
-
diff --git a/tests/long/70.twolf/ref/alpha/tru64/simple-atomic/config.out b/tests/long/70.twolf/ref/alpha/tru64/simple-atomic/config.out
deleted file mode 100644
index 47defa937..000000000
--- a/tests/long/70.twolf/ref/alpha/tru64/simple-atomic/config.out
+++ /dev/null
@@ -1,58 +0,0 @@
-[root]
-type=Root
-dummy=0
-
-[system.physmem]
-type=PhysicalMemory
-file=
-range=[0,134217727]
-latency=1
-zero=false
-
-[system]
-type=System
-physmem=system.physmem
-mem_mode=atomic
-
-[system.membus]
-type=Bus
-bus_id=0
-clock=1000
-width=64
-responder_set=false
-block_size=64
-
-[system.cpu.workload]
-type=LiveProcess
-cmd=twolf smred
-executable=/dist/m5/cpu2000/binaries/alpha/tru64/twolf
-input=cin
-output=cout
-env=
-cwd=build/ALPHA_SE/tests/fast/long/70.twolf/alpha/tru64/simple-atomic
-system=system
-uid=100
-euid=100
-gid=100
-egid=100
-pid=100
-ppid=99
-
-[system.cpu]
-type=AtomicSimpleCPU
-max_insts_any_thread=0
-max_insts_all_threads=0
-max_loads_any_thread=0
-max_loads_all_threads=0
-progress_interval=0
-system=system
-cpu_id=0
-workload=system.cpu.workload
-clock=500
-phase=0
-defer_registration=false
-width=1
-function_trace=false
-function_trace_start=0
-simulate_stalls=false
-
diff --git a/tests/long/70.twolf/ref/alpha/tru64/simple-timing/config.out b/tests/long/70.twolf/ref/alpha/tru64/simple-timing/config.out
deleted file mode 100644
index 3ed492885..000000000
--- a/tests/long/70.twolf/ref/alpha/tru64/simple-timing/config.out
+++ /dev/null
@@ -1,177 +0,0 @@
-[root]
-type=Root
-dummy=0
-
-[system.physmem]
-type=PhysicalMemory
-file=
-range=[0,134217727]
-latency=1
-zero=false
-
-[system]
-type=System
-physmem=system.physmem
-mem_mode=atomic
-
-[system.membus]
-type=Bus
-bus_id=0
-clock=1000
-width=64
-responder_set=false
-block_size=64
-
-[system.cpu.workload]
-type=LiveProcess
-cmd=twolf smred
-executable=/dist/m5/cpu2000/binaries/alpha/tru64/twolf
-input=cin
-output=cout
-env=
-cwd=build/ALPHA_SE/tests/fast/long/70.twolf/alpha/tru64/simple-timing
-system=system
-uid=100
-euid=100
-gid=100
-egid=100
-pid=100
-ppid=99
-
-[system.cpu]
-type=TimingSimpleCPU
-max_insts_any_thread=0
-max_insts_all_threads=0
-max_loads_any_thread=0
-max_loads_all_threads=0
-progress_interval=0
-system=system
-cpu_id=0
-workload=system.cpu.workload
-clock=500
-phase=0
-defer_registration=false
-// width not specified
-function_trace=false
-function_trace_start=0
-// simulate_stalls not specified
-
-[system.cpu.toL2Bus]
-type=Bus
-bus_id=0
-clock=1000
-width=64
-responder_set=false
-block_size=64
-
-[system.cpu.icache]
-type=BaseCache
-size=131072
-assoc=2
-block_size=64
-latency=1000
-mshrs=10
-tgts_per_mshr=5
-write_buffers=8
-prioritizeRequests=false
-protocol=null
-trace_addr=0
-hash_delay=1
-repl=null
-compressed_bus=false
-store_compressed=false
-adaptive_compression=false
-compression_latency=0
-block_size=64
-max_miss_count=0
-addr_range=[0,18446744073709551615]
-split=false
-split_size=0
-lifo=false
-two_queue=false
-prefetch_miss=false
-prefetch_access=false
-prefetcher_size=100
-prefetch_past_page=false
-prefetch_serial_squash=false
-prefetch_latency=10
-prefetch_degree=1
-prefetch_policy=none
-prefetch_cache_check_push=true
-prefetch_use_cpu_id=true
-prefetch_data_accesses_only=false
-
-[system.cpu.dcache]
-type=BaseCache
-size=262144
-assoc=2
-block_size=64
-latency=1000
-mshrs=10
-tgts_per_mshr=5
-write_buffers=8
-prioritizeRequests=false
-protocol=null
-trace_addr=0
-hash_delay=1
-repl=null
-compressed_bus=false
-store_compressed=false
-adaptive_compression=false
-compression_latency=0
-block_size=64
-max_miss_count=0
-addr_range=[0,18446744073709551615]
-split=false
-split_size=0
-lifo=false
-two_queue=false
-prefetch_miss=false
-prefetch_access=false
-prefetcher_size=100
-prefetch_past_page=false
-prefetch_serial_squash=false
-prefetch_latency=10
-prefetch_degree=1
-prefetch_policy=none
-prefetch_cache_check_push=true
-prefetch_use_cpu_id=true
-prefetch_data_accesses_only=false
-
-[system.cpu.l2cache]
-type=BaseCache
-size=2097152
-assoc=2
-block_size=64
-latency=10000
-mshrs=10
-tgts_per_mshr=5
-write_buffers=8
-prioritizeRequests=false
-protocol=null
-trace_addr=0
-hash_delay=1
-repl=null
-compressed_bus=false
-store_compressed=false
-adaptive_compression=false
-compression_latency=0
-block_size=64
-max_miss_count=0
-addr_range=[0,18446744073709551615]
-split=false
-split_size=0
-lifo=false
-two_queue=false
-prefetch_miss=false
-prefetch_access=false
-prefetcher_size=100
-prefetch_past_page=false
-prefetch_serial_squash=false
-prefetch_latency=10
-prefetch_degree=1
-prefetch_policy=none
-prefetch_cache_check_push=true
-prefetch_use_cpu_id=true
-prefetch_data_accesses_only=false
-
diff --git a/tests/long/70.twolf/ref/sparc/linux/simple-atomic/config.out b/tests/long/70.twolf/ref/sparc/linux/simple-atomic/config.out
deleted file mode 100644
index d448056f4..000000000
--- a/tests/long/70.twolf/ref/sparc/linux/simple-atomic/config.out
+++ /dev/null
@@ -1,58 +0,0 @@
-[root]
-type=Root
-dummy=0
-
-[system.physmem]
-type=PhysicalMemory
-file=
-range=[0,134217727]
-latency=1
-zero=false
-
-[system]
-type=System
-physmem=system.physmem
-mem_mode=atomic
-
-[system.membus]
-type=Bus
-bus_id=0
-clock=1000
-width=64
-responder_set=false
-block_size=64
-
-[system.cpu.workload]
-type=LiveProcess
-cmd=twolf smred
-executable=/dist/m5/cpu2000/binaries/sparc/linux/twolf
-input=cin
-output=cout
-env=
-cwd=build/SPARC_SE/tests/fast/long/70.twolf/sparc/linux/simple-atomic
-system=system
-uid=100
-euid=100
-gid=100
-egid=100
-pid=100
-ppid=99
-
-[system.cpu]
-type=AtomicSimpleCPU
-max_insts_any_thread=0
-max_insts_all_threads=0
-max_loads_any_thread=0
-max_loads_all_threads=0
-progress_interval=0
-system=system
-cpu_id=0
-workload=system.cpu.workload
-clock=500
-phase=0
-defer_registration=false
-width=1
-function_trace=false
-function_trace_start=0
-simulate_stalls=false
-
diff --git a/tests/long/70.twolf/ref/sparc/linux/simple-timing/config.out b/tests/long/70.twolf/ref/sparc/linux/simple-timing/config.out
deleted file mode 100644
index f79151c21..000000000
--- a/tests/long/70.twolf/ref/sparc/linux/simple-timing/config.out
+++ /dev/null
@@ -1,177 +0,0 @@
-[root]
-type=Root
-dummy=0
-
-[system.physmem]
-type=PhysicalMemory
-file=
-range=[0,134217727]
-latency=1
-zero=false
-
-[system]
-type=System
-physmem=system.physmem
-mem_mode=atomic
-
-[system.membus]
-type=Bus
-bus_id=0
-clock=1000
-width=64
-responder_set=false
-block_size=64
-
-[system.cpu.workload]
-type=LiveProcess
-cmd=twolf smred
-executable=/dist/m5/cpu2000/binaries/sparc/linux/twolf
-input=cin
-output=cout
-env=
-cwd=build/SPARC_SE/tests/fast/long/70.twolf/sparc/linux/simple-timing
-system=system
-uid=100
-euid=100
-gid=100
-egid=100
-pid=100
-ppid=99
-
-[system.cpu]
-type=TimingSimpleCPU
-max_insts_any_thread=0
-max_insts_all_threads=0
-max_loads_any_thread=0
-max_loads_all_threads=0
-progress_interval=0
-system=system
-cpu_id=0
-workload=system.cpu.workload
-clock=500
-phase=0
-defer_registration=false
-// width not specified
-function_trace=false
-function_trace_start=0
-// simulate_stalls not specified
-
-[system.cpu.toL2Bus]
-type=Bus
-bus_id=0
-clock=1000
-width=64
-responder_set=false
-block_size=64
-
-[system.cpu.icache]
-type=BaseCache
-size=131072
-assoc=2
-block_size=64
-latency=1000
-mshrs=10
-tgts_per_mshr=5
-write_buffers=8
-prioritizeRequests=false
-protocol=null
-trace_addr=0
-hash_delay=1
-repl=null
-compressed_bus=false
-store_compressed=false
-adaptive_compression=false
-compression_latency=0
-block_size=64
-max_miss_count=0
-addr_range=[0,18446744073709551615]
-split=false
-split_size=0
-lifo=false
-two_queue=false
-prefetch_miss=false
-prefetch_access=false
-prefetcher_size=100
-prefetch_past_page=false
-prefetch_serial_squash=false
-prefetch_latency=10
-prefetch_degree=1
-prefetch_policy=none
-prefetch_cache_check_push=true
-prefetch_use_cpu_id=true
-prefetch_data_accesses_only=false
-
-[system.cpu.dcache]
-type=BaseCache
-size=262144
-assoc=2
-block_size=64
-latency=1000
-mshrs=10
-tgts_per_mshr=5
-write_buffers=8
-prioritizeRequests=false
-protocol=null
-trace_addr=0
-hash_delay=1
-repl=null
-compressed_bus=false
-store_compressed=false
-adaptive_compression=false
-compression_latency=0
-block_size=64
-max_miss_count=0
-addr_range=[0,18446744073709551615]
-split=false
-split_size=0
-lifo=false
-two_queue=false
-prefetch_miss=false
-prefetch_access=false
-prefetcher_size=100
-prefetch_past_page=false
-prefetch_serial_squash=false
-prefetch_latency=10
-prefetch_degree=1
-prefetch_policy=none
-prefetch_cache_check_push=true
-prefetch_use_cpu_id=true
-prefetch_data_accesses_only=false
-
-[system.cpu.l2cache]
-type=BaseCache
-size=2097152
-assoc=2
-block_size=64
-latency=10000
-mshrs=10
-tgts_per_mshr=5
-write_buffers=8
-prioritizeRequests=false
-protocol=null
-trace_addr=0
-hash_delay=1
-repl=null
-compressed_bus=false
-store_compressed=false
-adaptive_compression=false
-compression_latency=0
-block_size=64
-max_miss_count=0
-addr_range=[0,18446744073709551615]
-split=false
-split_size=0
-lifo=false
-two_queue=false
-prefetch_miss=false
-prefetch_access=false
-prefetcher_size=100
-prefetch_past_page=false
-prefetch_serial_squash=false
-prefetch_latency=10
-prefetch_degree=1
-prefetch_policy=none
-prefetch_cache_check_push=true
-prefetch_use_cpu_id=true
-prefetch_data_accesses_only=false
-
diff --git a/tests/long/80.solaris-boot/ref/sparc/solaris/t1000-simple-atomic/config.out b/tests/long/80.solaris-boot/ref/sparc/solaris/t1000-simple-atomic/config.out
deleted file mode 100644
index bc35fc4e7..000000000
--- a/tests/long/80.solaris-boot/ref/sparc/solaris/t1000-simple-atomic/config.out
+++ /dev/null
@@ -1,417 +0,0 @@
-[root]
-type=Root
-dummy=0
-
-[system.physmem]
-type=PhysicalMemory
-file=
-range=[1048576,68157439]
-latency=1
-zero=true
-
-[system.rom]
-type=PhysicalMemory
-file=
-range=[1099243192320,1099251580927]
-latency=1
-zero=false
-
-[system.nvram]
-type=PhysicalMemory
-file=
-range=[133429198848,133429207039]
-latency=1
-zero=false
-
-[system.hypervisor_desc]
-type=PhysicalMemory
-file=
-range=[133446500352,133446508543]
-latency=1
-zero=false
-
-[system.partition_desc]
-type=PhysicalMemory
-file=
-range=[133445976064,133445984255]
-latency=1
-zero=false
-
-[system]
-type=SparcSystem
-physmem=system.physmem
-rom=system.rom
-nvram=system.nvram
-hypervisor_desc=system.hypervisor_desc
-partition_desc=system.partition_desc
-mem_mode=atomic
-reset_addr=1099243192320
-hypervisor_addr=1099243257856
-openboot_addr=1099243716608
-nvram_addr=133429198848
-hypervisor_desc_addr=133446500352
-partition_desc_addr=133445976064
-kernel=
-reset_bin=/dist/m5/system/binaries/reset_new.bin
-hypervisor_bin=/dist/m5/system/binaries/q_new.bin
-openboot_bin=/dist/m5/system/binaries/openboot_new.bin
-nvram_bin=/dist/m5/system/binaries/nvram1
-hypervisor_desc_bin=/dist/m5/system/binaries/1up-hv.bin
-partition_desc_bin=/dist/m5/system/binaries/1up-md.bin
-boot_cpu_frequency=1
-boot_osflags=a
-readfile=tests/halt.sh
-init_param=0
-
-[system.membus]
-type=Bus
-bus_id=1
-clock=2
-width=64
-responder_set=false
-block_size=64
-
-[system.intrctrl]
-type=IntrControl
-sys=system
-
-[system.t1000]
-type=T1000
-system=system
-intrctrl=system.intrctrl
-
-[system.membus.responder]
-type=IsaFake
-pio_addr=0
-pio_latency=0
-pio_size=8
-ret_bad_addr=true
-update_data=false
-warn_access=
-ret_data8=255
-ret_data16=65535
-ret_data32=4294967295
-ret_data64=18446744073709551615
-platform=system.t1000
-system=system
-
-[system.physmem2]
-type=PhysicalMemory
-file=
-range=[2147483648,2415919103]
-latency=1
-zero=true
-
-[system.bridge]
-type=Bridge
-req_size_a=16
-req_size_b=16
-resp_size_a=16
-resp_size_b=16
-delay=100
-nack_delay=8
-write_ack=false
-fix_partial_write_a=false
-fix_partial_write_b=true
-
-[system.disk0.image.child]
-type=RawDiskImage
-image_file=/dist/m5/system/disks/disk.s10hw2
-read_only=true
-
-[system.disk0.image]
-type=CowDiskImage
-child=system.disk0.image.child
-image_file=
-table_size=65536
-read_only=false
-
-[system.disk0]
-type=MmDisk
-pio_addr=134217728000
-pio_latency=2
-// pio_size not specified
-platform=system.t1000
-system=system
-image=system.disk0.image
-
-[system.t1000.hconsole]
-type=SimConsole
-intr_control=system.intrctrl
-output=console
-port=3456
-append_name=true
-number=0
-
-[system.t1000.hvuart]
-type=Uart8250
-pio_addr=1099255955456
-pio_latency=2
-platform=system.t1000
-sim_console=system.t1000.hconsole
-system=system
-
-[system.t1000.htod]
-type=DumbTOD
-pio_addr=1099255906296
-pio_latency=2
-platform=system.t1000
-system=system
-time=2009 1 1 0 0 0 3 1
-
-[system.t1000.pconsole]
-type=SimConsole
-intr_control=system.intrctrl
-output=console
-port=3456
-append_name=true
-number=0
-
-[system.t1000.puart0]
-type=Uart8250
-pio_addr=133412421632
-pio_latency=2
-platform=system.t1000
-sim_console=system.t1000.pconsole
-system=system
-
-[system.t1000.fake_membnks]
-type=IsaFake
-pio_addr=648540061696
-pio_latency=2
-pio_size=16384
-ret_bad_addr=false
-update_data=false
-warn_access=
-ret_data8=255
-ret_data16=65535
-ret_data32=4294967295
-ret_data64=0
-platform=system.t1000
-system=system
-
-[system.t1000.fake_ssi]
-type=IsaFake
-pio_addr=1095216660480
-pio_latency=2
-pio_size=268435456
-ret_bad_addr=false
-update_data=false
-warn_access=
-ret_data8=255
-ret_data16=65535
-ret_data32=4294967295
-ret_data64=18446744073709551615
-platform=system.t1000
-system=system
-
-[system.t1000.fake_l2_4]
-type=IsaFake
-pio_addr=725849473216
-pio_latency=2
-pio_size=8
-ret_bad_addr=false
-update_data=true
-warn_access=
-ret_data8=255
-ret_data16=65535
-ret_data32=4294967295
-ret_data64=1
-platform=system.t1000
-system=system
-
-[system.t1000.fake_l2_1]
-type=IsaFake
-pio_addr=725849473024
-pio_latency=2
-pio_size=8
-ret_bad_addr=false
-update_data=true
-warn_access=
-ret_data8=255
-ret_data16=65535
-ret_data32=4294967295
-ret_data64=1
-platform=system.t1000
-system=system
-
-[system.t1000.fake_l2_2]
-type=IsaFake
-pio_addr=725849473088
-pio_latency=2
-pio_size=8
-ret_bad_addr=false
-update_data=true
-warn_access=
-ret_data8=255
-ret_data16=65535
-ret_data32=4294967295
-ret_data64=1
-platform=system.t1000
-system=system
-
-[system.t1000.fake_l2_3]
-type=IsaFake
-pio_addr=725849473152
-pio_latency=2
-pio_size=8
-ret_bad_addr=false
-update_data=true
-warn_access=
-ret_data8=255
-ret_data16=65535
-ret_data32=4294967295
-ret_data64=1
-platform=system.t1000
-system=system
-
-[system.t1000.fake_l2esr_3]
-type=IsaFake
-pio_addr=734439407744
-pio_latency=2
-pio_size=8
-ret_bad_addr=false
-update_data=true
-warn_access=
-ret_data8=255
-ret_data16=65535
-ret_data32=4294967295
-ret_data64=0
-platform=system.t1000
-system=system
-
-[system.t1000.fake_l2esr_2]
-type=IsaFake
-pio_addr=734439407680
-pio_latency=2
-pio_size=8
-ret_bad_addr=false
-update_data=true
-warn_access=
-ret_data8=255
-ret_data16=65535
-ret_data32=4294967295
-ret_data64=0
-platform=system.t1000
-system=system
-
-[system.t1000.fake_l2esr_1]
-type=IsaFake
-pio_addr=734439407616
-pio_latency=2
-pio_size=8
-ret_bad_addr=false
-update_data=true
-warn_access=
-ret_data8=255
-ret_data16=65535
-ret_data32=4294967295
-ret_data64=0
-platform=system.t1000
-system=system
-
-[system.t1000.fake_l2esr_4]
-type=IsaFake
-pio_addr=734439407808
-pio_latency=2
-pio_size=8
-ret_bad_addr=false
-update_data=true
-warn_access=
-ret_data8=255
-ret_data16=65535
-ret_data32=4294967295
-ret_data64=0
-platform=system.t1000
-system=system
-
-[system.t1000.iob]
-type=Iob
-pio_latency=2
-platform=system.t1000
-system=system
-
-[system.t1000.fake_clk]
-type=IsaFake
-pio_addr=644245094400
-pio_latency=2
-pio_size=4294967296
-ret_bad_addr=false
-update_data=false
-warn_access=
-ret_data8=255
-ret_data16=65535
-ret_data32=4294967295
-ret_data64=18446744073709551615
-platform=system.t1000
-system=system
-
-[system.t1000.fake_jbi]
-type=IsaFake
-pio_addr=549755813888
-pio_latency=2
-pio_size=4294967296
-ret_bad_addr=false
-update_data=false
-warn_access=
-ret_data8=255
-ret_data16=65535
-ret_data32=4294967295
-ret_data64=18446744073709551615
-platform=system.t1000
-system=system
-
-[system.iobus]
-type=Bus
-bus_id=0
-clock=2
-width=64
-responder_set=false
-block_size=64
-
-[system.iobus.responder]
-type=IsaFake
-pio_addr=0
-pio_latency=0
-pio_size=8
-ret_bad_addr=true
-update_data=false
-warn_access=
-ret_data8=255
-ret_data16=65535
-ret_data32=4294967295
-ret_data64=18446744073709551615
-platform=system.t1000
-system=system
-
-[system.cpu.itb]
-type=SparcITB
-size=64
-
-[system.cpu.dtb]
-type=SparcDTB
-size=64
-
-[system.cpu]
-type=AtomicSimpleCPU
-max_insts_any_thread=0
-max_insts_all_threads=0
-max_loads_any_thread=0
-max_loads_all_threads=0
-progress_interval=0
-system=system
-cpu_id=0
-itb=system.cpu.itb
-dtb=system.cpu.dtb
-profile=0
-do_quiesce=true
-do_checkpoint_insts=true
-do_statistics_insts=true
-clock=1
-phase=0
-defer_registration=false
-width=1
-function_trace=false
-function_trace_start=0
-simulate_stalls=false
-
diff --git a/tests/quick/00.hello/ref/alpha/linux/o3-timing/config.out b/tests/quick/00.hello/ref/alpha/linux/o3-timing/config.out
deleted file mode 100644
index 7cb2e7d7d..000000000
--- a/tests/quick/00.hello/ref/alpha/linux/o3-timing/config.out
+++ /dev/null
@@ -1,366 +0,0 @@
-[root]
-type=Root
-dummy=0
-
-[system.physmem]
-type=PhysicalMemory
-file=
-range=[0,134217727]
-latency=1
-zero=false
-
-[system]
-type=System
-physmem=system.physmem
-mem_mode=atomic
-
-[system.membus]
-type=Bus
-bus_id=0
-clock=1000
-width=64
-responder_set=false
-block_size=64
-
-[system.cpu.workload]
-type=LiveProcess
-cmd=hello
-executable=tests/test-progs/hello/bin/alpha/linux/hello
-input=cin
-output=cout
-env=
-cwd=
-system=system
-uid=100
-euid=100
-gid=100
-egid=100
-pid=100
-ppid=99
-
-[system.cpu.fuPool.FUList0.opList0]
-type=OpDesc
-opClass=IntAlu
-opLat=1
-issueLat=1
-
-[system.cpu.fuPool.FUList0]
-type=FUDesc
-opList=system.cpu.fuPool.FUList0.opList0
-count=6
-
-[system.cpu.fuPool.FUList1.opList0]
-type=OpDesc
-opClass=IntMult
-opLat=3
-issueLat=1
-
-[system.cpu.fuPool.FUList1.opList1]
-type=OpDesc
-opClass=IntDiv
-opLat=20
-issueLat=19
-
-[system.cpu.fuPool.FUList1]
-type=FUDesc
-opList=system.cpu.fuPool.FUList1.opList0 system.cpu.fuPool.FUList1.opList1
-count=2
-
-[system.cpu.fuPool.FUList2.opList0]
-type=OpDesc
-opClass=FloatAdd
-opLat=2
-issueLat=1
-
-[system.cpu.fuPool.FUList2.opList1]
-type=OpDesc
-opClass=FloatCmp
-opLat=2
-issueLat=1
-
-[system.cpu.fuPool.FUList2.opList2]
-type=OpDesc
-opClass=FloatCvt
-opLat=2
-issueLat=1
-
-[system.cpu.fuPool.FUList2]
-type=FUDesc
-opList=system.cpu.fuPool.FUList2.opList0 system.cpu.fuPool.FUList2.opList1 system.cpu.fuPool.FUList2.opList2
-count=4
-
-[system.cpu.fuPool.FUList3.opList0]
-type=OpDesc
-opClass=FloatMult
-opLat=4
-issueLat=1
-
-[system.cpu.fuPool.FUList3.opList1]
-type=OpDesc
-opClass=FloatDiv
-opLat=12
-issueLat=12
-
-[system.cpu.fuPool.FUList3.opList2]
-type=OpDesc
-opClass=FloatSqrt
-opLat=24
-issueLat=24
-
-[system.cpu.fuPool.FUList3]
-type=FUDesc
-opList=system.cpu.fuPool.FUList3.opList0 system.cpu.fuPool.FUList3.opList1 system.cpu.fuPool.FUList3.opList2
-count=2
-
-[system.cpu.fuPool.FUList4.opList0]
-type=OpDesc
-opClass=MemRead
-opLat=1
-issueLat=1
-
-[system.cpu.fuPool.FUList4]
-type=FUDesc
-opList=system.cpu.fuPool.FUList4.opList0
-count=0
-
-[system.cpu.fuPool.FUList5.opList0]
-type=OpDesc
-opClass=MemWrite
-opLat=1
-issueLat=1
-
-[system.cpu.fuPool.FUList5]
-type=FUDesc
-opList=system.cpu.fuPool.FUList5.opList0
-count=0
-
-[system.cpu.fuPool.FUList6.opList0]
-type=OpDesc
-opClass=MemRead
-opLat=1
-issueLat=1
-
-[system.cpu.fuPool.FUList6.opList1]
-type=OpDesc
-opClass=MemWrite
-opLat=1
-issueLat=1
-
-[system.cpu.fuPool.FUList6]
-type=FUDesc
-opList=system.cpu.fuPool.FUList6.opList0 system.cpu.fuPool.FUList6.opList1
-count=4
-
-[system.cpu.fuPool.FUList7.opList0]
-type=OpDesc
-opClass=IprAccess
-opLat=3
-issueLat=3
-
-[system.cpu.fuPool.FUList7]
-type=FUDesc
-opList=system.cpu.fuPool.FUList7.opList0
-count=1
-
-[system.cpu.fuPool]
-type=FUPool
-FUList=system.cpu.fuPool.FUList0 system.cpu.fuPool.FUList1 system.cpu.fuPool.FUList2 system.cpu.fuPool.FUList3 system.cpu.fuPool.FUList4 system.cpu.fuPool.FUList5 system.cpu.fuPool.FUList6 system.cpu.fuPool.FUList7
-
-[system.cpu]
-type=DerivO3CPU
-clock=500
-phase=0
-numThreads=1
-cpu_id=0
-activity=0
-workload=system.cpu.workload
-checker=null
-max_insts_any_thread=0
-max_insts_all_threads=0
-max_loads_any_thread=0
-max_loads_all_threads=0
-progress_interval=0
-cachePorts=200
-decodeToFetchDelay=1
-renameToFetchDelay=1
-iewToFetchDelay=1
-commitToFetchDelay=1
-fetchWidth=8
-renameToDecodeDelay=1
-iewToDecodeDelay=1
-commitToDecodeDelay=1
-fetchToDecodeDelay=1
-decodeWidth=8
-iewToRenameDelay=1
-commitToRenameDelay=1
-decodeToRenameDelay=1
-renameWidth=8
-commitToIEWDelay=1
-renameToIEWDelay=2
-issueToExecuteDelay=1
-dispatchWidth=8
-issueWidth=8
-wbWidth=8
-wbDepth=1
-fuPool=system.cpu.fuPool
-iewToCommitDelay=1
-renameToROBDelay=1
-commitWidth=8
-squashWidth=8
-trapLatency=13
-backComSize=5
-forwardComSize=5
-predType=tournament
-localPredictorSize=2048
-localCtrBits=2
-localHistoryTableSize=2048
-localHistoryBits=11
-globalPredictorSize=8192
-globalCtrBits=2
-globalHistoryBits=13
-choicePredictorSize=8192
-choiceCtrBits=2
-BTBEntries=4096
-BTBTagSize=16
-RASSize=16
-LQEntries=32
-SQEntries=32
-LFSTSize=1024
-SSITSize=1024
-numPhysIntRegs=256
-numPhysFloatRegs=256
-numIQEntries=64
-numROBEntries=192
-smtNumFetchingThreads=1
-smtFetchPolicy=SingleThread
-smtLSQPolicy=Partitioned
-smtLSQThreshold=100
-smtIQPolicy=Partitioned
-smtIQThreshold=100
-smtROBPolicy=Partitioned
-smtROBThreshold=100
-smtCommitPolicy=RoundRobin
-instShiftAmt=2
-defer_registration=false
-function_trace=false
-function_trace_start=0
-
-[system.cpu.icache]
-type=BaseCache
-size=131072
-assoc=2
-block_size=64
-latency=1000
-mshrs=10
-tgts_per_mshr=20
-write_buffers=8
-prioritizeRequests=false
-protocol=null
-trace_addr=0
-hash_delay=1
-repl=null
-compressed_bus=false
-store_compressed=false
-adaptive_compression=false
-compression_latency=0
-block_size=64
-max_miss_count=0
-addr_range=[0,18446744073709551615]
-split=false
-split_size=0
-lifo=false
-two_queue=false
-prefetch_miss=false
-prefetch_access=false
-prefetcher_size=100
-prefetch_past_page=false
-prefetch_serial_squash=false
-prefetch_latency=10000
-prefetch_degree=1
-prefetch_policy=none
-prefetch_cache_check_push=true
-prefetch_use_cpu_id=true
-prefetch_data_accesses_only=false
-
-[system.cpu.dcache]
-type=BaseCache
-size=262144
-assoc=2
-block_size=64
-latency=1000
-mshrs=10
-tgts_per_mshr=20
-write_buffers=8
-prioritizeRequests=false
-protocol=null
-trace_addr=0
-hash_delay=1
-repl=null
-compressed_bus=false
-store_compressed=false
-adaptive_compression=false
-compression_latency=0
-block_size=64
-max_miss_count=0
-addr_range=[0,18446744073709551615]
-split=false
-split_size=0
-lifo=false
-two_queue=false
-prefetch_miss=false
-prefetch_access=false
-prefetcher_size=100
-prefetch_past_page=false
-prefetch_serial_squash=false
-prefetch_latency=10000
-prefetch_degree=1
-prefetch_policy=none
-prefetch_cache_check_push=true
-prefetch_use_cpu_id=true
-prefetch_data_accesses_only=false
-
-[system.cpu.l2cache]
-type=BaseCache
-size=2097152
-assoc=2
-block_size=64
-latency=1000
-mshrs=10
-tgts_per_mshr=5
-write_buffers=8
-prioritizeRequests=false
-protocol=null
-trace_addr=0
-hash_delay=1
-repl=null
-compressed_bus=false
-store_compressed=false
-adaptive_compression=false
-compression_latency=0
-block_size=64
-max_miss_count=0
-addr_range=[0,18446744073709551615]
-split=false
-split_size=0
-lifo=false
-two_queue=false
-prefetch_miss=false
-prefetch_access=false
-prefetcher_size=100
-prefetch_past_page=false
-prefetch_serial_squash=false
-prefetch_latency=10000
-prefetch_degree=1
-prefetch_policy=none
-prefetch_cache_check_push=true
-prefetch_use_cpu_id=true
-prefetch_data_accesses_only=false
-
-[system.cpu.toL2Bus]
-type=Bus
-bus_id=0
-clock=1000
-width=64
-responder_set=false
-block_size=64
-
diff --git a/tests/quick/00.hello/ref/alpha/linux/simple-atomic/config.out b/tests/quick/00.hello/ref/alpha/linux/simple-atomic/config.out
deleted file mode 100644
index 117159126..000000000
--- a/tests/quick/00.hello/ref/alpha/linux/simple-atomic/config.out
+++ /dev/null
@@ -1,58 +0,0 @@
-[root]
-type=Root
-dummy=0
-
-[system.physmem]
-type=PhysicalMemory
-file=
-range=[0,134217727]
-latency=1
-zero=false
-
-[system]
-type=System
-physmem=system.physmem
-mem_mode=atomic
-
-[system.membus]
-type=Bus
-bus_id=0
-clock=1000
-width=64
-responder_set=false
-block_size=64
-
-[system.cpu.workload]
-type=LiveProcess
-cmd=hello
-executable=tests/test-progs/hello/bin/alpha/linux/hello
-input=cin
-output=cout
-env=
-cwd=
-system=system
-uid=100
-euid=100
-gid=100
-egid=100
-pid=100
-ppid=99
-
-[system.cpu]
-type=AtomicSimpleCPU
-max_insts_any_thread=0
-max_insts_all_threads=0
-max_loads_any_thread=0
-max_loads_all_threads=0
-progress_interval=0
-system=system
-cpu_id=0
-workload=system.cpu.workload
-clock=500
-phase=0
-defer_registration=false
-width=1
-function_trace=false
-function_trace_start=0
-simulate_stalls=false
-
diff --git a/tests/quick/00.hello/ref/alpha/linux/simple-timing/config.out b/tests/quick/00.hello/ref/alpha/linux/simple-timing/config.out
deleted file mode 100644
index f7852a616..000000000
--- a/tests/quick/00.hello/ref/alpha/linux/simple-timing/config.out
+++ /dev/null
@@ -1,177 +0,0 @@
-[root]
-type=Root
-dummy=0
-
-[system.physmem]
-type=PhysicalMemory
-file=
-range=[0,134217727]
-latency=1
-zero=false
-
-[system]
-type=System
-physmem=system.physmem
-mem_mode=atomic
-
-[system.membus]
-type=Bus
-bus_id=0
-clock=1000
-width=64
-responder_set=false
-block_size=64
-
-[system.cpu.workload]
-type=LiveProcess
-cmd=hello
-executable=tests/test-progs/hello/bin/alpha/linux/hello
-input=cin
-output=cout
-env=
-cwd=
-system=system
-uid=100
-euid=100
-gid=100
-egid=100
-pid=100
-ppid=99
-
-[system.cpu]
-type=TimingSimpleCPU
-max_insts_any_thread=0
-max_insts_all_threads=0
-max_loads_any_thread=0
-max_loads_all_threads=0
-progress_interval=0
-system=system
-cpu_id=0
-workload=system.cpu.workload
-clock=500
-phase=0
-defer_registration=false
-// width not specified
-function_trace=false
-function_trace_start=0
-// simulate_stalls not specified
-
-[system.cpu.toL2Bus]
-type=Bus
-bus_id=0
-clock=1000
-width=64
-responder_set=false
-block_size=64
-
-[system.cpu.icache]
-type=BaseCache
-size=131072
-assoc=2
-block_size=64
-latency=1000
-mshrs=10
-tgts_per_mshr=5
-write_buffers=8
-prioritizeRequests=false
-protocol=null
-trace_addr=0
-hash_delay=1
-repl=null
-compressed_bus=false
-store_compressed=false
-adaptive_compression=false
-compression_latency=0
-block_size=64
-max_miss_count=0
-addr_range=[0,18446744073709551615]
-split=false
-split_size=0
-lifo=false
-two_queue=false
-prefetch_miss=false
-prefetch_access=false
-prefetcher_size=100
-prefetch_past_page=false
-prefetch_serial_squash=false
-prefetch_latency=10000
-prefetch_degree=1
-prefetch_policy=none
-prefetch_cache_check_push=true
-prefetch_use_cpu_id=true
-prefetch_data_accesses_only=false
-
-[system.cpu.dcache]
-type=BaseCache
-size=262144
-assoc=2
-block_size=64
-latency=1000
-mshrs=10
-tgts_per_mshr=5
-write_buffers=8
-prioritizeRequests=false
-protocol=null
-trace_addr=0
-hash_delay=1
-repl=null
-compressed_bus=false
-store_compressed=false
-adaptive_compression=false
-compression_latency=0
-block_size=64
-max_miss_count=0
-addr_range=[0,18446744073709551615]
-split=false
-split_size=0
-lifo=false
-two_queue=false
-prefetch_miss=false
-prefetch_access=false
-prefetcher_size=100
-prefetch_past_page=false
-prefetch_serial_squash=false
-prefetch_latency=10000
-prefetch_degree=1
-prefetch_policy=none
-prefetch_cache_check_push=true
-prefetch_use_cpu_id=true
-prefetch_data_accesses_only=false
-
-[system.cpu.l2cache]
-type=BaseCache
-size=2097152
-assoc=2
-block_size=64
-latency=10000
-mshrs=10
-tgts_per_mshr=5
-write_buffers=8
-prioritizeRequests=false
-protocol=null
-trace_addr=0
-hash_delay=1
-repl=null
-compressed_bus=false
-store_compressed=false
-adaptive_compression=false
-compression_latency=0
-block_size=64
-max_miss_count=0
-addr_range=[0,18446744073709551615]
-split=false
-split_size=0
-lifo=false
-two_queue=false
-prefetch_miss=false
-prefetch_access=false
-prefetcher_size=100
-prefetch_past_page=false
-prefetch_serial_squash=false
-prefetch_latency=100000
-prefetch_degree=1
-prefetch_policy=none
-prefetch_cache_check_push=true
-prefetch_use_cpu_id=true
-prefetch_data_accesses_only=false
-
diff --git a/tests/quick/00.hello/ref/alpha/tru64/o3-timing/config.out b/tests/quick/00.hello/ref/alpha/tru64/o3-timing/config.out
deleted file mode 100644
index 0cb6591c8..000000000
--- a/tests/quick/00.hello/ref/alpha/tru64/o3-timing/config.out
+++ /dev/null
@@ -1,366 +0,0 @@
-[root]
-type=Root
-dummy=0
-
-[system.physmem]
-type=PhysicalMemory
-file=
-range=[0,134217727]
-latency=1
-zero=false
-
-[system]
-type=System
-physmem=system.physmem
-mem_mode=atomic
-
-[system.membus]
-type=Bus
-bus_id=0
-clock=1000
-width=64
-responder_set=false
-block_size=64
-
-[system.cpu.workload]
-type=LiveProcess
-cmd=hello
-executable=tests/test-progs/hello/bin/alpha/tru64/hello
-input=cin
-output=cout
-env=
-cwd=
-system=system
-uid=100
-euid=100
-gid=100
-egid=100
-pid=100
-ppid=99
-
-[system.cpu.fuPool.FUList0.opList0]
-type=OpDesc
-opClass=IntAlu
-opLat=1
-issueLat=1
-
-[system.cpu.fuPool.FUList0]
-type=FUDesc
-opList=system.cpu.fuPool.FUList0.opList0
-count=6
-
-[system.cpu.fuPool.FUList1.opList0]
-type=OpDesc
-opClass=IntMult
-opLat=3
-issueLat=1
-
-[system.cpu.fuPool.FUList1.opList1]
-type=OpDesc
-opClass=IntDiv
-opLat=20
-issueLat=19
-
-[system.cpu.fuPool.FUList1]
-type=FUDesc
-opList=system.cpu.fuPool.FUList1.opList0 system.cpu.fuPool.FUList1.opList1
-count=2
-
-[system.cpu.fuPool.FUList2.opList0]
-type=OpDesc
-opClass=FloatAdd
-opLat=2
-issueLat=1
-
-[system.cpu.fuPool.FUList2.opList1]
-type=OpDesc
-opClass=FloatCmp
-opLat=2
-issueLat=1
-
-[system.cpu.fuPool.FUList2.opList2]
-type=OpDesc
-opClass=FloatCvt
-opLat=2
-issueLat=1
-
-[system.cpu.fuPool.FUList2]
-type=FUDesc
-opList=system.cpu.fuPool.FUList2.opList0 system.cpu.fuPool.FUList2.opList1 system.cpu.fuPool.FUList2.opList2
-count=4
-
-[system.cpu.fuPool.FUList3.opList0]
-type=OpDesc
-opClass=FloatMult
-opLat=4
-issueLat=1
-
-[system.cpu.fuPool.FUList3.opList1]
-type=OpDesc
-opClass=FloatDiv
-opLat=12
-issueLat=12
-
-[system.cpu.fuPool.FUList3.opList2]
-type=OpDesc
-opClass=FloatSqrt
-opLat=24
-issueLat=24
-
-[system.cpu.fuPool.FUList3]
-type=FUDesc
-opList=system.cpu.fuPool.FUList3.opList0 system.cpu.fuPool.FUList3.opList1 system.cpu.fuPool.FUList3.opList2
-count=2
-
-[system.cpu.fuPool.FUList4.opList0]
-type=OpDesc
-opClass=MemRead
-opLat=1
-issueLat=1
-
-[system.cpu.fuPool.FUList4]
-type=FUDesc
-opList=system.cpu.fuPool.FUList4.opList0
-count=0
-
-[system.cpu.fuPool.FUList5.opList0]
-type=OpDesc
-opClass=MemWrite
-opLat=1
-issueLat=1
-
-[system.cpu.fuPool.FUList5]
-type=FUDesc
-opList=system.cpu.fuPool.FUList5.opList0
-count=0
-
-[system.cpu.fuPool.FUList6.opList0]
-type=OpDesc
-opClass=MemRead
-opLat=1
-issueLat=1
-
-[system.cpu.fuPool.FUList6.opList1]
-type=OpDesc
-opClass=MemWrite
-opLat=1
-issueLat=1
-
-[system.cpu.fuPool.FUList6]
-type=FUDesc
-opList=system.cpu.fuPool.FUList6.opList0 system.cpu.fuPool.FUList6.opList1
-count=4
-
-[system.cpu.fuPool.FUList7.opList0]
-type=OpDesc
-opClass=IprAccess
-opLat=3
-issueLat=3
-
-[system.cpu.fuPool.FUList7]
-type=FUDesc
-opList=system.cpu.fuPool.FUList7.opList0
-count=1
-
-[system.cpu.fuPool]
-type=FUPool
-FUList=system.cpu.fuPool.FUList0 system.cpu.fuPool.FUList1 system.cpu.fuPool.FUList2 system.cpu.fuPool.FUList3 system.cpu.fuPool.FUList4 system.cpu.fuPool.FUList5 system.cpu.fuPool.FUList6 system.cpu.fuPool.FUList7
-
-[system.cpu]
-type=DerivO3CPU
-clock=500
-phase=0
-numThreads=1
-cpu_id=0
-activity=0
-workload=system.cpu.workload
-checker=null
-max_insts_any_thread=0
-max_insts_all_threads=0
-max_loads_any_thread=0
-max_loads_all_threads=0
-progress_interval=0
-cachePorts=200
-decodeToFetchDelay=1
-renameToFetchDelay=1
-iewToFetchDelay=1
-commitToFetchDelay=1
-fetchWidth=8
-renameToDecodeDelay=1
-iewToDecodeDelay=1
-commitToDecodeDelay=1
-fetchToDecodeDelay=1
-decodeWidth=8
-iewToRenameDelay=1
-commitToRenameDelay=1
-decodeToRenameDelay=1
-renameWidth=8
-commitToIEWDelay=1
-renameToIEWDelay=2
-issueToExecuteDelay=1
-dispatchWidth=8
-issueWidth=8
-wbWidth=8
-wbDepth=1
-fuPool=system.cpu.fuPool
-iewToCommitDelay=1
-renameToROBDelay=1
-commitWidth=8
-squashWidth=8
-trapLatency=13
-backComSize=5
-forwardComSize=5
-predType=tournament
-localPredictorSize=2048
-localCtrBits=2
-localHistoryTableSize=2048
-localHistoryBits=11
-globalPredictorSize=8192
-globalCtrBits=2
-globalHistoryBits=13
-choicePredictorSize=8192
-choiceCtrBits=2
-BTBEntries=4096
-BTBTagSize=16
-RASSize=16
-LQEntries=32
-SQEntries=32
-LFSTSize=1024
-SSITSize=1024
-numPhysIntRegs=256
-numPhysFloatRegs=256
-numIQEntries=64
-numROBEntries=192
-smtNumFetchingThreads=1
-smtFetchPolicy=SingleThread
-smtLSQPolicy=Partitioned
-smtLSQThreshold=100
-smtIQPolicy=Partitioned
-smtIQThreshold=100
-smtROBPolicy=Partitioned
-smtROBThreshold=100
-smtCommitPolicy=RoundRobin
-instShiftAmt=2
-defer_registration=false
-function_trace=false
-function_trace_start=0
-
-[system.cpu.icache]
-type=BaseCache
-size=131072
-assoc=2
-block_size=64
-latency=1000
-mshrs=10
-tgts_per_mshr=20
-write_buffers=8
-prioritizeRequests=false
-protocol=null
-trace_addr=0
-hash_delay=1
-repl=null
-compressed_bus=false
-store_compressed=false
-adaptive_compression=false
-compression_latency=0
-block_size=64
-max_miss_count=0
-addr_range=[0,18446744073709551615]
-split=false
-split_size=0
-lifo=false
-two_queue=false
-prefetch_miss=false
-prefetch_access=false
-prefetcher_size=100
-prefetch_past_page=false
-prefetch_serial_squash=false
-prefetch_latency=10000
-prefetch_degree=1
-prefetch_policy=none
-prefetch_cache_check_push=true
-prefetch_use_cpu_id=true
-prefetch_data_accesses_only=false
-
-[system.cpu.dcache]
-type=BaseCache
-size=262144
-assoc=2
-block_size=64
-latency=1000
-mshrs=10
-tgts_per_mshr=20
-write_buffers=8
-prioritizeRequests=false
-protocol=null
-trace_addr=0
-hash_delay=1
-repl=null
-compressed_bus=false
-store_compressed=false
-adaptive_compression=false
-compression_latency=0
-block_size=64
-max_miss_count=0
-addr_range=[0,18446744073709551615]
-split=false
-split_size=0
-lifo=false
-two_queue=false
-prefetch_miss=false
-prefetch_access=false
-prefetcher_size=100
-prefetch_past_page=false
-prefetch_serial_squash=false
-prefetch_latency=10000
-prefetch_degree=1
-prefetch_policy=none
-prefetch_cache_check_push=true
-prefetch_use_cpu_id=true
-prefetch_data_accesses_only=false
-
-[system.cpu.l2cache]
-type=BaseCache
-size=2097152
-assoc=2
-block_size=64
-latency=1000
-mshrs=10
-tgts_per_mshr=5
-write_buffers=8
-prioritizeRequests=false
-protocol=null
-trace_addr=0
-hash_delay=1
-repl=null
-compressed_bus=false
-store_compressed=false
-adaptive_compression=false
-compression_latency=0
-block_size=64
-max_miss_count=0
-addr_range=[0,18446744073709551615]
-split=false
-split_size=0
-lifo=false
-two_queue=false
-prefetch_miss=false
-prefetch_access=false
-prefetcher_size=100
-prefetch_past_page=false
-prefetch_serial_squash=false
-prefetch_latency=10000
-prefetch_degree=1
-prefetch_policy=none
-prefetch_cache_check_push=true
-prefetch_use_cpu_id=true
-prefetch_data_accesses_only=false
-
-[system.cpu.toL2Bus]
-type=Bus
-bus_id=0
-clock=1000
-width=64
-responder_set=false
-block_size=64
-
diff --git a/tests/quick/00.hello/ref/alpha/tru64/simple-atomic/config.out b/tests/quick/00.hello/ref/alpha/tru64/simple-atomic/config.out
deleted file mode 100644
index acc734991..000000000
--- a/tests/quick/00.hello/ref/alpha/tru64/simple-atomic/config.out
+++ /dev/null
@@ -1,58 +0,0 @@
-[root]
-type=Root
-dummy=0
-
-[system.physmem]
-type=PhysicalMemory
-file=
-range=[0,134217727]
-latency=1
-zero=false
-
-[system]
-type=System
-physmem=system.physmem
-mem_mode=atomic
-
-[system.membus]
-type=Bus
-bus_id=0
-clock=1000
-width=64
-responder_set=false
-block_size=64
-
-[system.cpu.workload]
-type=LiveProcess
-cmd=hello
-executable=tests/test-progs/hello/bin/alpha/tru64/hello
-input=cin
-output=cout
-env=
-cwd=
-system=system
-uid=100
-euid=100
-gid=100
-egid=100
-pid=100
-ppid=99
-
-[system.cpu]
-type=AtomicSimpleCPU
-max_insts_any_thread=0
-max_insts_all_threads=0
-max_loads_any_thread=0
-max_loads_all_threads=0
-progress_interval=0
-system=system
-cpu_id=0
-workload=system.cpu.workload
-clock=500
-phase=0
-defer_registration=false
-width=1
-function_trace=false
-function_trace_start=0
-simulate_stalls=false
-
diff --git a/tests/quick/00.hello/ref/alpha/tru64/simple-timing/config.out b/tests/quick/00.hello/ref/alpha/tru64/simple-timing/config.out
deleted file mode 100644
index 241630ead..000000000
--- a/tests/quick/00.hello/ref/alpha/tru64/simple-timing/config.out
+++ /dev/null
@@ -1,177 +0,0 @@
-[root]
-type=Root
-dummy=0
-
-[system.physmem]
-type=PhysicalMemory
-file=
-range=[0,134217727]
-latency=1
-zero=false
-
-[system]
-type=System
-physmem=system.physmem
-mem_mode=atomic
-
-[system.membus]
-type=Bus
-bus_id=0
-clock=1000
-width=64
-responder_set=false
-block_size=64
-
-[system.cpu.workload]
-type=LiveProcess
-cmd=hello
-executable=tests/test-progs/hello/bin/alpha/tru64/hello
-input=cin
-output=cout
-env=
-cwd=
-system=system
-uid=100
-euid=100
-gid=100
-egid=100
-pid=100
-ppid=99
-
-[system.cpu]
-type=TimingSimpleCPU
-max_insts_any_thread=0
-max_insts_all_threads=0
-max_loads_any_thread=0
-max_loads_all_threads=0
-progress_interval=0
-system=system
-cpu_id=0
-workload=system.cpu.workload
-clock=500
-phase=0
-defer_registration=false
-// width not specified
-function_trace=false
-function_trace_start=0
-// simulate_stalls not specified
-
-[system.cpu.toL2Bus]
-type=Bus
-bus_id=0
-clock=1000
-width=64
-responder_set=false
-block_size=64
-
-[system.cpu.icache]
-type=BaseCache
-size=131072
-assoc=2
-block_size=64
-latency=1000
-mshrs=10
-tgts_per_mshr=5
-write_buffers=8
-prioritizeRequests=false
-protocol=null
-trace_addr=0
-hash_delay=1
-repl=null
-compressed_bus=false
-store_compressed=false
-adaptive_compression=false
-compression_latency=0
-block_size=64
-max_miss_count=0
-addr_range=[0,18446744073709551615]
-split=false
-split_size=0
-lifo=false
-two_queue=false
-prefetch_miss=false
-prefetch_access=false
-prefetcher_size=100
-prefetch_past_page=false
-prefetch_serial_squash=false
-prefetch_latency=10000
-prefetch_degree=1
-prefetch_policy=none
-prefetch_cache_check_push=true
-prefetch_use_cpu_id=true
-prefetch_data_accesses_only=false
-
-[system.cpu.dcache]
-type=BaseCache
-size=262144
-assoc=2
-block_size=64
-latency=1000
-mshrs=10
-tgts_per_mshr=5
-write_buffers=8
-prioritizeRequests=false
-protocol=null
-trace_addr=0
-hash_delay=1
-repl=null
-compressed_bus=false
-store_compressed=false
-adaptive_compression=false
-compression_latency=0
-block_size=64
-max_miss_count=0
-addr_range=[0,18446744073709551615]
-split=false
-split_size=0
-lifo=false
-two_queue=false
-prefetch_miss=false
-prefetch_access=false
-prefetcher_size=100
-prefetch_past_page=false
-prefetch_serial_squash=false
-prefetch_latency=10000
-prefetch_degree=1
-prefetch_policy=none
-prefetch_cache_check_push=true
-prefetch_use_cpu_id=true
-prefetch_data_accesses_only=false
-
-[system.cpu.l2cache]
-type=BaseCache
-size=2097152
-assoc=2
-block_size=64
-latency=10000
-mshrs=10
-tgts_per_mshr=5
-write_buffers=8
-prioritizeRequests=false
-protocol=null
-trace_addr=0
-hash_delay=1
-repl=null
-compressed_bus=false
-store_compressed=false
-adaptive_compression=false
-compression_latency=0
-block_size=64
-max_miss_count=0
-addr_range=[0,18446744073709551615]
-split=false
-split_size=0
-lifo=false
-two_queue=false
-prefetch_miss=false
-prefetch_access=false
-prefetcher_size=100
-prefetch_past_page=false
-prefetch_serial_squash=false
-prefetch_latency=100000
-prefetch_degree=1
-prefetch_policy=none
-prefetch_cache_check_push=true
-prefetch_use_cpu_id=true
-prefetch_data_accesses_only=false
-
diff --git a/tests/quick/00.hello/ref/mips/linux/simple-atomic/config.out b/tests/quick/00.hello/ref/mips/linux/simple-atomic/config.out
deleted file mode 100644
index 06a3d271d..000000000
--- a/tests/quick/00.hello/ref/mips/linux/simple-atomic/config.out
+++ /dev/null
@@ -1,58 +0,0 @@
-[root]
-type=Root
-dummy=0
-
-[system.physmem]
-type=PhysicalMemory
-file=
-range=[0,134217727]
-latency=1
-zero=false
-
-[system]
-type=System
-physmem=system.physmem
-mem_mode=atomic
-
-[system.membus]
-type=Bus
-bus_id=0
-clock=1000
-width=64
-responder_set=false
-block_size=64
-
-[system.cpu.workload]
-type=LiveProcess
-cmd=hello
-executable=tests/test-progs/hello/bin/mips/linux/hello
-input=cin
-output=cout
-env=
-cwd=
-system=system
-uid=100
-euid=100
-gid=100
-egid=100
-pid=100
-ppid=99
-
-[system.cpu]
-type=AtomicSimpleCPU
-max_insts_any_thread=0
-max_insts_all_threads=0
-max_loads_any_thread=0
-max_loads_all_threads=0
-progress_interval=0
-system=system
-cpu_id=0
-workload=system.cpu.workload
-clock=500
-phase=0
-defer_registration=false
-width=1
-function_trace=false
-function_trace_start=0
-simulate_stalls=false
-
diff --git a/tests/quick/00.hello/ref/mips/linux/simple-timing/config.out b/tests/quick/00.hello/ref/mips/linux/simple-timing/config.out
deleted file mode 100644
index 3f8a51cf4..000000000
--- a/tests/quick/00.hello/ref/mips/linux/simple-timing/config.out
+++ /dev/null
@@ -1,177 +0,0 @@
-[root]
-type=Root
-dummy=0
-
-[system.physmem]
-type=PhysicalMemory
-file=
-range=[0,134217727]
-latency=1
-zero=false
-
-[system]
-type=System
-physmem=system.physmem
-mem_mode=atomic
-
-[system.membus]
-type=Bus
-bus_id=0
-clock=1000
-width=64
-responder_set=false
-block_size=64
-
-[system.cpu.workload]
-type=LiveProcess
-cmd=hello
-executable=tests/test-progs/hello/bin/mips/linux/hello
-input=cin
-output=cout
-env=
-cwd=
-system=system
-uid=100
-euid=100
-gid=100
-egid=100
-pid=100
-ppid=99
-
-[system.cpu]
-type=TimingSimpleCPU
-max_insts_any_thread=0
-max_insts_all_threads=0
-max_loads_any_thread=0
-max_loads_all_threads=0
-progress_interval=0
-system=system
-cpu_id=0
-workload=system.cpu.workload
-clock=500
-phase=0
-defer_registration=false
-// width not specified
-function_trace=false
-function_trace_start=0
-// simulate_stalls not specified
-
-[system.cpu.toL2Bus]
-type=Bus
-bus_id=0
-clock=1000
-width=64
-responder_set=false
-block_size=64
-
-[system.cpu.icache]
-type=BaseCache
-size=131072
-assoc=2
-block_size=64
-latency=1000
-mshrs=10
-tgts_per_mshr=5
-write_buffers=8
-prioritizeRequests=false
-protocol=null
-trace_addr=0
-hash_delay=1
-repl=null
-compressed_bus=false
-store_compressed=false
-adaptive_compression=false
-compression_latency=0
-block_size=64
-max_miss_count=0
-addr_range=[0,18446744073709551615]
-split=false
-split_size=0
-lifo=false
-two_queue=false
-prefetch_miss=false
-prefetch_access=false
-prefetcher_size=100
-prefetch_past_page=false
-prefetch_serial_squash=false
-prefetch_latency=10
-prefetch_degree=1
-prefetch_policy=none
-prefetch_cache_check_push=true
-prefetch_use_cpu_id=true
-prefetch_data_accesses_only=false
-
-[system.cpu.dcache]
-type=BaseCache
-size=262144
-assoc=2
-block_size=64
-latency=1000
-mshrs=10
-tgts_per_mshr=5
-write_buffers=8
-prioritizeRequests=false
-protocol=null
-trace_addr=0
-hash_delay=1
-repl=null
-compressed_bus=false
-store_compressed=false
-adaptive_compression=false
-compression_latency=0
-block_size=64
-max_miss_count=0
-addr_range=[0,18446744073709551615]
-split=false
-split_size=0
-lifo=false
-two_queue=false
-prefetch_miss=false
-prefetch_access=false
-prefetcher_size=100
-prefetch_past_page=false
-prefetch_serial_squash=false
-prefetch_latency=10
-prefetch_degree=1
-prefetch_policy=none
-prefetch_cache_check_push=true
-prefetch_use_cpu_id=true
-prefetch_data_accesses_only=false
-
-[system.cpu.l2cache]
-type=BaseCache
-size=2097152
-assoc=2
-block_size=64
-latency=10000
-mshrs=10
-tgts_per_mshr=5
-write_buffers=8
-prioritizeRequests=false
-protocol=null
-trace_addr=0
-hash_delay=1
-repl=null
-compressed_bus=false
-store_compressed=false
-adaptive_compression=false
-compression_latency=0
-block_size=64
-max_miss_count=0
-addr_range=[0,18446744073709551615]
-split=false
-split_size=0
-lifo=false
-two_queue=false
-prefetch_miss=false
-prefetch_access=false
-prefetcher_size=100
-prefetch_past_page=false
-prefetch_serial_squash=false
-prefetch_latency=10
-prefetch_degree=1
-prefetch_policy=none
-prefetch_cache_check_push=true
-prefetch_use_cpu_id=true
-prefetch_data_accesses_only=false
-
diff --git a/tests/quick/00.hello/ref/sparc/linux/simple-atomic/config.out b/tests/quick/00.hello/ref/sparc/linux/simple-atomic/config.out
deleted file mode 100644
index 1666790d0..000000000
--- a/tests/quick/00.hello/ref/sparc/linux/simple-atomic/config.out
+++ /dev/null
@@ -1,58 +0,0 @@
-[root]
-type=Root
-dummy=0
-
-[system.physmem]
-type=PhysicalMemory
-file=
-range=[0,134217727]
-latency=1
-zero=false
-
-[system]
-type=System
-physmem=system.physmem
-mem_mode=atomic
-
-[system.membus]
-type=Bus
-bus_id=0
-clock=1000
-width=64
-responder_set=false
-block_size=64
-
-[system.cpu.workload]
-type=LiveProcess
-cmd=hello
-executable=tests/test-progs/hello/bin/sparc/linux/hello
-input=cin
-output=cout
-env=
-cwd=
-system=system
-uid=100
-euid=100
-gid=100
-egid=100
-pid=100
-ppid=99
-
-[system.cpu]
-type=AtomicSimpleCPU
-max_insts_any_thread=0
-max_insts_all_threads=0
-max_loads_any_thread=0
-max_loads_all_threads=0
-progress_interval=0
-system=system
-cpu_id=0
-workload=system.cpu.workload
-clock=500
-phase=0
-defer_registration=false
-width=1
-function_trace=false
-function_trace_start=0
-simulate_stalls=false
-
diff --git a/tests/quick/00.hello/ref/sparc/linux/simple-timing/config.out b/tests/quick/00.hello/ref/sparc/linux/simple-timing/config.out
deleted file mode 100644
index 89910d3c9..000000000
--- a/tests/quick/00.hello/ref/sparc/linux/simple-timing/config.out
+++ /dev/null
@@ -1,177 +0,0 @@
-[root]
-type=Root
-dummy=0
-
-[system.physmem]
-type=PhysicalMemory
-file=
-range=[0,134217727]
-latency=1
-zero=false
-
-[system]
-type=System
-physmem=system.physmem
-mem_mode=atomic
-
-[system.membus]
-type=Bus
-bus_id=0
-clock=1000
-width=64
-responder_set=false
-block_size=64
-
-[system.cpu.workload]
-type=LiveProcess
-cmd=hello
-executable=tests/test-progs/hello/bin/sparc/linux/hello
-input=cin
-output=cout
-env=
-cwd=
-system=system
-uid=100
-euid=100
-gid=100
-egid=100
-pid=100
-ppid=99
-
-[system.cpu]
-type=TimingSimpleCPU
-max_insts_any_thread=0
-max_insts_all_threads=0
-max_loads_any_thread=0
-max_loads_all_threads=0
-progress_interval=0
-system=system
-cpu_id=0
-workload=system.cpu.workload
-clock=500
-phase=0
-defer_registration=false
-// width not specified
-function_trace=false
-function_trace_start=0
-// simulate_stalls not specified
-
-[system.cpu.toL2Bus]
-type=Bus
-bus_id=0
-clock=1000
-width=64
-responder_set=false
-block_size=64
-
-[system.cpu.icache]
-type=BaseCache
-size=131072
-assoc=2
-block_size=64
-latency=1000
-mshrs=10
-tgts_per_mshr=5
-write_buffers=8
-prioritizeRequests=false
-protocol=null
-trace_addr=0
-hash_delay=1
-repl=null
-compressed_bus=false
-store_compressed=false
-adaptive_compression=false
-compression_latency=0
-block_size=64
-max_miss_count=0
-addr_range=[0,18446744073709551615]
-split=false
-split_size=0
-lifo=false
-two_queue=false
-prefetch_miss=false
-prefetch_access=false
-prefetcher_size=100
-prefetch_past_page=false
-prefetch_serial_squash=false
-prefetch_latency=10
-prefetch_degree=1
-prefetch_policy=none
-prefetch_cache_check_push=true
-prefetch_use_cpu_id=true
-prefetch_data_accesses_only=false
-
-[system.cpu.dcache]
-type=BaseCache
-size=262144
-assoc=2
-block_size=64
-latency=1000
-mshrs=10
-tgts_per_mshr=5
-write_buffers=8
-prioritizeRequests=false
-protocol=null
-trace_addr=0
-hash_delay=1
-repl=null
-compressed_bus=false
-store_compressed=false
-adaptive_compression=false
-compression_latency=0
-block_size=64
-max_miss_count=0
-addr_range=[0,18446744073709551615]
-split=false
-split_size=0
-lifo=false
-two_queue=false
-prefetch_miss=false
-prefetch_access=false
-prefetcher_size=100
-prefetch_past_page=false
-prefetch_serial_squash=false
-prefetch_latency=10
-prefetch_degree=1
-prefetch_policy=none
-prefetch_cache_check_push=true
-prefetch_use_cpu_id=true
-prefetch_data_accesses_only=false
-
-[system.cpu.l2cache]
-type=BaseCache
-size=2097152
-assoc=2
-block_size=64
-latency=10000
-mshrs=10
-tgts_per_mshr=5
-write_buffers=8
-prioritizeRequests=false
-protocol=null
-trace_addr=0
-hash_delay=1
-repl=null
-compressed_bus=false
-store_compressed=false
-adaptive_compression=false
-compression_latency=0
-block_size=64
-max_miss_count=0
-addr_range=[0,18446744073709551615]
-split=false
-split_size=0
-lifo=false
-two_queue=false
-prefetch_miss=false
-prefetch_access=false
-prefetcher_size=100
-prefetch_past_page=false
-prefetch_serial_squash=false
-prefetch_latency=10
-prefetch_degree=1
-prefetch_policy=none
-prefetch_cache_check_push=true
-prefetch_use_cpu_id=true
-prefetch_data_accesses_only=false
-
diff --git a/tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/config.out b/tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/config.out
deleted file mode 100644
index 45b063eb3..000000000
--- a/tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/config.out
+++ /dev/null
@@ -1,382 +0,0 @@
-[root]
-type=Root
-dummy=0
-
-[system.physmem]
-type=PhysicalMemory
-file=
-range=[0,134217727]
-latency=1
-zero=false
-
-[system]
-type=System
-physmem=system.physmem
-mem_mode=atomic
-
-[system.membus]
-type=Bus
-bus_id=0
-clock=1000
-width=64
-responder_set=false
-block_size=64
-
-[system.cpu.workload0]
-type=LiveProcess
-cmd=hello
-executable=tests/test-progs/hello/bin/alpha/linux/hello
-input=cin
-output=cout
-env=
-cwd=
-system=system
-uid=100
-euid=100
-gid=100
-egid=100
-pid=100
-ppid=99
-
-[system.cpu.workload1]
-type=LiveProcess
-cmd=hello
-executable=tests/test-progs/hello/bin/alpha/linux/hello
-input=cin
-output=cout
-env=
-cwd=
-system=system
-uid=100
-euid=100
-gid=100
-egid=100
-pid=100
-ppid=99
-
-[system.cpu.fuPool.FUList0.opList0]
-type=OpDesc
-opClass=IntAlu
-opLat=1
-issueLat=1
-
-[system.cpu.fuPool.FUList0]
-type=FUDesc
-opList=system.cpu.fuPool.FUList0.opList0
-count=6
-
-[system.cpu.fuPool.FUList1.opList0]
-type=OpDesc
-opClass=IntMult
-opLat=3
-issueLat=1
-
-[system.cpu.fuPool.FUList1.opList1]
-type=OpDesc
-opClass=IntDiv
-opLat=20
-issueLat=19
-
-[system.cpu.fuPool.FUList1]
-type=FUDesc
-opList=system.cpu.fuPool.FUList1.opList0 system.cpu.fuPool.FUList1.opList1
-count=2
-
-[system.cpu.fuPool.FUList2.opList0]
-type=OpDesc
-opClass=FloatAdd
-opLat=2
-issueLat=1
-
-[system.cpu.fuPool.FUList2.opList1]
-type=OpDesc
-opClass=FloatCmp
-opLat=2
-issueLat=1
-
-[system.cpu.fuPool.FUList2.opList2]
-type=OpDesc
-opClass=FloatCvt
-opLat=2
-issueLat=1
-
-[system.cpu.fuPool.FUList2]
-type=FUDesc
-opList=system.cpu.fuPool.FUList2.opList0 system.cpu.fuPool.FUList2.opList1 system.cpu.fuPool.FUList2.opList2
-count=4
-
-[system.cpu.fuPool.FUList3.opList0]
-type=OpDesc
-opClass=FloatMult
-opLat=4
-issueLat=1
-
-[system.cpu.fuPool.FUList3.opList1]
-type=OpDesc
-opClass=FloatDiv
-opLat=12
-issueLat=12
-
-[system.cpu.fuPool.FUList3.opList2]
-type=OpDesc
-opClass=FloatSqrt
-opLat=24
-issueLat=24
-
-[system.cpu.fuPool.FUList3]
-type=FUDesc
-opList=system.cpu.fuPool.FUList3.opList0 system.cpu.fuPool.FUList3.opList1 system.cpu.fuPool.FUList3.opList2
-count=2
-
-[system.cpu.fuPool.FUList4.opList0]
-type=OpDesc
-opClass=MemRead
-opLat=1
-issueLat=1
-
-[system.cpu.fuPool.FUList4]
-type=FUDesc
-opList=system.cpu.fuPool.FUList4.opList0
-count=0
-
-[system.cpu.fuPool.FUList5.opList0]
-type=OpDesc
-opClass=MemWrite
-opLat=1
-issueLat=1
-
-[system.cpu.fuPool.FUList5]
-type=FUDesc
-opList=system.cpu.fuPool.FUList5.opList0
-count=0
-
-[system.cpu.fuPool.FUList6.opList0]
-type=OpDesc
-opClass=MemRead
-opLat=1
-issueLat=1
-
-[system.cpu.fuPool.FUList6.opList1]
-type=OpDesc
-opClass=MemWrite
-opLat=1
-issueLat=1
-
-[system.cpu.fuPool.FUList6]
-type=FUDesc
-opList=system.cpu.fuPool.FUList6.opList0 system.cpu.fuPool.FUList6.opList1
-count=4
-
-[system.cpu.fuPool.FUList7.opList0]
-type=OpDesc
-opClass=IprAccess
-opLat=3
-issueLat=3
-
-[system.cpu.fuPool.FUList7]
-type=FUDesc
-opList=system.cpu.fuPool.FUList7.opList0
-count=1
-
-[system.cpu.fuPool]
-type=FUPool
-FUList=system.cpu.fuPool.FUList0 system.cpu.fuPool.FUList1 system.cpu.fuPool.FUList2 system.cpu.fuPool.FUList3 system.cpu.fuPool.FUList4 system.cpu.fuPool.FUList5 system.cpu.fuPool.FUList6 system.cpu.fuPool.FUList7
-
-[system.cpu]
-type=DerivO3CPU
-clock=500
-phase=0
-numThreads=1
-cpu_id=0
-activity=0
-workload=system.cpu.workload0 system.cpu.workload1
-checker=null
-max_insts_any_thread=0
-max_insts_all_threads=0
-max_loads_any_thread=0
-max_loads_all_threads=0
-progress_interval=0
-cachePorts=200
-decodeToFetchDelay=1
-renameToFetchDelay=1
-iewToFetchDelay=1
-commitToFetchDelay=1
-fetchWidth=8
-renameToDecodeDelay=1
-iewToDecodeDelay=1
-commitToDecodeDelay=1
-fetchToDecodeDelay=1
-decodeWidth=8
-iewToRenameDelay=1
-commitToRenameDelay=1
-decodeToRenameDelay=1
-renameWidth=8
-commitToIEWDelay=1
-renameToIEWDelay=2
-issueToExecuteDelay=1
-dispatchWidth=8
-issueWidth=8
-wbWidth=8
-wbDepth=1
-fuPool=system.cpu.fuPool
-iewToCommitDelay=1
-renameToROBDelay=1
-commitWidth=8
-squashWidth=8
-trapLatency=13
-backComSize=5
-forwardComSize=5
-predType=tournament
-localPredictorSize=2048
-localCtrBits=2
-localHistoryTableSize=2048
-localHistoryBits=11
-globalPredictorSize=8192
-globalCtrBits=2
-globalHistoryBits=13
-choicePredictorSize=8192
-choiceCtrBits=2
-BTBEntries=4096
-BTBTagSize=16
-RASSize=16
-LQEntries=32
-SQEntries=32
-LFSTSize=1024
-SSITSize=1024
-numPhysIntRegs=256
-numPhysFloatRegs=256
-numIQEntries=64
-numROBEntries=192
-smtNumFetchingThreads=1
-smtFetchPolicy=SingleThread
-smtLSQPolicy=Partitioned
-smtLSQThreshold=100
-smtIQPolicy=Partitioned
-smtIQThreshold=100
-smtROBPolicy=Partitioned
-smtROBThreshold=100
-smtCommitPolicy=RoundRobin
-instShiftAmt=2
-defer_registration=false
-function_trace=false
-function_trace_start=0
-
-[system.cpu.icache]
-type=BaseCache
-size=131072
-assoc=2
-block_size=64
-latency=1000
-mshrs=10
-tgts_per_mshr=20
-write_buffers=8
-prioritizeRequests=false
-protocol=null
-trace_addr=0
-hash_delay=1
-repl=null
-compressed_bus=false
-store_compressed=false
-adaptive_compression=false
-compression_latency=0
-block_size=64
-max_miss_count=0
-addr_range=[0,18446744073709551615]
-split=false
-split_size=0
-lifo=false
-two_queue=false
-prefetch_miss=false
-prefetch_access=false
-prefetcher_size=100
-prefetch_past_page=false
-prefetch_serial_squash=false
-prefetch_latency=10000
-prefetch_degree=1
-prefetch_policy=none
-prefetch_cache_check_push=true
-prefetch_use_cpu_id=true
-prefetch_data_accesses_only=false
-
-[system.cpu.dcache]
-type=BaseCache
-size=262144
-assoc=2
-block_size=64
-latency=1000
-mshrs=10
-tgts_per_mshr=20
-write_buffers=8
-prioritizeRequests=false
-protocol=null
-trace_addr=0
-hash_delay=1
-repl=null
-compressed_bus=false
-store_compressed=false
-adaptive_compression=false
-compression_latency=0
-block_size=64
-max_miss_count=0
-addr_range=[0,18446744073709551615]
-split=false
-split_size=0
-lifo=false
-two_queue=false
-prefetch_miss=false
-prefetch_access=false
-prefetcher_size=100
-prefetch_past_page=false
-prefetch_serial_squash=false
-prefetch_latency=10000
-prefetch_degree=1
-prefetch_policy=none
-prefetch_cache_check_push=true
-prefetch_use_cpu_id=true
-prefetch_data_accesses_only=false
-
-[system.cpu.l2cache]
-type=BaseCache
-size=2097152
-assoc=2
-block_size=64
-latency=1000
-mshrs=10
-tgts_per_mshr=5
-write_buffers=8
-prioritizeRequests=false
-protocol=null
-trace_addr=0
-hash_delay=1
-repl=null
-compressed_bus=false
-store_compressed=false
-adaptive_compression=false
-compression_latency=0
-block_size=64
-max_miss_count=0
-addr_range=[0,18446744073709551615]
-split=false
-split_size=0
-lifo=false
-two_queue=false
-prefetch_miss=false
-prefetch_access=false
-prefetcher_size=100
-prefetch_past_page=false
-prefetch_serial_squash=false
-prefetch_latency=10000
-prefetch_degree=1
-prefetch_policy=none
-prefetch_cache_check_push=true
-prefetch_use_cpu_id=true
-prefetch_data_accesses_only=false
-
-[system.cpu.toL2Bus]
-type=Bus
-bus_id=0
-clock=1000
-width=64
-responder_set=false
-block_size=64
-
diff --git a/tests/quick/02.insttest/ref/sparc/linux/o3-timing/config.out b/tests/quick/02.insttest/ref/sparc/linux/o3-timing/config.out
deleted file mode 100644
index bdf29a72a..000000000
--- a/tests/quick/02.insttest/ref/sparc/linux/o3-timing/config.out
+++ /dev/null
@@ -1,366 +0,0 @@
-[root]
-type=Root
-dummy=0
-
-[system.physmem]
-type=PhysicalMemory
-file=
-range=[0,134217727]
-latency=1
-zero=false
-
-[system]
-type=System
-physmem=system.physmem
-mem_mode=atomic
-
-[system.membus]
-type=Bus
-bus_id=0
-clock=1000
-width=64
-responder_set=false
-block_size=64
-
-[system.cpu.workload]
-type=LiveProcess
-cmd=insttest
-executable=tests/test-progs/insttest/bin/sparc/linux/insttest
-input=cin
-output=cout
-env=
-cwd=
-system=system
-uid=100
-euid=100
-gid=100
-egid=100
-pid=100
-ppid=99
-
-[system.cpu.fuPool.FUList0.opList0]
-type=OpDesc
-opClass=IntAlu
-opLat=1
-issueLat=1
-
-[system.cpu.fuPool.FUList0]
-type=FUDesc
-opList=system.cpu.fuPool.FUList0.opList0
-count=6
-
-[system.cpu.fuPool.FUList1.opList0]
-type=OpDesc
-opClass=IntMult
-opLat=3
-issueLat=1
-
-[system.cpu.fuPool.FUList1.opList1]
-type=OpDesc
-opClass=IntDiv
-opLat=20
-issueLat=19
-
-[system.cpu.fuPool.FUList1]
-type=FUDesc
-opList=system.cpu.fuPool.FUList1.opList0 system.cpu.fuPool.FUList1.opList1
-count=2
-
-[system.cpu.fuPool.FUList2.opList0]
-type=OpDesc
-opClass=FloatAdd
-opLat=2
-issueLat=1
-
-[system.cpu.fuPool.FUList2.opList1]
-type=OpDesc
-opClass=FloatCmp
-opLat=2
-issueLat=1
-
-[system.cpu.fuPool.FUList2.opList2]
-type=OpDesc
-opClass=FloatCvt
-opLat=2
-issueLat=1
-
-[system.cpu.fuPool.FUList2]
-type=FUDesc
-opList=system.cpu.fuPool.FUList2.opList0 system.cpu.fuPool.FUList2.opList1 system.cpu.fuPool.FUList2.opList2
-count=4
-
-[system.cpu.fuPool.FUList3.opList0]
-type=OpDesc
-opClass=FloatMult
-opLat=4
-issueLat=1
-
-[system.cpu.fuPool.FUList3.opList1]
-type=OpDesc
-opClass=FloatDiv
-opLat=12
-issueLat=12
-
-[system.cpu.fuPool.FUList3.opList2]
-type=OpDesc
-opClass=FloatSqrt
-opLat=24
-issueLat=24
-
-[system.cpu.fuPool.FUList3]
-type=FUDesc
-opList=system.cpu.fuPool.FUList3.opList0 system.cpu.fuPool.FUList3.opList1 system.cpu.fuPool.FUList3.opList2
-count=2
-
-[system.cpu.fuPool.FUList4.opList0]
-type=OpDesc
-opClass=MemRead
-opLat=1
-issueLat=1
-
-[system.cpu.fuPool.FUList4]
-type=FUDesc
-opList=system.cpu.fuPool.FUList4.opList0
-count=0
-
-[system.cpu.fuPool.FUList5.opList0]
-type=OpDesc
-opClass=MemWrite
-opLat=1
-issueLat=1
-
-[system.cpu.fuPool.FUList5]
-type=FUDesc
-opList=system.cpu.fuPool.FUList5.opList0
-count=0
-
-[system.cpu.fuPool.FUList6.opList0]
-type=OpDesc
-opClass=MemRead
-opLat=1
-issueLat=1
-
-[system.cpu.fuPool.FUList6.opList1]
-type=OpDesc
-opClass=MemWrite
-opLat=1
-issueLat=1
-
-[system.cpu.fuPool.FUList6]
-type=FUDesc
-opList=system.cpu.fuPool.FUList6.opList0 system.cpu.fuPool.FUList6.opList1
-count=4
-
-[system.cpu.fuPool.FUList7.opList0]
-type=OpDesc
-opClass=IprAccess
-opLat=3
-issueLat=3
-
-[system.cpu.fuPool.FUList7]
-type=FUDesc
-opList=system.cpu.fuPool.FUList7.opList0
-count=1
-
-[system.cpu.fuPool]
-type=FUPool
-FUList=system.cpu.fuPool.FUList0 system.cpu.fuPool.FUList1 system.cpu.fuPool.FUList2 system.cpu.fuPool.FUList3 system.cpu.fuPool.FUList4 system.cpu.fuPool.FUList5 system.cpu.fuPool.FUList6 system.cpu.fuPool.FUList7
-
-[system.cpu]
-type=DerivO3CPU
-clock=500
-phase=0
-numThreads=1
-cpu_id=0
-activity=0
-workload=system.cpu.workload
-checker=null
-max_insts_any_thread=0
-max_insts_all_threads=0
-max_loads_any_thread=0
-max_loads_all_threads=0
-progress_interval=0
-cachePorts=200
-decodeToFetchDelay=1
-renameToFetchDelay=1
-iewToFetchDelay=1
-commitToFetchDelay=1
-fetchWidth=8
-renameToDecodeDelay=1
-iewToDecodeDelay=1
-commitToDecodeDelay=1
-fetchToDecodeDelay=1
-decodeWidth=8
-iewToRenameDelay=1
-commitToRenameDelay=1
-decodeToRenameDelay=1
-renameWidth=8
-commitToIEWDelay=1
-renameToIEWDelay=2
-issueToExecuteDelay=1
-dispatchWidth=8
-issueWidth=8
-wbWidth=8
-wbDepth=1
-fuPool=system.cpu.fuPool
-iewToCommitDelay=1
-renameToROBDelay=1
-commitWidth=8
-squashWidth=8
-trapLatency=13
-backComSize=5
-forwardComSize=5
-predType=tournament
-localPredictorSize=2048
-localCtrBits=2
-localHistoryTableSize=2048
-localHistoryBits=11
-globalPredictorSize=8192
-globalCtrBits=2
-globalHistoryBits=13
-choicePredictorSize=8192
-choiceCtrBits=2
-BTBEntries=4096
-BTBTagSize=16
-RASSize=16
-LQEntries=32
-SQEntries=32
-LFSTSize=1024
-SSITSize=1024
-numPhysIntRegs=256
-numPhysFloatRegs=256
-numIQEntries=64
-numROBEntries=192
-smtNumFetchingThreads=1
-smtFetchPolicy=SingleThread
-smtLSQPolicy=Partitioned
-smtLSQThreshold=100
-smtIQPolicy=Partitioned
-smtIQThreshold=100
-smtROBPolicy=Partitioned
-smtROBThreshold=100
-smtCommitPolicy=RoundRobin
-instShiftAmt=2
-defer_registration=false
-function_trace=false
-function_trace_start=0
-
-[system.cpu.icache]
-type=BaseCache
-size=131072
-assoc=2
-block_size=64
-latency=1000
-mshrs=10
-tgts_per_mshr=20
-write_buffers=8
-prioritizeRequests=false
-protocol=null
-trace_addr=0
-hash_delay=1
-repl=null
-compressed_bus=false
-store_compressed=false
-adaptive_compression=false
-compression_latency=0
-block_size=64
-max_miss_count=0
-addr_range=[0,18446744073709551615]
-split=false
-split_size=0
-lifo=false
-two_queue=false
-prefetch_miss=false
-prefetch_access=false
-prefetcher_size=100
-prefetch_past_page=false
-prefetch_serial_squash=false
-prefetch_latency=10000
-prefetch_degree=1
-prefetch_policy=none
-prefetch_cache_check_push=true
-prefetch_use_cpu_id=true
-prefetch_data_accesses_only=false
-
-[system.cpu.dcache]
-type=BaseCache
-size=262144
-assoc=2
-block_size=64
-latency=1000
-mshrs=10
-tgts_per_mshr=20
-write_buffers=8
-prioritizeRequests=false
-protocol=null
-trace_addr=0
-hash_delay=1
-repl=null
-compressed_bus=false
-store_compressed=false
-adaptive_compression=false
-compression_latency=0
-block_size=64
-max_miss_count=0
-addr_range=[0,18446744073709551615]
-split=false
-split_size=0
-lifo=false
-two_queue=false
-prefetch_miss=false
-prefetch_access=false
-prefetcher_size=100
-prefetch_past_page=false
-prefetch_serial_squash=false
-prefetch_latency=10000
-prefetch_degree=1
-prefetch_policy=none
-prefetch_cache_check_push=true
-prefetch_use_cpu_id=true
-prefetch_data_accesses_only=false
-
-[system.cpu.l2cache]
-type=BaseCache
-size=2097152
-assoc=2
-block_size=64
-latency=1000
-mshrs=10
-tgts_per_mshr=5
-write_buffers=8
-prioritizeRequests=false
-protocol=null
-trace_addr=0
-hash_delay=1
-repl=null
-compressed_bus=false
-store_compressed=false
-adaptive_compression=false
-compression_latency=0
-block_size=64
-max_miss_count=0
-addr_range=[0,18446744073709551615]
-split=false
-split_size=0
-lifo=false
-two_queue=false
-prefetch_miss=false
-prefetch_access=false
-prefetcher_size=100
-prefetch_past_page=false
-prefetch_serial_squash=false
-prefetch_latency=10000
-prefetch_degree=1
-prefetch_policy=none
-prefetch_cache_check_push=true
-prefetch_use_cpu_id=true
-prefetch_data_accesses_only=false
-
-[system.cpu.toL2Bus]
-type=Bus
-bus_id=0
-clock=1000
-width=64
-responder_set=false
-block_size=64
-
diff --git a/tests/quick/02.insttest/ref/sparc/linux/simple-atomic/config.out b/tests/quick/02.insttest/ref/sparc/linux/simple-atomic/config.out
deleted file mode 100644
index c1a77ba0d..000000000
--- a/tests/quick/02.insttest/ref/sparc/linux/simple-atomic/config.out
+++ /dev/null
@@ -1,58 +0,0 @@
-[root]
-type=Root
-dummy=0
-
-[system.physmem]
-type=PhysicalMemory
-file=
-range=[0,134217727]
-latency=1
-zero=false
-
-[system]
-type=System
-physmem=system.physmem
-mem_mode=atomic
-
-[system.membus]
-type=Bus
-bus_id=0
-clock=1000
-width=64
-responder_set=false
-block_size=64
-
-[system.cpu.workload]
-type=LiveProcess
-cmd=insttest
-executable=tests/test-progs/insttest/bin/sparc/linux/insttest
-input=cin
-output=cout
-env=
-cwd=
-system=system
-uid=100
-euid=100
-gid=100
-egid=100
-pid=100
-ppid=99
-
-[system.cpu]
-type=AtomicSimpleCPU
-max_insts_any_thread=0
-max_insts_all_threads=0
-max_loads_any_thread=0
-max_loads_all_threads=0
-progress_interval=0
-system=system
-cpu_id=0
-workload=system.cpu.workload
-clock=500
-phase=0
-defer_registration=false
-width=1
-function_trace=false
-function_trace_start=0
-simulate_stalls=false
-
diff --git a/tests/quick/02.insttest/ref/sparc/linux/simple-timing/config.out b/tests/quick/02.insttest/ref/sparc/linux/simple-timing/config.out
deleted file mode 100644
index df1a9c852..000000000
--- a/tests/quick/02.insttest/ref/sparc/linux/simple-timing/config.out
+++ /dev/null
@@ -1,177 +0,0 @@
-[root]
-type=Root
-dummy=0
-
-[system.physmem]
-type=PhysicalMemory
-file=
-range=[0,134217727]
-latency=1
-zero=false
-
-[system]
-type=System
-physmem=system.physmem
-mem_mode=atomic
-
-[system.membus]
-type=Bus
-bus_id=0
-clock=1000
-width=64
-responder_set=false
-block_size=64
-
-[system.cpu.workload]
-type=LiveProcess
-cmd=insttest
-executable=tests/test-progs/insttest/bin/sparc/linux/insttest
-input=cin
-output=cout
-env=
-cwd=
-system=system
-uid=100
-euid=100
-gid=100
-egid=100
-pid=100
-ppid=99
-
-[system.cpu]
-type=TimingSimpleCPU
-max_insts_any_thread=0
-max_insts_all_threads=0
-max_loads_any_thread=0
-max_loads_all_threads=0
-progress_interval=0
-system=system
-cpu_id=0
-workload=system.cpu.workload
-clock=500
-phase=0
-defer_registration=false
-// width not specified
-function_trace=false
-function_trace_start=0
-// simulate_stalls not specified
-
-[system.cpu.toL2Bus]
-type=Bus
-bus_id=0
-clock=1000
-width=64
-responder_set=false
-block_size=64
-
-[system.cpu.icache]
-type=BaseCache
-size=131072
-assoc=2
-block_size=64
-latency=1000
-mshrs=10
-tgts_per_mshr=5
-write_buffers=8
-prioritizeRequests=false
-protocol=null
-trace_addr=0
-hash_delay=1
-repl=null
-compressed_bus=false
-store_compressed=false
-adaptive_compression=false
-compression_latency=0
-block_size=64
-max_miss_count=0
-addr_range=[0,18446744073709551615]
-split=false
-split_size=0
-lifo=false
-two_queue=false
-prefetch_miss=false
-prefetch_access=false
-prefetcher_size=100
-prefetch_past_page=false
-prefetch_serial_squash=false
-prefetch_latency=10
-prefetch_degree=1
-prefetch_policy=none
-prefetch_cache_check_push=true
-prefetch_use_cpu_id=true
-prefetch_data_accesses_only=false
-
-[system.cpu.dcache]
-type=BaseCache
-size=262144
-assoc=2
-block_size=64
-latency=1000
-mshrs=10
-tgts_per_mshr=5
-write_buffers=8
-prioritizeRequests=false
-protocol=null
-trace_addr=0
-hash_delay=1
-repl=null
-compressed_bus=false
-store_compressed=false
-adaptive_compression=false
-compression_latency=0
-block_size=64
-max_miss_count=0
-addr_range=[0,18446744073709551615]
-split=false
-split_size=0
-lifo=false
-two_queue=false
-prefetch_miss=false
-prefetch_access=false
-prefetcher_size=100
-prefetch_past_page=false
-prefetch_serial_squash=false
-prefetch_latency=10
-prefetch_degree=1
-prefetch_policy=none
-prefetch_cache_check_push=true
-prefetch_use_cpu_id=true
-prefetch_data_accesses_only=false
-
-[system.cpu.l2cache]
-type=BaseCache
-size=2097152
-assoc=2
-block_size=64
-latency=10000
-mshrs=10
-tgts_per_mshr=5
-write_buffers=8
-prioritizeRequests=false
-protocol=null
-trace_addr=0
-hash_delay=1
-repl=null
-compressed_bus=false
-store_compressed=false
-adaptive_compression=false
-compression_latency=0
-block_size=64
-max_miss_count=0
-addr_range=[0,18446744073709551615]
-split=false
-split_size=0
-lifo=false
-two_queue=false
-prefetch_miss=false
-prefetch_access=false
-prefetcher_size=100
-prefetch_past_page=false
-prefetch_serial_squash=false
-prefetch_latency=10
-prefetch_degree=1
-prefetch_policy=none
-prefetch_cache_check_push=true
-prefetch_use_cpu_id=true
-prefetch_data_accesses_only=false
-
diff --git a/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-atomic-dual/config.out b/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-atomic-dual/config.out
deleted file mode 100644
index 1461f2550..000000000
--- a/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-atomic-dual/config.out
+++ /dev/null
@@ -1,888 +0,0 @@
-[root]
-type=Root
-dummy=0
-
-[system.physmem]
-type=PhysicalMemory
-file=
-range=[0,134217727]
-latency=1
-zero=false
-
-[system]
-type=LinuxAlphaSystem
-boot_cpu_frequency=500
-physmem=system.physmem
-mem_mode=atomic
-kernel=/dist/m5/system/binaries/vmlinux
-console=/dist/m5/system/binaries/console
-pal=/dist/m5/system/binaries/ts_osfpal
-boot_osflags=root=/dev/hda1 console=ttyS0
-readfile=tests/halt.sh
-symbolfile=
-init_param=0
-system_type=34
-system_rev=1024
-
-[system.membus]
-type=Bus
-bus_id=1
-clock=1000
-width=64
-responder_set=false
-block_size=64
-
-[system.intrctrl]
-type=IntrControl
-sys=system
-
-[system.tsunami]
-type=Tsunami
-system=system
-intrctrl=system.intrctrl
-
-[system.membus.responder]
-type=IsaFake
-pio_addr=0
-pio_latency=1
-pio_size=8
-ret_bad_addr=true
-update_data=false
-warn_access=
-ret_data8=255
-ret_data16=65535
-ret_data32=4294967295
-ret_data64=18446744073709551615
-platform=system.tsunami
-system=system
-
-[system.l2c]
-type=BaseCache
-size=4194304
-assoc=8
-block_size=64
-latency=10000
-mshrs=92
-tgts_per_mshr=16
-write_buffers=8
-prioritizeRequests=false
-protocol=null
-trace_addr=0
-hash_delay=1
-repl=null
-compressed_bus=false
-store_compressed=false
-adaptive_compression=false
-compression_latency=0
-block_size=64
-max_miss_count=0
-addr_range=[0,18446744073709551615]
-split=false
-split_size=0
-lifo=false
-two_queue=false
-prefetch_miss=false
-prefetch_access=false
-prefetcher_size=100
-prefetch_past_page=false
-prefetch_serial_squash=false
-prefetch_latency=100000
-prefetch_degree=1
-prefetch_policy=none
-prefetch_cache_check_push=true
-prefetch_use_cpu_id=true
-prefetch_data_accesses_only=false
-
-[system.bridge]
-type=Bridge
-req_size_a=16
-req_size_b=16
-resp_size_a=16
-resp_size_b=16
-delay=50000
-nack_delay=4000
-write_ack=false
-fix_partial_write_a=false
-fix_partial_write_b=true
-
-[system.disk0.image.child]
-type=RawDiskImage
-image_file=/dist/m5/system/disks/linux-latest.img
-read_only=true
-
-[system.disk0.image]
-type=CowDiskImage
-child=system.disk0.image.child
-image_file=
-table_size=65536
-read_only=false
-
-[system.disk0]
-type=IdeDisk
-image=system.disk0.image
-driveID=master
-delay=1000000
-
-[system.disk2.image.child]
-type=RawDiskImage
-image_file=/dist/m5/system/disks/linux-bigswap2.img
-read_only=true
-
-[system.disk2.image]
-type=CowDiskImage
-child=system.disk2.image.child
-image_file=
-table_size=65536
-read_only=false
-
-[system.disk2]
-type=IdeDisk
-image=system.disk2.image
-driveID=master
-delay=1000000
-
-[system.cpu0.itb]
-type=AlphaITB
-size=48
-
-[system.cpu0.dtb]
-type=AlphaDTB
-size=64
-
-[system.cpu0]
-type=AtomicSimpleCPU
-max_insts_any_thread=0
-max_insts_all_threads=0
-max_loads_any_thread=0
-max_loads_all_threads=0
-progress_interval=0
-system=system
-cpu_id=0
-itb=system.cpu0.itb
-dtb=system.cpu0.dtb
-profile=0
-do_quiesce=true
-do_checkpoint_insts=true
-do_statistics_insts=true
-clock=500
-phase=0
-defer_registration=false
-width=1
-function_trace=false
-function_trace_start=0
-simulate_stalls=false
-
-[system.cpu0.icache.protocol]
-type=CoherenceProtocol
-protocol=moesi
-do_upgrades=true
-
-[system.cpu0.icache]
-type=BaseCache
-size=32768
-assoc=1
-block_size=64
-latency=1000
-mshrs=4
-tgts_per_mshr=8
-write_buffers=8
-prioritizeRequests=false
-protocol=system.cpu0.icache.protocol
-trace_addr=0
-hash_delay=1
-repl=null
-compressed_bus=false
-store_compressed=false
-adaptive_compression=false
-compression_latency=0
-block_size=64
-max_miss_count=0
-addr_range=[0,18446744073709551615]
-split=false
-split_size=0
-lifo=false
-two_queue=false
-prefetch_miss=false
-prefetch_access=false
-prefetcher_size=100
-prefetch_past_page=false
-prefetch_serial_squash=false
-prefetch_latency=10000
-prefetch_degree=1
-prefetch_policy=none
-prefetch_cache_check_push=true
-prefetch_use_cpu_id=true
-prefetch_data_accesses_only=false
-
-[system.cpu0.dcache.protocol]
-type=CoherenceProtocol
-protocol=moesi
-do_upgrades=true
-
-[system.cpu0.dcache]
-type=BaseCache
-size=32768
-assoc=4
-block_size=64
-latency=1000
-mshrs=4
-tgts_per_mshr=8
-write_buffers=8
-prioritizeRequests=false
-protocol=system.cpu0.dcache.protocol
-trace_addr=0
-hash_delay=1
-repl=null
-compressed_bus=false
-store_compressed=false
-adaptive_compression=false
-compression_latency=0
-block_size=64
-max_miss_count=0
-addr_range=[0,18446744073709551615]
-split=false
-split_size=0
-lifo=false
-two_queue=false
-prefetch_miss=false
-prefetch_access=false
-prefetcher_size=100
-prefetch_past_page=false
-prefetch_serial_squash=false
-prefetch_latency=10000
-prefetch_degree=1
-prefetch_policy=none
-prefetch_cache_check_push=true
-prefetch_use_cpu_id=true
-prefetch_data_accesses_only=false
-
-[system.cpu1.itb]
-type=AlphaITB
-size=48
-
-[system.cpu1.dtb]
-type=AlphaDTB
-size=64
-
-[system.cpu1]
-type=AtomicSimpleCPU
-max_insts_any_thread=0
-max_insts_all_threads=0
-max_loads_any_thread=0
-max_loads_all_threads=0
-progress_interval=0
-system=system
-cpu_id=1
-itb=system.cpu1.itb
-dtb=system.cpu1.dtb
-profile=0
-do_quiesce=true
-do_checkpoint_insts=true
-do_statistics_insts=true
-clock=500
-phase=0
-defer_registration=false
-width=1
-function_trace=false
-function_trace_start=0
-simulate_stalls=false
-
-[system.cpu1.icache.protocol]
-type=CoherenceProtocol
-protocol=moesi
-do_upgrades=true
-
-[system.cpu1.icache]
-type=BaseCache
-size=32768
-assoc=1
-block_size=64
-latency=1000
-mshrs=4
-tgts_per_mshr=8
-write_buffers=8
-prioritizeRequests=false
-protocol=system.cpu1.icache.protocol
-trace_addr=0
-hash_delay=1
-repl=null
-compressed_bus=false
-store_compressed=false
-adaptive_compression=false
-compression_latency=0
-block_size=64
-max_miss_count=0
-addr_range=[0,18446744073709551615]
-split=false
-split_size=0
-lifo=false
-two_queue=false
-prefetch_miss=false
-prefetch_access=false
-prefetcher_size=100
-prefetch_past_page=false
-prefetch_serial_squash=false
-prefetch_latency=10000
-prefetch_degree=1
-prefetch_policy=none
-prefetch_cache_check_push=true
-prefetch_use_cpu_id=true
-prefetch_data_accesses_only=false
-
-[system.cpu1.dcache.protocol]
-type=CoherenceProtocol
-protocol=moesi
-do_upgrades=true
-
-[system.cpu1.dcache]
-type=BaseCache
-size=32768
-assoc=4
-block_size=64
-latency=1000
-mshrs=4
-tgts_per_mshr=8
-write_buffers=8
-prioritizeRequests=false
-protocol=system.cpu1.dcache.protocol
-trace_addr=0
-hash_delay=1
-repl=null
-compressed_bus=false
-store_compressed=false
-adaptive_compression=false
-compression_latency=0
-block_size=64
-max_miss_count=0
-addr_range=[0,18446744073709551615]
-split=false
-split_size=0
-lifo=false
-two_queue=false
-prefetch_miss=false
-prefetch_access=false
-prefetcher_size=100
-prefetch_past_page=false
-prefetch_serial_squash=false
-prefetch_latency=10000
-prefetch_degree=1
-prefetch_policy=none
-prefetch_cache_check_push=true
-prefetch_use_cpu_id=true
-prefetch_data_accesses_only=false
-
-[system.simple_disk.disk]
-type=RawDiskImage
-image_file=/dist/m5/system/disks/linux-latest.img
-read_only=true
-
-[system.simple_disk]
-type=SimpleDisk
-system=system
-disk=system.simple_disk.disk
-
-[system.tsunami.fake_uart1]
-type=IsaFake
-pio_addr=8804615848696
-pio_latency=1000
-pio_size=8
-ret_bad_addr=false
-update_data=false
-warn_access=
-ret_data8=255
-ret_data16=65535
-ret_data32=4294967295
-ret_data64=18446744073709551615
-platform=system.tsunami
-system=system
-
-[system.tsunami.fake_uart2]
-type=IsaFake
-pio_addr=8804615848936
-pio_latency=1000
-pio_size=8
-ret_bad_addr=false
-update_data=false
-warn_access=
-ret_data8=255
-ret_data16=65535
-ret_data32=4294967295
-ret_data64=18446744073709551615
-platform=system.tsunami
-system=system
-
-[system.tsunami.fake_uart3]
-type=IsaFake
-pio_addr=8804615848680
-pio_latency=1000
-pio_size=8
-ret_bad_addr=false
-update_data=false
-warn_access=
-ret_data8=255
-ret_data16=65535
-ret_data32=4294967295
-ret_data64=18446744073709551615
-platform=system.tsunami
-system=system
-
-[system.tsunami.fake_uart4]
-type=IsaFake
-pio_addr=8804615848944
-pio_latency=1000
-pio_size=8
-ret_bad_addr=false
-update_data=false
-warn_access=
-ret_data8=255
-ret_data16=65535
-ret_data32=4294967295
-ret_data64=18446744073709551615
-platform=system.tsunami
-system=system
-
-[system.tsunami.fake_ppc]
-type=IsaFake
-pio_addr=8804615848891
-pio_latency=1000
-pio_size=8
-ret_bad_addr=false
-update_data=false
-warn_access=
-ret_data8=255
-ret_data16=65535
-ret_data32=4294967295
-ret_data64=18446744073709551615
-platform=system.tsunami
-system=system
-
-[system.tsunami.cchip]
-type=TsunamiCChip
-pio_addr=8803072344064
-pio_latency=1000
-platform=system.tsunami
-system=system
-tsunami=system.tsunami
-
-[system.tsunami.io]
-type=TsunamiIO
-pio_addr=8804615847936
-pio_latency=1000
-frequency=976562500
-platform=system.tsunami
-system=system
-time=2009 1 1 0 0 0 3 1
-year_is_bcd=false
-tsunami=system.tsunami
-
-[]
-type=PciConfigAll
-pio_latency=1
-bus=0
-size=16777216
-platform=system.tsunami
-system=system
-
-[system.sim_console]
-type=SimConsole
-intr_control=system.intrctrl
-output=console
-port=3456
-append_name=true
-number=0
-
-[system.tsunami.console]
-type=AlphaConsole
-sim_console=system.sim_console
-disk=system.simple_disk
-pio_addr=8804682956800
-system=system
-cpu=system.cpu0
-platform=system.tsunami
-pio_latency=1000
-
-[system.tsunami.fake_ata1]
-type=IsaFake
-pio_addr=8804615848304
-pio_latency=1000
-pio_size=8
-ret_bad_addr=false
-update_data=false
-warn_access=
-ret_data8=255
-ret_data16=65535
-ret_data32=4294967295
-ret_data64=18446744073709551615
-platform=system.tsunami
-system=system
-
-[system.tsunami.fake_ata0]
-type=IsaFake
-pio_addr=8804615848432
-pio_latency=1000
-pio_size=8
-ret_bad_addr=false
-update_data=false
-warn_access=
-ret_data8=255
-ret_data16=65535
-ret_data32=4294967295
-ret_data64=18446744073709551615
-platform=system.tsunami
-system=system
-
-[system.tsunami.pchip]
-type=TsunamiPChip
-pio_addr=8802535473152
-pio_latency=1000
-platform=system.tsunami
-system=system
-tsunami=system.tsunami
-
-[system.tsunami.fake_pnp_read3]
-type=IsaFake
-pio_addr=8804615848643
-pio_latency=1000
-pio_size=8
-ret_bad_addr=false
-update_data=false
-warn_access=
-ret_data8=255
-ret_data16=65535
-ret_data32=4294967295
-ret_data64=18446744073709551615
-platform=system.tsunami
-system=system
-
-[system.tsunami.fake_pnp_read2]
-type=IsaFake
-pio_addr=8804615848579
-pio_latency=1000
-pio_size=8
-ret_bad_addr=false
-update_data=false
-warn_access=
-ret_data8=255
-ret_data16=65535
-ret_data32=4294967295
-ret_data64=18446744073709551615
-platform=system.tsunami
-system=system
-
-[system.tsunami.fake_pnp_read1]
-type=IsaFake
-pio_addr=8804615848515
-pio_latency=1000
-pio_size=8
-ret_bad_addr=false
-update_data=false
-warn_access=
-ret_data8=255
-ret_data16=65535
-ret_data32=4294967295
-ret_data64=18446744073709551615
-platform=system.tsunami
-system=system
-
-[system.tsunami.fake_pnp_read0]
-type=IsaFake
-pio_addr=8804615848451
-pio_latency=1000
-pio_size=8
-ret_bad_addr=false
-update_data=false
-warn_access=
-ret_data8=255
-ret_data16=65535
-ret_data32=4294967295
-ret_data64=18446744073709551615
-platform=system.tsunami
-system=system
-
-[system.tsunami.fake_pnp_read7]
-type=IsaFake
-pio_addr=8804615848899
-pio_latency=1000
-pio_size=8
-ret_bad_addr=false
-update_data=false
-warn_access=
-ret_data8=255
-ret_data16=65535
-ret_data32=4294967295
-ret_data64=18446744073709551615
-platform=system.tsunami
-system=system
-
-[system.tsunami.fake_pnp_read6]
-type=IsaFake
-pio_addr=8804615848835
-pio_latency=1000
-pio_size=8
-ret_bad_addr=false
-update_data=false
-warn_access=
-ret_data8=255
-ret_data16=65535
-ret_data32=4294967295
-ret_data64=18446744073709551615
-platform=system.tsunami
-system=system
-
-[system.tsunami.fake_pnp_read5]
-type=IsaFake
-pio_addr=8804615848771
-pio_latency=1000
-pio_size=8
-ret_bad_addr=false
-update_data=false
-warn_access=
-ret_data8=255
-ret_data16=65535
-ret_data32=4294967295
-ret_data64=18446744073709551615
-platform=system.tsunami
-system=system
-
-[system.tsunami.fake_pnp_read4]
-type=IsaFake
-pio_addr=8804615848707
-pio_latency=1000
-pio_size=8
-ret_bad_addr=false
-update_data=false
-warn_access=
-ret_data8=255
-ret_data16=65535
-ret_data32=4294967295
-ret_data64=18446744073709551615
-platform=system.tsunami
-system=system
-
-[system.tsunami.fake_pnp_write]
-type=IsaFake
-pio_addr=8804615850617
-pio_latency=1000
-pio_size=8
-ret_bad_addr=false
-update_data=false
-warn_access=
-ret_data8=255
-ret_data16=65535
-ret_data32=4294967295
-ret_data64=18446744073709551615
-platform=system.tsunami
-system=system
-
-[system.tsunami.fb]
-type=BadDevice
-devicename=FrameBuffer
-pio_addr=8804615848912
-system=system
-platform=system.tsunami
-pio_latency=1000
-
-[system.tsunami.ethernet.configdata]
-type=PciConfigData
-VendorID=4107
-DeviceID=34
-Command=0
-Status=656
-Revision=0
-ProgIF=0
-SubClassCode=0
-ClassCode=2
-CacheLineSize=0
-LatencyTimer=0
-HeaderType=0
-BIST=0
-BAR0=1
-BAR1=0
-BAR2=0
-BAR3=0
-BAR4=0
-BAR5=0
-CardbusCIS=0
-SubsystemVendorID=0
-SubsystemID=0
-ExpansionROM=0
-InterruptLine=30
-InterruptPin=1
-MinimumGrant=176
-MaximumLatency=52
-BAR0Size=256
-BAR1Size=4096
-BAR2Size=0
-BAR3Size=0
-BAR4Size=0
-BAR5Size=0
-
-[system.tsunami.ethernet]
-type=NSGigE
-system=system
-platform=system.tsunami
-min_backoff_delay=4000
-max_backoff_delay=10000000
-configdata=system.tsunami.ethernet.configdata
-pci_bus=0
-pci_dev=1
-pci_func=0
-pio_latency=1000
-config_latency=20000
-clock=0
-dma_desc_free=false
-dma_data_free=false
-dma_read_delay=0
-dma_write_delay=0
-dma_read_factor=0
-dma_write_factor=0
-dma_no_allocate=true
-intr_delay=10000000
-rx_delay=1000000
-tx_delay=1000000
-rx_fifo_size=524288
-tx_fifo_size=524288
-rx_filter=true
-hardware_address=00:90:00:00:00:01
-rx_thread=false
-tx_thread=false
-rss=false
-
-[system.tsunami.etherint]
-type=NSGigEInt
-peer=null
-device=system.tsunami.ethernet
-
-[system.tsunami.fake_OROM]
-type=IsaFake
-pio_addr=8796093677568
-pio_latency=1000
-pio_size=393216
-ret_bad_addr=false
-update_data=false
-warn_access=
-ret_data8=255
-ret_data16=65535
-ret_data32=4294967295
-ret_data64=18446744073709551615
-platform=system.tsunami
-system=system
-
-[system.tsunami.uart]
-type=Uart8250
-pio_addr=8804615848952
-pio_latency=1000
-platform=system.tsunami
-sim_console=system.sim_console
-system=system
-
-[system.tsunami.fake_sm_chip]
-type=IsaFake
-pio_addr=8804615848816
-pio_latency=1000
-pio_size=8
-ret_bad_addr=false
-update_data=false
-warn_access=
-ret_data8=255
-ret_data16=65535
-ret_data32=4294967295
-ret_data64=18446744073709551615
-platform=system.tsunami
-system=system
-
-[system.tsunami.fake_pnp_addr]
-type=IsaFake
-pio_addr=8804615848569
-pio_latency=1000
-pio_size=8
-ret_bad_addr=false
-update_data=false
-warn_access=
-ret_data8=255
-ret_data16=65535
-ret_data32=4294967295
-ret_data64=18446744073709551615
-platform=system.tsunami
-system=system
-
-[system.tsunami.ide.configdata]
-type=PciConfigData
-VendorID=32902
-DeviceID=28945
-Command=0
-Status=640
-Revision=0
-ProgIF=133
-SubClassCode=1
-ClassCode=1
-CacheLineSize=0
-LatencyTimer=0
-HeaderType=0
-BIST=0
-BAR0=1
-BAR1=1
-BAR2=1
-BAR3=1
-BAR4=1
-BAR5=1
-CardbusCIS=0
-SubsystemVendorID=0
-SubsystemID=0
-ExpansionROM=0
-InterruptLine=31
-InterruptPin=1
-MinimumGrant=0
-MaximumLatency=0
-BAR0Size=8
-BAR1Size=4
-BAR2Size=8
-BAR3Size=4
-BAR4Size=16
-BAR5Size=0
-
-[system.tsunami.ide]
-type=IdeController
-system=system
-platform=system.tsunami
-min_backoff_delay=4000
-max_backoff_delay=10000000
-configdata=system.tsunami.ide.configdata
-pci_bus=0
-pci_dev=0
-pci_func=0
-pio_latency=1000
-config_latency=20000
-disks=system.disk0 system.disk2
-
-[system.iobus]
-type=Bus
-bus_id=0
-clock=1000
-width=64
-responder_set=true
-block_size=64
-
-[system.toL2Bus]
-type=Bus
-bus_id=0
-clock=1000
-width=64
-responder_set=false
-block_size=64
-
-[system.toL2Bus.responder]
-type=IsaFake
-pio_addr=0
-pio_latency=1
-pio_size=8
-ret_bad_addr=true
-update_data=false
-warn_access=
-ret_data8=255
-ret_data16=65535
-ret_data32=4294967295
-ret_data64=18446744073709551615
-platform=system.tsunami
-system=system
-
diff --git a/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-atomic/config.out b/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-atomic/config.out
deleted file mode 100644
index a196b7dc6..000000000
--- a/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-atomic/config.out
+++ /dev/null
@@ -1,773 +0,0 @@
-[root]
-type=Root
-dummy=0
-
-[system.physmem]
-type=PhysicalMemory
-file=
-range=[0,134217727]
-latency=1
-zero=false
-
-[system]
-type=LinuxAlphaSystem
-boot_cpu_frequency=500
-physmem=system.physmem
-mem_mode=atomic
-kernel=/dist/m5/system/binaries/vmlinux
-console=/dist/m5/system/binaries/console
-pal=/dist/m5/system/binaries/ts_osfpal
-boot_osflags=root=/dev/hda1 console=ttyS0
-readfile=tests/halt.sh
-symbolfile=
-init_param=0
-system_type=34
-system_rev=1024
-
-[system.membus]
-type=Bus
-bus_id=1
-clock=1000
-width=64
-responder_set=false
-block_size=64
-
-[system.intrctrl]
-type=IntrControl
-sys=system
-
-[system.tsunami]
-type=Tsunami
-system=system
-intrctrl=system.intrctrl
-
-[system.membus.responder]
-type=IsaFake
-pio_addr=0
-pio_latency=1
-pio_size=8
-ret_bad_addr=true
-update_data=false
-warn_access=
-ret_data8=255
-ret_data16=65535
-ret_data32=4294967295
-ret_data64=18446744073709551615
-platform=system.tsunami
-system=system
-
-[system.l2c]
-type=BaseCache
-size=4194304
-assoc=8
-block_size=64
-latency=10000
-mshrs=92
-tgts_per_mshr=16
-write_buffers=8
-prioritizeRequests=false
-protocol=null
-trace_addr=0
-hash_delay=1
-repl=null
-compressed_bus=false
-store_compressed=false
-adaptive_compression=false
-compression_latency=0
-block_size=64
-max_miss_count=0
-addr_range=[0,18446744073709551615]
-split=false
-split_size=0
-lifo=false
-two_queue=false
-prefetch_miss=false
-prefetch_access=false
-prefetcher_size=100
-prefetch_past_page=false
-prefetch_serial_squash=false
-prefetch_latency=100000
-prefetch_degree=1
-prefetch_policy=none
-prefetch_cache_check_push=true
-prefetch_use_cpu_id=true
-prefetch_data_accesses_only=false
-
-[system.bridge]
-type=Bridge
-req_size_a=16
-req_size_b=16
-resp_size_a=16
-resp_size_b=16
-delay=50000
-nack_delay=4000
-write_ack=false
-fix_partial_write_a=false
-fix_partial_write_b=true
-
-[system.disk0.image.child]
-type=RawDiskImage
-image_file=/dist/m5/system/disks/linux-latest.img
-read_only=true
-
-[system.disk0.image]
-type=CowDiskImage
-child=system.disk0.image.child
-image_file=
-table_size=65536
-read_only=false
-
-[system.disk0]
-type=IdeDisk
-image=system.disk0.image
-driveID=master
-delay=1000000
-
-[system.disk2.image.child]
-type=RawDiskImage
-image_file=/dist/m5/system/disks/linux-bigswap2.img
-read_only=true
-
-[system.disk2.image]
-type=CowDiskImage
-child=system.disk2.image.child
-image_file=
-table_size=65536
-read_only=false
-
-[system.disk2]
-type=IdeDisk
-image=system.disk2.image
-driveID=master
-delay=1000000
-
-[system.simple_disk.disk]
-type=RawDiskImage
-image_file=/dist/m5/system/disks/linux-latest.img
-read_only=true
-
-[system.simple_disk]
-type=SimpleDisk
-system=system
-disk=system.simple_disk.disk
-
-[system.tsunami.fake_uart1]
-type=IsaFake
-pio_addr=8804615848696
-pio_latency=1000
-pio_size=8
-ret_bad_addr=false
-update_data=false
-warn_access=
-ret_data8=255
-ret_data16=65535
-ret_data32=4294967295
-ret_data64=18446744073709551615
-platform=system.tsunami
-system=system
-
-[system.tsunami.fake_uart2]
-type=IsaFake
-pio_addr=8804615848936
-pio_latency=1000
-pio_size=8
-ret_bad_addr=false
-update_data=false
-warn_access=
-ret_data8=255
-ret_data16=65535
-ret_data32=4294967295
-ret_data64=18446744073709551615
-platform=system.tsunami
-system=system
-
-[system.tsunami.fake_uart3]
-type=IsaFake
-pio_addr=8804615848680
-pio_latency=1000
-pio_size=8
-ret_bad_addr=false
-update_data=false
-warn_access=
-ret_data8=255
-ret_data16=65535
-ret_data32=4294967295
-ret_data64=18446744073709551615
-platform=system.tsunami
-system=system
-
-[system.tsunami.fake_uart4]
-type=IsaFake
-pio_addr=8804615848944
-pio_latency=1000
-pio_size=8
-ret_bad_addr=false
-update_data=false
-warn_access=
-ret_data8=255
-ret_data16=65535
-ret_data32=4294967295
-ret_data64=18446744073709551615
-platform=system.tsunami
-system=system
-
-[system.tsunami.fake_ppc]
-type=IsaFake
-pio_addr=8804615848891
-pio_latency=1000
-pio_size=8
-ret_bad_addr=false
-update_data=false
-warn_access=
-ret_data8=255
-ret_data16=65535
-ret_data32=4294967295
-ret_data64=18446744073709551615
-platform=system.tsunami
-system=system
-
-[system.tsunami.cchip]
-type=TsunamiCChip
-pio_addr=8803072344064
-pio_latency=1000
-platform=system.tsunami
-system=system
-tsunami=system.tsunami
-
-[system.tsunami.io]
-type=TsunamiIO
-pio_addr=8804615847936
-pio_latency=1000
-frequency=976562500
-platform=system.tsunami
-system=system
-time=2009 1 1 0 0 0 3 1
-year_is_bcd=false
-tsunami=system.tsunami
-
-[]
-type=PciConfigAll
-pio_latency=1
-bus=0
-size=16777216
-platform=system.tsunami
-system=system
-
-[system.sim_console]
-type=SimConsole
-intr_control=system.intrctrl
-output=console
-port=3456
-append_name=true
-number=0
-
-[system.cpu.itb]
-type=AlphaITB
-size=48
-
-[system.cpu.dtb]
-type=AlphaDTB
-size=64
-
-[system.cpu]
-type=AtomicSimpleCPU
-max_insts_any_thread=0
-max_insts_all_threads=0
-max_loads_any_thread=0
-max_loads_all_threads=0
-progress_interval=0
-system=system
-cpu_id=0
-itb=system.cpu.itb
-dtb=system.cpu.dtb
-profile=0
-do_quiesce=true
-do_checkpoint_insts=true
-do_statistics_insts=true
-clock=500
-phase=0
-defer_registration=false
-width=1
-function_trace=false
-function_trace_start=0
-simulate_stalls=false
-
-[system.tsunami.console]
-type=AlphaConsole
-sim_console=system.sim_console
-disk=system.simple_disk
-pio_addr=8804682956800
-system=system
-cpu=system.cpu
-platform=system.tsunami
-pio_latency=1000
-
-[system.tsunami.fake_ata1]
-type=IsaFake
-pio_addr=8804615848304
-pio_latency=1000
-pio_size=8
-ret_bad_addr=false
-update_data=false
-warn_access=
-ret_data8=255
-ret_data16=65535
-ret_data32=4294967295
-ret_data64=18446744073709551615
-platform=system.tsunami
-system=system
-
-[system.tsunami.fake_ata0]
-type=IsaFake
-pio_addr=8804615848432
-pio_latency=1000
-pio_size=8
-ret_bad_addr=false
-update_data=false
-warn_access=
-ret_data8=255
-ret_data16=65535
-ret_data32=4294967295
-ret_data64=18446744073709551615
-platform=system.tsunami
-system=system
-
-[system.tsunami.pchip]
-type=TsunamiPChip
-pio_addr=8802535473152
-pio_latency=1000
-platform=system.tsunami
-system=system
-tsunami=system.tsunami
-
-[system.tsunami.fake_pnp_read3]
-type=IsaFake
-pio_addr=8804615848643
-pio_latency=1000
-pio_size=8
-ret_bad_addr=false
-update_data=false
-warn_access=
-ret_data8=255
-ret_data16=65535
-ret_data32=4294967295
-ret_data64=18446744073709551615
-platform=system.tsunami
-system=system
-
-[system.tsunami.fake_pnp_read2]
-type=IsaFake
-pio_addr=8804615848579
-pio_latency=1000
-pio_size=8
-ret_bad_addr=false
-update_data=false
-warn_access=
-ret_data8=255
-ret_data16=65535
-ret_data32=4294967295
-ret_data64=18446744073709551615
-platform=system.tsunami
-system=system
-
-[system.tsunami.fake_pnp_read1]
-type=IsaFake
-pio_addr=8804615848515
-pio_latency=1000
-pio_size=8
-ret_bad_addr=false
-update_data=false
-warn_access=
-ret_data8=255
-ret_data16=65535
-ret_data32=4294967295
-ret_data64=18446744073709551615
-platform=system.tsunami
-system=system
-
-[system.tsunami.fake_pnp_read0]
-type=IsaFake
-pio_addr=8804615848451
-pio_latency=1000
-pio_size=8
-ret_bad_addr=false
-update_data=false
-warn_access=
-ret_data8=255
-ret_data16=65535
-ret_data32=4294967295
-ret_data64=18446744073709551615
-platform=system.tsunami
-system=system
-
-[system.tsunami.fake_pnp_read7]
-type=IsaFake
-pio_addr=8804615848899
-pio_latency=1000
-pio_size=8
-ret_bad_addr=false
-update_data=false
-warn_access=
-ret_data8=255
-ret_data16=65535
-ret_data32=4294967295
-ret_data64=18446744073709551615
-platform=system.tsunami
-system=system
-
-[system.tsunami.fake_pnp_read6]
-type=IsaFake
-pio_addr=8804615848835
-pio_latency=1000
-pio_size=8
-ret_bad_addr=false
-update_data=false
-warn_access=
-ret_data8=255
-ret_data16=65535
-ret_data32=4294967295
-ret_data64=18446744073709551615
-platform=system.tsunami
-system=system
-
-[system.tsunami.fake_pnp_read5]
-type=IsaFake
-pio_addr=8804615848771
-pio_latency=1000
-pio_size=8
-ret_bad_addr=false
-update_data=false
-warn_access=
-ret_data8=255
-ret_data16=65535
-ret_data32=4294967295
-ret_data64=18446744073709551615
-platform=system.tsunami
-system=system
-
-[system.tsunami.fake_pnp_read4]
-type=IsaFake
-pio_addr=8804615848707
-pio_latency=1000
-pio_size=8
-ret_bad_addr=false
-update_data=false
-warn_access=
-ret_data8=255
-ret_data16=65535
-ret_data32=4294967295
-ret_data64=18446744073709551615
-platform=system.tsunami
-system=system
-
-[system.tsunami.fake_pnp_write]
-type=IsaFake
-pio_addr=8804615850617
-pio_latency=1000
-pio_size=8
-ret_bad_addr=false
-update_data=false
-warn_access=
-ret_data8=255
-ret_data16=65535
-ret_data32=4294967295
-ret_data64=18446744073709551615
-platform=system.tsunami
-system=system
-
-[system.tsunami.fb]
-type=BadDevice
-devicename=FrameBuffer
-pio_addr=8804615848912
-system=system
-platform=system.tsunami
-pio_latency=1000
-
-[system.tsunami.ethernet.configdata]
-type=PciConfigData
-VendorID=4107
-DeviceID=34
-Command=0
-Status=656
-Revision=0
-ProgIF=0
-SubClassCode=0
-ClassCode=2
-CacheLineSize=0
-LatencyTimer=0
-HeaderType=0
-BIST=0
-BAR0=1
-BAR1=0
-BAR2=0
-BAR3=0
-BAR4=0
-BAR5=0
-CardbusCIS=0
-SubsystemVendorID=0
-SubsystemID=0
-ExpansionROM=0
-InterruptLine=30
-InterruptPin=1
-MinimumGrant=176
-MaximumLatency=52
-BAR0Size=256
-BAR1Size=4096
-BAR2Size=0
-BAR3Size=0
-BAR4Size=0
-BAR5Size=0
-
-[system.tsunami.ethernet]
-type=NSGigE
-system=system
-platform=system.tsunami
-min_backoff_delay=4000
-max_backoff_delay=10000000
-configdata=system.tsunami.ethernet.configdata
-pci_bus=0
-pci_dev=1
-pci_func=0
-pio_latency=1000
-config_latency=20000
-clock=0
-dma_desc_free=false
-dma_data_free=false
-dma_read_delay=0
-dma_write_delay=0
-dma_read_factor=0
-dma_write_factor=0
-dma_no_allocate=true
-intr_delay=10000000
-rx_delay=1000000
-tx_delay=1000000
-rx_fifo_size=524288
-tx_fifo_size=524288
-rx_filter=true
-hardware_address=00:90:00:00:00:01
-rx_thread=false
-tx_thread=false
-rss=false
-
-[system.tsunami.etherint]
-type=NSGigEInt
-peer=null
-device=system.tsunami.ethernet
-
-[system.tsunami.fake_OROM]
-type=IsaFake
-pio_addr=8796093677568
-pio_latency=1000
-pio_size=393216
-ret_bad_addr=false
-update_data=false
-warn_access=
-ret_data8=255
-ret_data16=65535
-ret_data32=4294967295
-ret_data64=18446744073709551615
-platform=system.tsunami
-system=system
-
-[system.tsunami.uart]
-type=Uart8250
-pio_addr=8804615848952
-pio_latency=1000
-platform=system.tsunami
-sim_console=system.sim_console
-system=system
-
-[system.tsunami.fake_sm_chip]
-type=IsaFake
-pio_addr=8804615848816
-pio_latency=1000
-pio_size=8
-ret_bad_addr=false
-update_data=false
-warn_access=
-ret_data8=255
-ret_data16=65535
-ret_data32=4294967295
-ret_data64=18446744073709551615
-platform=system.tsunami
-system=system
-
-[system.tsunami.fake_pnp_addr]
-type=IsaFake
-pio_addr=8804615848569
-pio_latency=1000
-pio_size=8
-ret_bad_addr=false
-update_data=false
-warn_access=
-ret_data8=255
-ret_data16=65535
-ret_data32=4294967295
-ret_data64=18446744073709551615
-platform=system.tsunami
-system=system
-
-[system.tsunami.ide.configdata]
-type=PciConfigData
-VendorID=32902
-DeviceID=28945
-Command=0
-Status=640
-Revision=0
-ProgIF=133
-SubClassCode=1
-ClassCode=1
-CacheLineSize=0
-LatencyTimer=0
-HeaderType=0
-BIST=0
-BAR0=1
-BAR1=1
-BAR2=1
-BAR3=1
-BAR4=1
-BAR5=1
-CardbusCIS=0
-SubsystemVendorID=0
-SubsystemID=0
-ExpansionROM=0
-InterruptLine=31
-InterruptPin=1
-MinimumGrant=0
-MaximumLatency=0
-BAR0Size=8
-BAR1Size=4
-BAR2Size=8
-BAR3Size=4
-BAR4Size=16
-BAR5Size=0
-
-[system.tsunami.ide]
-type=IdeController
-system=system
-platform=system.tsunami
-min_backoff_delay=4000
-max_backoff_delay=10000000
-configdata=system.tsunami.ide.configdata
-pci_bus=0
-pci_dev=0
-pci_func=0
-pio_latency=1000
-config_latency=20000
-disks=system.disk0 system.disk2
-
-[system.toL2Bus]
-type=Bus
-bus_id=0
-clock=1000
-width=64
-responder_set=false
-block_size=64
-
-[system.toL2Bus.responder]
-type=IsaFake
-pio_addr=0
-pio_latency=1
-pio_size=8
-ret_bad_addr=true
-update_data=false
-warn_access=
-ret_data8=255
-ret_data16=65535
-ret_data32=4294967295
-ret_data64=18446744073709551615
-platform=system.tsunami
-system=system
-
-[system.iobus]
-type=Bus
-bus_id=0
-clock=1000
-width=64
-responder_set=true
-block_size=64
-
-[system.cpu.icache.protocol]
-type=CoherenceProtocol
-protocol=moesi
-do_upgrades=true
-
-[system.cpu.icache]
-type=BaseCache
-size=32768
-assoc=1
-block_size=64
-latency=1000
-mshrs=4
-tgts_per_mshr=8
-write_buffers=8
-prioritizeRequests=false
-protocol=system.cpu.icache.protocol
-trace_addr=0
-hash_delay=1
-repl=null
-compressed_bus=false
-store_compressed=false
-adaptive_compression=false
-compression_latency=0
-block_size=64
-max_miss_count=0
-addr_range=[0,18446744073709551615]
-split=false
-split_size=0
-lifo=false
-two_queue=false
-prefetch_miss=false
-prefetch_access=false
-prefetcher_size=100
-prefetch_past_page=false
-prefetch_serial_squash=false
-prefetch_latency=10000
-prefetch_degree=1
-prefetch_policy=none
-prefetch_cache_check_push=true
-prefetch_use_cpu_id=true
-prefetch_data_accesses_only=false
-
-[system.cpu.dcache.protocol]
-type=CoherenceProtocol
-protocol=moesi
-do_upgrades=true
-
-[system.cpu.dcache]
-type=BaseCache
-size=32768
-assoc=4
-block_size=64
-latency=1000
-mshrs=4
-tgts_per_mshr=8
-write_buffers=8
-prioritizeRequests=false
-protocol=system.cpu.dcache.protocol
-trace_addr=0
-hash_delay=1
-repl=null
-compressed_bus=false
-store_compressed=false
-adaptive_compression=false
-compression_latency=0
-block_size=64
-max_miss_count=0
-addr_range=[0,18446744073709551615]
-split=false
-split_size=0
-lifo=false
-two_queue=false
-prefetch_miss=false
-prefetch_access=false
-prefetcher_size=100
-prefetch_past_page=false
-prefetch_serial_squash=false
-prefetch_latency=10000
-prefetch_degree=1
-prefetch_policy=none
-prefetch_cache_check_push=true
-prefetch_use_cpu_id=true
-prefetch_data_accesses_only=false
-
diff --git a/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-timing-dual/config.out b/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-timing-dual/config.out
deleted file mode 100644
index bb98fee3e..000000000
--- a/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-timing-dual/config.out
+++ /dev/null
@@ -1,888 +0,0 @@
-[root]
-type=Root
-dummy=0
-
-[system.physmem]
-type=PhysicalMemory
-file=
-range=[0,134217727]
-latency=1
-zero=false
-
-[system]
-type=LinuxAlphaSystem
-boot_cpu_frequency=500
-physmem=system.physmem
-mem_mode=timing
-kernel=/dist/m5/system/binaries/vmlinux
-console=/dist/m5/system/binaries/console
-pal=/dist/m5/system/binaries/ts_osfpal
-boot_osflags=root=/dev/hda1 console=ttyS0
-readfile=tests/halt.sh
-symbolfile=
-init_param=0
-system_type=34
-system_rev=1024
-
-[system.membus]
-type=Bus
-bus_id=1
-clock=1000
-width=64
-responder_set=false
-block_size=64
-
-[system.intrctrl]
-type=IntrControl
-sys=system
-
-[system.tsunami]
-type=Tsunami
-system=system
-intrctrl=system.intrctrl
-
-[system.membus.responder]
-type=IsaFake
-pio_addr=0
-pio_latency=1
-pio_size=8
-ret_bad_addr=true
-update_data=false
-warn_access=
-ret_data8=255
-ret_data16=65535
-ret_data32=4294967295
-ret_data64=18446744073709551615
-platform=system.tsunami
-system=system
-
-[system.l2c]
-type=BaseCache
-size=4194304
-assoc=8
-block_size=64
-latency=10000
-mshrs=92
-tgts_per_mshr=16
-write_buffers=8
-prioritizeRequests=false
-protocol=null
-trace_addr=0
-hash_delay=1
-repl=null
-compressed_bus=false
-store_compressed=false
-adaptive_compression=false
-compression_latency=0
-block_size=64
-max_miss_count=0
-addr_range=[0,18446744073709551615]
-split=false
-split_size=0
-lifo=false
-two_queue=false
-prefetch_miss=false
-prefetch_access=false
-prefetcher_size=100
-prefetch_past_page=false
-prefetch_serial_squash=false
-prefetch_latency=100000
-prefetch_degree=1
-prefetch_policy=none
-prefetch_cache_check_push=true
-prefetch_use_cpu_id=true
-prefetch_data_accesses_only=false
-
-[system.bridge]
-type=Bridge
-req_size_a=16
-req_size_b=16
-resp_size_a=16
-resp_size_b=16
-delay=50000
-nack_delay=4000
-write_ack=false
-fix_partial_write_a=false
-fix_partial_write_b=true
-
-[system.disk0.image.child]
-type=RawDiskImage
-image_file=/dist/m5/system/disks/linux-latest.img
-read_only=true
-
-[system.disk0.image]
-type=CowDiskImage
-child=system.disk0.image.child
-image_file=
-table_size=65536
-read_only=false
-
-[system.disk0]
-type=IdeDisk
-image=system.disk0.image
-driveID=master
-delay=1000000
-
-[system.disk2.image.child]
-type=RawDiskImage
-image_file=/dist/m5/system/disks/linux-bigswap2.img
-read_only=true
-
-[system.disk2.image]
-type=CowDiskImage
-child=system.disk2.image.child
-image_file=
-table_size=65536
-read_only=false
-
-[system.disk2]
-type=IdeDisk
-image=system.disk2.image
-driveID=master
-delay=1000000
-
-[system.cpu0.itb]
-type=AlphaITB
-size=48
-
-[system.cpu0.dtb]
-type=AlphaDTB
-size=64
-
-[system.cpu0]
-type=TimingSimpleCPU
-max_insts_any_thread=0
-max_insts_all_threads=0
-max_loads_any_thread=0
-max_loads_all_threads=0
-progress_interval=0
-system=system
-cpu_id=0
-itb=system.cpu0.itb
-dtb=system.cpu0.dtb
-profile=0
-do_quiesce=true
-do_checkpoint_insts=true
-do_statistics_insts=true
-clock=500
-phase=0
-defer_registration=false
-// width not specified
-function_trace=false
-function_trace_start=0
-// simulate_stalls not specified
-
-[system.cpu0.icache.protocol]
-type=CoherenceProtocol
-protocol=moesi
-do_upgrades=true
-
-[system.cpu0.icache]
-type=BaseCache
-size=32768
-assoc=1
-block_size=64
-latency=1000
-mshrs=4
-tgts_per_mshr=8
-write_buffers=8
-prioritizeRequests=false
-protocol=system.cpu0.icache.protocol
-trace_addr=0
-hash_delay=1
-repl=null
-compressed_bus=false
-store_compressed=false
-adaptive_compression=false
-compression_latency=0
-block_size=64
-max_miss_count=0
-addr_range=[0,18446744073709551615]
-split=false
-split_size=0
-lifo=false
-two_queue=false
-prefetch_miss=false
-prefetch_access=false
-prefetcher_size=100
-prefetch_past_page=false
-prefetch_serial_squash=false
-prefetch_latency=10000
-prefetch_degree=1
-prefetch_policy=none
-prefetch_cache_check_push=true
-prefetch_use_cpu_id=true
-prefetch_data_accesses_only=false
-
-[system.cpu0.dcache.protocol]
-type=CoherenceProtocol
-protocol=moesi
-do_upgrades=true
-
-[system.cpu0.dcache]
-type=BaseCache
-size=32768
-assoc=4
-block_size=64
-latency=1000
-mshrs=4
-tgts_per_mshr=8
-write_buffers=8
-prioritizeRequests=false
-protocol=system.cpu0.dcache.protocol
-trace_addr=0
-hash_delay=1
-repl=null
-compressed_bus=false
-store_compressed=false
-adaptive_compression=false
-compression_latency=0
-block_size=64
-max_miss_count=0
-addr_range=[0,18446744073709551615]
-split=false
-split_size=0
-lifo=false
-two_queue=false
-prefetch_miss=false
-prefetch_access=false
-prefetcher_size=100
-prefetch_past_page=false
-prefetch_serial_squash=false
-prefetch_latency=10000
-prefetch_degree=1
-prefetch_policy=none
-prefetch_cache_check_push=true
-prefetch_use_cpu_id=true
-prefetch_data_accesses_only=false
-
-[system.cpu1.itb]
-type=AlphaITB
-size=48
-
-[system.cpu1.dtb]
-type=AlphaDTB
-size=64
-
-[system.cpu1]
-type=TimingSimpleCPU
-max_insts_any_thread=0
-max_insts_all_threads=0
-max_loads_any_thread=0
-max_loads_all_threads=0
-progress_interval=0
-system=system
-cpu_id=1
-itb=system.cpu1.itb
-dtb=system.cpu1.dtb
-profile=0
-do_quiesce=true
-do_checkpoint_insts=true
-do_statistics_insts=true
-clock=500
-phase=0
-defer_registration=false
-// width not specified
-function_trace=false
-function_trace_start=0
-// simulate_stalls not specified
-
-[system.cpu1.icache.protocol]
-type=CoherenceProtocol
-protocol=moesi
-do_upgrades=true
-
-[system.cpu1.icache]
-type=BaseCache
-size=32768
-assoc=1
-block_size=64
-latency=1000
-mshrs=4
-tgts_per_mshr=8
-write_buffers=8
-prioritizeRequests=false
-protocol=system.cpu1.icache.protocol
-trace_addr=0
-hash_delay=1
-repl=null
-compressed_bus=false
-store_compressed=false
-adaptive_compression=false
-compression_latency=0
-block_size=64
-max_miss_count=0
-addr_range=[0,18446744073709551615]
-split=false
-split_size=0
-lifo=false
-two_queue=false
-prefetch_miss=false
-prefetch_access=false
-prefetcher_size=100
-prefetch_past_page=false
-prefetch_serial_squash=false
-prefetch_latency=10000
-prefetch_degree=1
-prefetch_policy=none
-prefetch_cache_check_push=true
-prefetch_use_cpu_id=true
-prefetch_data_accesses_only=false
-
-[system.cpu1.dcache.protocol]
-type=CoherenceProtocol
-protocol=moesi
-do_upgrades=true
-
-[system.cpu1.dcache]
-type=BaseCache
-size=32768
-assoc=4
-block_size=64
-latency=1000
-mshrs=4
-tgts_per_mshr=8
-write_buffers=8
-prioritizeRequests=false
-protocol=system.cpu1.dcache.protocol
-trace_addr=0
-hash_delay=1
-repl=null
-compressed_bus=false
-store_compressed=false
-adaptive_compression=false
-compression_latency=0
-block_size=64
-max_miss_count=0
-addr_range=[0,18446744073709551615]
-split=false
-split_size=0
-lifo=false
-two_queue=false
-prefetch_miss=false
-prefetch_access=false
-prefetcher_size=100
-prefetch_past_page=false
-prefetch_serial_squash=false
-prefetch_latency=10000
-prefetch_degree=1
-prefetch_policy=none
-prefetch_cache_check_push=true
-prefetch_use_cpu_id=true
-prefetch_data_accesses_only=false
-
-[system.simple_disk.disk]
-type=RawDiskImage
-image_file=/dist/m5/system/disks/linux-latest.img
-read_only=true
-
-[system.simple_disk]
-type=SimpleDisk
-system=system
-disk=system.simple_disk.disk
-
-[system.tsunami.fake_uart1]
-type=IsaFake
-pio_addr=8804615848696
-pio_latency=1000
-pio_size=8
-ret_bad_addr=false
-update_data=false
-warn_access=
-ret_data8=255
-ret_data16=65535
-ret_data32=4294967295
-ret_data64=18446744073709551615
-platform=system.tsunami
-system=system
-
-[system.tsunami.fake_uart2]
-type=IsaFake
-pio_addr=8804615848936
-pio_latency=1000
-pio_size=8
-ret_bad_addr=false
-update_data=false
-warn_access=
-ret_data8=255
-ret_data16=65535
-ret_data32=4294967295
-ret_data64=18446744073709551615
-platform=system.tsunami
-system=system
-
-[system.tsunami.fake_uart3]
-type=IsaFake
-pio_addr=8804615848680
-pio_latency=1000
-pio_size=8
-ret_bad_addr=false
-update_data=false
-warn_access=
-ret_data8=255
-ret_data16=65535
-ret_data32=4294967295
-ret_data64=18446744073709551615
-platform=system.tsunami
-system=system
-
-[system.tsunami.fake_uart4]
-type=IsaFake
-pio_addr=8804615848944
-pio_latency=1000
-pio_size=8
-ret_bad_addr=false
-update_data=false
-warn_access=
-ret_data8=255
-ret_data16=65535
-ret_data32=4294967295
-ret_data64=18446744073709551615
-platform=system.tsunami
-system=system
-
-[system.tsunami.fake_ppc]
-type=IsaFake
-pio_addr=8804615848891
-pio_latency=1000
-pio_size=8
-ret_bad_addr=false
-update_data=false
-warn_access=
-ret_data8=255
-ret_data16=65535
-ret_data32=4294967295
-ret_data64=18446744073709551615
-platform=system.tsunami
-system=system
-
-[system.tsunami.cchip]
-type=TsunamiCChip
-pio_addr=8803072344064
-pio_latency=1000
-platform=system.tsunami
-system=system
-tsunami=system.tsunami
-
-[system.tsunami.io]
-type=TsunamiIO
-pio_addr=8804615847936
-pio_latency=1000
-frequency=976562500
-platform=system.tsunami
-system=system
-time=2009 1 1 0 0 0 3 1
-year_is_bcd=false
-tsunami=system.tsunami
-
-[]
-type=PciConfigAll
-pio_latency=1
-bus=0
-size=16777216
-platform=system.tsunami
-system=system
-
-[system.sim_console]
-type=SimConsole
-intr_control=system.intrctrl
-output=console
-port=3456
-append_name=true
-number=0
-
-[system.tsunami.console]
-type=AlphaConsole
-sim_console=system.sim_console
-disk=system.simple_disk
-pio_addr=8804682956800
-system=system
-cpu=system.cpu0
-platform=system.tsunami
-pio_latency=1000
-
-[system.tsunami.fake_ata1]
-type=IsaFake
-pio_addr=8804615848304
-pio_latency=1000
-pio_size=8
-ret_bad_addr=false
-update_data=false
-warn_access=
-ret_data8=255
-ret_data16=65535
-ret_data32=4294967295
-ret_data64=18446744073709551615
-platform=system.tsunami
-system=system
-
-[system.tsunami.fake_ata0]
-type=IsaFake
-pio_addr=8804615848432
-pio_latency=1000
-pio_size=8
-ret_bad_addr=false
-update_data=false
-warn_access=
-ret_data8=255
-ret_data16=65535
-ret_data32=4294967295
-ret_data64=18446744073709551615
-platform=system.tsunami
-system=system
-
-[system.tsunami.pchip]
-type=TsunamiPChip
-pio_addr=8802535473152
-pio_latency=1000
-platform=system.tsunami
-system=system
-tsunami=system.tsunami
-
-[system.tsunami.fake_pnp_read3]
-type=IsaFake
-pio_addr=8804615848643
-pio_latency=1000
-pio_size=8
-ret_bad_addr=false
-update_data=false
-warn_access=
-ret_data8=255
-ret_data16=65535
-ret_data32=4294967295
-ret_data64=18446744073709551615
-platform=system.tsunami
-system=system
-
-[system.tsunami.fake_pnp_read2]
-type=IsaFake
-pio_addr=8804615848579
-pio_latency=1000
-pio_size=8
-ret_bad_addr=false
-update_data=false
-warn_access=
-ret_data8=255
-ret_data16=65535
-ret_data32=4294967295
-ret_data64=18446744073709551615
-platform=system.tsunami
-system=system
-
-[system.tsunami.fake_pnp_read1]
-type=IsaFake
-pio_addr=8804615848515
-pio_latency=1000
-pio_size=8
-ret_bad_addr=false
-update_data=false
-warn_access=
-ret_data8=255
-ret_data16=65535
-ret_data32=4294967295
-ret_data64=18446744073709551615
-platform=system.tsunami
-system=system
-
-[system.tsunami.fake_pnp_read0]
-type=IsaFake
-pio_addr=8804615848451
-pio_latency=1000
-pio_size=8
-ret_bad_addr=false
-update_data=false
-warn_access=
-ret_data8=255
-ret_data16=65535
-ret_data32=4294967295
-ret_data64=18446744073709551615
-platform=system.tsunami
-system=system
-
-[system.tsunami.fake_pnp_read7]
-type=IsaFake
-pio_addr=8804615848899
-pio_latency=1000
-pio_size=8
-ret_bad_addr=false
-update_data=false
-warn_access=
-ret_data8=255
-ret_data16=65535
-ret_data32=4294967295
-ret_data64=18446744073709551615
-platform=system.tsunami
-system=system
-
-[system.tsunami.fake_pnp_read6]
-type=IsaFake
-pio_addr=8804615848835
-pio_latency=1000
-pio_size=8
-ret_bad_addr=false
-update_data=false
-warn_access=
-ret_data8=255
-ret_data16=65535
-ret_data32=4294967295
-ret_data64=18446744073709551615
-platform=system.tsunami
-system=system
-
-[system.tsunami.fake_pnp_read5]
-type=IsaFake
-pio_addr=8804615848771
-pio_latency=1000
-pio_size=8
-ret_bad_addr=false
-update_data=false
-warn_access=
-ret_data8=255
-ret_data16=65535
-ret_data32=4294967295
-ret_data64=18446744073709551615
-platform=system.tsunami
-system=system
-
-[system.tsunami.fake_pnp_read4]
-type=IsaFake
-pio_addr=8804615848707
-pio_latency=1000
-pio_size=8
-ret_bad_addr=false
-update_data=false
-warn_access=
-ret_data8=255
-ret_data16=65535
-ret_data32=4294967295
-ret_data64=18446744073709551615
-platform=system.tsunami
-system=system
-
-[system.tsunami.fake_pnp_write]
-type=IsaFake
-pio_addr=8804615850617
-pio_latency=1000
-pio_size=8
-ret_bad_addr=false
-update_data=false
-warn_access=
-ret_data8=255
-ret_data16=65535
-ret_data32=4294967295
-ret_data64=18446744073709551615
-platform=system.tsunami
-system=system
-
-[system.tsunami.fb]
-type=BadDevice
-devicename=FrameBuffer
-pio_addr=8804615848912
-system=system
-platform=system.tsunami
-pio_latency=1000
-
-[system.tsunami.ethernet.configdata]
-type=PciConfigData
-VendorID=4107
-DeviceID=34
-Command=0
-Status=656
-Revision=0
-ProgIF=0
-SubClassCode=0
-ClassCode=2
-CacheLineSize=0
-LatencyTimer=0
-HeaderType=0
-BIST=0
-BAR0=1
-BAR1=0
-BAR2=0
-BAR3=0
-BAR4=0
-BAR5=0
-CardbusCIS=0
-SubsystemVendorID=0
-SubsystemID=0
-ExpansionROM=0
-InterruptLine=30
-InterruptPin=1
-MinimumGrant=176
-MaximumLatency=52
-BAR0Size=256
-BAR1Size=4096
-BAR2Size=0
-BAR3Size=0
-BAR4Size=0
-BAR5Size=0
-
-[system.tsunami.ethernet]
-type=NSGigE
-system=system
-platform=system.tsunami
-min_backoff_delay=4000
-max_backoff_delay=10000000
-configdata=system.tsunami.ethernet.configdata
-pci_bus=0
-pci_dev=1
-pci_func=0
-pio_latency=1000
-config_latency=20000
-clock=0
-dma_desc_free=false
-dma_data_free=false
-dma_read_delay=0
-dma_write_delay=0
-dma_read_factor=0
-dma_write_factor=0
-dma_no_allocate=true
-intr_delay=10000000
-rx_delay=1000000
-tx_delay=1000000
-rx_fifo_size=524288
-tx_fifo_size=524288
-rx_filter=true
-hardware_address=00:90:00:00:00:01
-rx_thread=false
-tx_thread=false
-rss=false
-
-[system.tsunami.etherint]
-type=NSGigEInt
-peer=null
-device=system.tsunami.ethernet
-
-[system.tsunami.fake_OROM]
-type=IsaFake
-pio_addr=8796093677568
-pio_latency=1000
-pio_size=393216
-ret_bad_addr=false
-update_data=false
-warn_access=
-ret_data8=255
-ret_data16=65535
-ret_data32=4294967295
-ret_data64=18446744073709551615
-platform=system.tsunami
-system=system
-
-[system.tsunami.uart]
-type=Uart8250
-pio_addr=8804615848952
-pio_latency=1000
-platform=system.tsunami
-sim_console=system.sim_console
-system=system
-
-[system.tsunami.fake_sm_chip]
-type=IsaFake
-pio_addr=8804615848816
-pio_latency=1000
-pio_size=8
-ret_bad_addr=false
-update_data=false
-warn_access=
-ret_data8=255
-ret_data16=65535
-ret_data32=4294967295
-ret_data64=18446744073709551615
-platform=system.tsunami
-system=system
-
-[system.tsunami.fake_pnp_addr]
-type=IsaFake
-pio_addr=8804615848569
-pio_latency=1000
-pio_size=8
-ret_bad_addr=false
-update_data=false
-warn_access=
-ret_data8=255
-ret_data16=65535
-ret_data32=4294967295
-ret_data64=18446744073709551615
-platform=system.tsunami
-system=system
-
-[system.tsunami.ide.configdata]
-type=PciConfigData
-VendorID=32902
-DeviceID=28945
-Command=0
-Status=640
-Revision=0
-ProgIF=133
-SubClassCode=1
-ClassCode=1
-CacheLineSize=0
-LatencyTimer=0
-HeaderType=0
-BIST=0
-BAR0=1
-BAR1=1
-BAR2=1
-BAR3=1
-BAR4=1
-BAR5=1
-CardbusCIS=0
-SubsystemVendorID=0
-SubsystemID=0
-ExpansionROM=0
-InterruptLine=31
-InterruptPin=1
-MinimumGrant=0
-MaximumLatency=0
-BAR0Size=8
-BAR1Size=4
-BAR2Size=8
-BAR3Size=4
-BAR4Size=16
-BAR5Size=0
-
-[system.tsunami.ide]
-type=IdeController
-system=system
-platform=system.tsunami
-min_backoff_delay=4000
-max_backoff_delay=10000000
-configdata=system.tsunami.ide.configdata
-pci_bus=0
-pci_dev=0
-pci_func=0
-pio_latency=1000
-config_latency=20000
-disks=system.disk0 system.disk2
-
-[system.iobus]
-type=Bus
-bus_id=0
-clock=1000
-width=64
-responder_set=true
-block_size=64
-
-[system.toL2Bus]
-type=Bus
-bus_id=0
-clock=1000
-width=64
-responder_set=false
-block_size=64
-
-[system.toL2Bus.responder]
-type=IsaFake
-pio_addr=0
-pio_latency=1
-pio_size=8
-ret_bad_addr=true
-update_data=false
-warn_access=
-ret_data8=255
-ret_data16=65535
-ret_data32=4294967295
-ret_data64=18446744073709551615
-platform=system.tsunami
-system=system
-
diff --git a/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-timing/config.out b/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-timing/config.out
deleted file mode 100644
index e0e32bce4..000000000
--- a/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-timing/config.out
+++ /dev/null
@@ -1,773 +0,0 @@
-[root]
-type=Root
-dummy=0
-
-[system.physmem]
-type=PhysicalMemory
-file=
-range=[0,134217727]
-latency=1
-zero=false
-
-[system]
-type=LinuxAlphaSystem
-boot_cpu_frequency=500
-physmem=system.physmem
-mem_mode=timing
-kernel=/dist/m5/system/binaries/vmlinux
-console=/dist/m5/system/binaries/console
-pal=/dist/m5/system/binaries/ts_osfpal
-boot_osflags=root=/dev/hda1 console=ttyS0
-readfile=tests/halt.sh
-symbolfile=
-init_param=0
-system_type=34
-system_rev=1024
-
-[system.membus]
-type=Bus
-bus_id=1
-clock=1000
-width=64
-responder_set=false
-block_size=64
-
-[system.intrctrl]
-type=IntrControl
-sys=system
-
-[system.tsunami]
-type=Tsunami
-system=system
-intrctrl=system.intrctrl
-
-[system.membus.responder]
-type=IsaFake
-pio_addr=0
-pio_latency=1
-pio_size=8
-ret_bad_addr=true
-update_data=false
-warn_access=
-ret_data8=255
-ret_data16=65535
-ret_data32=4294967295
-ret_data64=18446744073709551615
-platform=system.tsunami
-system=system
-
-[system.l2c]
-type=BaseCache
-size=4194304
-assoc=8
-block_size=64
-latency=10000
-mshrs=92
-tgts_per_mshr=16
-write_buffers=8
-prioritizeRequests=false
-protocol=null
-trace_addr=0
-hash_delay=1
-repl=null
-compressed_bus=false
-store_compressed=false
-adaptive_compression=false
-compression_latency=0
-block_size=64
-max_miss_count=0
-addr_range=[0,18446744073709551615]
-split=false
-split_size=0
-lifo=false
-two_queue=false
-prefetch_miss=false
-prefetch_access=false
-prefetcher_size=100
-prefetch_past_page=false
-prefetch_serial_squash=false
-prefetch_latency=100000
-prefetch_degree=1
-prefetch_policy=none
-prefetch_cache_check_push=true
-prefetch_use_cpu_id=true
-prefetch_data_accesses_only=false
-
-[system.bridge]
-type=Bridge
-req_size_a=16
-req_size_b=16
-resp_size_a=16
-resp_size_b=16
-delay=50000
-nack_delay=4000
-write_ack=false
-fix_partial_write_a=false
-fix_partial_write_b=true
-
-[system.disk0.image.child]
-type=RawDiskImage
-image_file=/dist/m5/system/disks/linux-latest.img
-read_only=true
-
-[system.disk0.image]
-type=CowDiskImage
-child=system.disk0.image.child
-image_file=
-table_size=65536
-read_only=false
-
-[system.disk0]
-type=IdeDisk
-image=system.disk0.image
-driveID=master
-delay=1000000
-
-[system.disk2.image.child]
-type=RawDiskImage
-image_file=/dist/m5/system/disks/linux-bigswap2.img
-read_only=true
-
-[system.disk2.image]
-type=CowDiskImage
-child=system.disk2.image.child
-image_file=
-table_size=65536
-read_only=false
-
-[system.disk2]
-type=IdeDisk
-image=system.disk2.image
-driveID=master
-delay=1000000
-
-[system.simple_disk.disk]
-type=RawDiskImage
-image_file=/dist/m5/system/disks/linux-latest.img
-read_only=true
-
-[system.simple_disk]
-type=SimpleDisk
-system=system
-disk=system.simple_disk.disk
-
-[system.tsunami.fake_uart1]
-type=IsaFake
-pio_addr=8804615848696
-pio_latency=1000
-pio_size=8
-ret_bad_addr=false
-update_data=false
-warn_access=
-ret_data8=255
-ret_data16=65535
-ret_data32=4294967295
-ret_data64=18446744073709551615
-platform=system.tsunami
-system=system
-
-[system.tsunami.fake_uart2]
-type=IsaFake
-pio_addr=8804615848936
-pio_latency=1000
-pio_size=8
-ret_bad_addr=false
-update_data=false
-warn_access=
-ret_data8=255
-ret_data16=65535
-ret_data32=4294967295
-ret_data64=18446744073709551615
-platform=system.tsunami
-system=system
-
-[system.tsunami.fake_uart3]
-type=IsaFake
-pio_addr=8804615848680
-pio_latency=1000
-pio_size=8
-ret_bad_addr=false
-update_data=false
-warn_access=
-ret_data8=255
-ret_data16=65535
-ret_data32=4294967295
-ret_data64=18446744073709551615
-platform=system.tsunami
-system=system
-
-[system.tsunami.fake_uart4]
-type=IsaFake
-pio_addr=8804615848944
-pio_latency=1000
-pio_size=8
-ret_bad_addr=false
-update_data=false
-warn_access=
-ret_data8=255
-ret_data16=65535
-ret_data32=4294967295
-ret_data64=18446744073709551615
-platform=system.tsunami
-system=system
-
-[system.tsunami.fake_ppc]
-type=IsaFake
-pio_addr=8804615848891
-pio_latency=1000
-pio_size=8
-ret_bad_addr=false
-update_data=false
-warn_access=
-ret_data8=255
-ret_data16=65535
-ret_data32=4294967295
-ret_data64=18446744073709551615
-platform=system.tsunami
-system=system
-
-[system.tsunami.cchip]
-type=TsunamiCChip
-pio_addr=8803072344064
-pio_latency=1000
-platform=system.tsunami
-system=system
-tsunami=system.tsunami
-
-[system.tsunami.io]
-type=TsunamiIO
-pio_addr=8804615847936
-pio_latency=1000
-frequency=976562500
-platform=system.tsunami
-system=system
-time=2009 1 1 0 0 0 3 1
-year_is_bcd=false
-tsunami=system.tsunami
-
-[]
-type=PciConfigAll
-pio_latency=1
-bus=0
-size=16777216
-platform=system.tsunami
-system=system
-
-[system.sim_console]
-type=SimConsole
-intr_control=system.intrctrl
-output=console
-port=3456
-append_name=true
-number=0
-
-[system.cpu.itb]
-type=AlphaITB
-size=48
-
-[system.cpu.dtb]
-type=AlphaDTB
-size=64
-
-[system.cpu]
-type=TimingSimpleCPU
-max_insts_any_thread=0
-max_insts_all_threads=0
-max_loads_any_thread=0
-max_loads_all_threads=0
-progress_interval=0
-system=system
-cpu_id=0
-itb=system.cpu.itb
-dtb=system.cpu.dtb
-profile=0
-do_quiesce=true
-do_checkpoint_insts=true
-do_statistics_insts=true
-clock=500
-phase=0
-defer_registration=false
-// width not specified
-function_trace=false
-function_trace_start=0
-// simulate_stalls not specified
-
-[system.tsunami.console]
-type=AlphaConsole
-sim_console=system.sim_console
-disk=system.simple_disk
-pio_addr=8804682956800
-system=system
-cpu=system.cpu
-platform=system.tsunami
-pio_latency=1000
-
-[system.tsunami.fake_ata1]
-type=IsaFake
-pio_addr=8804615848304
-pio_latency=1000
-pio_size=8
-ret_bad_addr=false
-update_data=false
-warn_access=
-ret_data8=255
-ret_data16=65535
-ret_data32=4294967295
-ret_data64=18446744073709551615
-platform=system.tsunami
-system=system
-
-[system.tsunami.fake_ata0]
-type=IsaFake
-pio_addr=8804615848432
-pio_latency=1000
-pio_size=8
-ret_bad_addr=false
-update_data=false
-warn_access=
-ret_data8=255
-ret_data16=65535
-ret_data32=4294967295
-ret_data64=18446744073709551615
-platform=system.tsunami
-system=system
-
-[system.tsunami.pchip]
-type=TsunamiPChip
-pio_addr=8802535473152
-pio_latency=1000
-platform=system.tsunami
-system=system
-tsunami=system.tsunami
-
-[system.tsunami.fake_pnp_read3]
-type=IsaFake
-pio_addr=8804615848643
-pio_latency=1000
-pio_size=8
-ret_bad_addr=false
-update_data=false
-warn_access=
-ret_data8=255
-ret_data16=65535
-ret_data32=4294967295
-ret_data64=18446744073709551615
-platform=system.tsunami
-system=system
-
-[system.tsunami.fake_pnp_read2]
-type=IsaFake
-pio_addr=8804615848579
-pio_latency=1000
-pio_size=8
-ret_bad_addr=false
-update_data=false
-warn_access=
-ret_data8=255
-ret_data16=65535
-ret_data32=4294967295
-ret_data64=18446744073709551615
-platform=system.tsunami
-system=system
-
-[system.tsunami.fake_pnp_read1]
-type=IsaFake
-pio_addr=8804615848515
-pio_latency=1000
-pio_size=8
-ret_bad_addr=false
-update_data=false
-warn_access=
-ret_data8=255
-ret_data16=65535
-ret_data32=4294967295
-ret_data64=18446744073709551615
-platform=system.tsunami
-system=system
-
-[system.tsunami.fake_pnp_read0]
-type=IsaFake
-pio_addr=8804615848451
-pio_latency=1000
-pio_size=8
-ret_bad_addr=false
-update_data=false
-warn_access=
-ret_data8=255
-ret_data16=65535
-ret_data32=4294967295
-ret_data64=18446744073709551615
-platform=system.tsunami
-system=system
-
-[system.tsunami.fake_pnp_read7]
-type=IsaFake
-pio_addr=8804615848899
-pio_latency=1000
-pio_size=8
-ret_bad_addr=false
-update_data=false
-warn_access=
-ret_data8=255
-ret_data16=65535
-ret_data32=4294967295
-ret_data64=18446744073709551615
-platform=system.tsunami
-system=system
-
-[system.tsunami.fake_pnp_read6]
-type=IsaFake
-pio_addr=8804615848835
-pio_latency=1000
-pio_size=8
-ret_bad_addr=false
-update_data=false
-warn_access=
-ret_data8=255
-ret_data16=65535
-ret_data32=4294967295
-ret_data64=18446744073709551615
-platform=system.tsunami
-system=system
-
-[system.tsunami.fake_pnp_read5]
-type=IsaFake
-pio_addr=8804615848771
-pio_latency=1000
-pio_size=8
-ret_bad_addr=false
-update_data=false
-warn_access=
-ret_data8=255
-ret_data16=65535
-ret_data32=4294967295
-ret_data64=18446744073709551615
-platform=system.tsunami
-system=system
-
-[system.tsunami.fake_pnp_read4]
-type=IsaFake
-pio_addr=8804615848707
-pio_latency=1000
-pio_size=8
-ret_bad_addr=false
-update_data=false
-warn_access=
-ret_data8=255
-ret_data16=65535
-ret_data32=4294967295
-ret_data64=18446744073709551615
-platform=system.tsunami
-system=system
-
-[system.tsunami.fake_pnp_write]
-type=IsaFake
-pio_addr=8804615850617
-pio_latency=1000
-pio_size=8
-ret_bad_addr=false
-update_data=false
-warn_access=
-ret_data8=255
-ret_data16=65535
-ret_data32=4294967295
-ret_data64=18446744073709551615
-platform=system.tsunami
-system=system
-
-[system.tsunami.fb]
-type=BadDevice
-devicename=FrameBuffer
-pio_addr=8804615848912
-system=system
-platform=system.tsunami
-pio_latency=1000
-
-[system.tsunami.ethernet.configdata]
-type=PciConfigData
-VendorID=4107
-DeviceID=34
-Command=0
-Status=656
-Revision=0
-ProgIF=0
-SubClassCode=0
-ClassCode=2
-CacheLineSize=0
-LatencyTimer=0
-HeaderType=0
-BIST=0
-BAR0=1
-BAR1=0
-BAR2=0
-BAR3=0
-BAR4=0
-BAR5=0
-CardbusCIS=0
-SubsystemVendorID=0
-SubsystemID=0
-ExpansionROM=0
-InterruptLine=30
-InterruptPin=1
-MinimumGrant=176
-MaximumLatency=52
-BAR0Size=256
-BAR1Size=4096
-BAR2Size=0
-BAR3Size=0
-BAR4Size=0
-BAR5Size=0
-
-[system.tsunami.ethernet]
-type=NSGigE
-system=system
-platform=system.tsunami
-min_backoff_delay=4000
-max_backoff_delay=10000000
-configdata=system.tsunami.ethernet.configdata
-pci_bus=0
-pci_dev=1
-pci_func=0
-pio_latency=1000
-config_latency=20000
-clock=0
-dma_desc_free=false
-dma_data_free=false
-dma_read_delay=0
-dma_write_delay=0
-dma_read_factor=0
-dma_write_factor=0
-dma_no_allocate=true
-intr_delay=10000000
-rx_delay=1000000
-tx_delay=1000000
-rx_fifo_size=524288
-tx_fifo_size=524288
-rx_filter=true
-hardware_address=00:90:00:00:00:01
-rx_thread=false
-tx_thread=false
-rss=false
-
-[system.tsunami.etherint]
-type=NSGigEInt
-peer=null
-device=system.tsunami.ethernet
-
-[system.tsunami.fake_OROM]
-type=IsaFake
-pio_addr=8796093677568
-pio_latency=1000
-pio_size=393216
-ret_bad_addr=false
-update_data=false
-warn_access=
-ret_data8=255
-ret_data16=65535
-ret_data32=4294967295
-ret_data64=18446744073709551615
-platform=system.tsunami
-system=system
-
-[system.tsunami.uart]
-type=Uart8250
-pio_addr=8804615848952
-pio_latency=1000
-platform=system.tsunami
-sim_console=system.sim_console
-system=system
-
-[system.tsunami.fake_sm_chip]
-type=IsaFake
-pio_addr=8804615848816
-pio_latency=1000
-pio_size=8
-ret_bad_addr=false
-update_data=false
-warn_access=
-ret_data8=255
-ret_data16=65535
-ret_data32=4294967295
-ret_data64=18446744073709551615
-platform=system.tsunami
-system=system
-
-[system.tsunami.fake_pnp_addr]
-type=IsaFake
-pio_addr=8804615848569
-pio_latency=1000
-pio_size=8
-ret_bad_addr=false
-update_data=false
-warn_access=
-ret_data8=255
-ret_data16=65535
-ret_data32=4294967295
-ret_data64=18446744073709551615
-platform=system.tsunami
-system=system
-
-[system.tsunami.ide.configdata]
-type=PciConfigData
-VendorID=32902
-DeviceID=28945
-Command=0
-Status=640
-Revision=0
-ProgIF=133
-SubClassCode=1
-ClassCode=1
-CacheLineSize=0
-LatencyTimer=0
-HeaderType=0
-BIST=0
-BAR0=1
-BAR1=1
-BAR2=1
-BAR3=1
-BAR4=1
-BAR5=1
-CardbusCIS=0
-SubsystemVendorID=0
-SubsystemID=0
-ExpansionROM=0
-InterruptLine=31
-InterruptPin=1
-MinimumGrant=0
-MaximumLatency=0
-BAR0Size=8
-BAR1Size=4
-BAR2Size=8
-BAR3Size=4
-BAR4Size=16
-BAR5Size=0
-
-[system.tsunami.ide]
-type=IdeController
-system=system
-platform=system.tsunami
-min_backoff_delay=4000
-max_backoff_delay=10000000
-configdata=system.tsunami.ide.configdata
-pci_bus=0
-pci_dev=0
-pci_func=0
-pio_latency=1000
-config_latency=20000
-disks=system.disk0 system.disk2
-
-[system.toL2Bus]
-type=Bus
-bus_id=0
-clock=1000
-width=64
-responder_set=false
-block_size=64
-
-[system.toL2Bus.responder]
-type=IsaFake
-pio_addr=0
-pio_latency=1
-pio_size=8
-ret_bad_addr=true
-update_data=false
-warn_access=
-ret_data8=255
-ret_data16=65535
-ret_data32=4294967295
-ret_data64=18446744073709551615
-platform=system.tsunami
-system=system
-
-[system.iobus]
-type=Bus
-bus_id=0
-clock=1000
-width=64
-responder_set=true
-block_size=64
-
-[system.cpu.icache.protocol]
-type=CoherenceProtocol
-protocol=moesi
-do_upgrades=true
-
-[system.cpu.icache]
-type=BaseCache
-size=32768
-assoc=1
-block_size=64
-latency=1000
-mshrs=4
-tgts_per_mshr=8
-write_buffers=8
-prioritizeRequests=false
-protocol=system.cpu.icache.protocol
-trace_addr=0
-hash_delay=1
-repl=null
-compressed_bus=false
-store_compressed=false
-adaptive_compression=false
-compression_latency=0
-block_size=64
-max_miss_count=0
-addr_range=[0,18446744073709551615]
-split=false
-split_size=0
-lifo=false
-two_queue=false
-prefetch_miss=false
-prefetch_access=false
-prefetcher_size=100
-prefetch_past_page=false
-prefetch_serial_squash=false
-prefetch_latency=10000
-prefetch_degree=1
-prefetch_policy=none
-prefetch_cache_check_push=true
-prefetch_use_cpu_id=true
-prefetch_data_accesses_only=false
-
-[system.cpu.dcache.protocol]
-type=CoherenceProtocol
-protocol=moesi
-do_upgrades=true
-
-[system.cpu.dcache]
-type=BaseCache
-size=32768
-assoc=4
-block_size=64
-latency=1000
-mshrs=4
-tgts_per_mshr=8
-write_buffers=8
-prioritizeRequests=false
-protocol=system.cpu.dcache.protocol
-trace_addr=0
-hash_delay=1
-repl=null
-compressed_bus=false
-store_compressed=false
-adaptive_compression=false
-compression_latency=0
-block_size=64
-max_miss_count=0
-addr_range=[0,18446744073709551615]
-split=false
-split_size=0
-lifo=false
-two_queue=false
-prefetch_miss=false
-prefetch_access=false
-prefetcher_size=100
-prefetch_past_page=false
-prefetch_serial_squash=false
-prefetch_latency=10000
-prefetch_degree=1
-prefetch_policy=none
-prefetch_cache_check_push=true
-prefetch_use_cpu_id=true
-prefetch_data_accesses_only=false
-
diff --git a/tests/quick/20.eio-short/ref/alpha/eio/detailed/config.out b/tests/quick/20.eio-short/ref/alpha/eio/detailed/config.out
deleted file mode 100644
index c92557696..000000000
--- a/tests/quick/20.eio-short/ref/alpha/eio/detailed/config.out
+++ /dev/null
@@ -1,279 +0,0 @@
-[root]
-type=Root
-clock=1000000000000
-max_tick=0
-progress_interval=0
-output_file=cout
-
-[system.physmem]
-type=PhysicalMemory
-file=
-// range not specified
-latency=1
-
-[system]
-type=System
-physmem=system.physmem
-mem_mode=atomic
-
-[system.workload]
-type=EioProcess
-file=/z/ktlim2/clean/newmem-merge/tests/test-progs/anagram/bin/anagram-vshort.eio.gz
-chkpt=
-output=cout
-system=system
-
-[system.cpu.mem]
-type=Bus
-bus_id=0
-
-[system.cpu.fuPool.FUList0.opList0]
-type=OpDesc
-opClass=IntAlu
-opLat=1
-issueLat=1
-
-[system.cpu.fuPool.FUList0]
-type=FUDesc
-opList=system.cpu.fuPool.FUList0.opList0
-count=6
-
-[system.cpu.fuPool.FUList1.opList0]
-type=OpDesc
-opClass=IntMult
-opLat=3
-issueLat=1
-
-[system.cpu.fuPool.FUList1.opList1]
-type=OpDesc
-opClass=IntDiv
-opLat=20
-issueLat=19
-
-[system.cpu.fuPool.FUList1]
-type=FUDesc
-opList=system.cpu.fuPool.FUList1.opList0 system.cpu.fuPool.FUList1.opList1
-count=2
-
-[system.cpu.fuPool.FUList2.opList0]
-type=OpDesc
-opClass=FloatAdd
-opLat=2
-issueLat=1
-
-[system.cpu.fuPool.FUList2.opList1]
-type=OpDesc
-opClass=FloatCmp
-opLat=2
-issueLat=1
-
-[system.cpu.fuPool.FUList2.opList2]
-type=OpDesc
-opClass=FloatCvt
-opLat=2
-issueLat=1
-
-[system.cpu.fuPool.FUList2]
-type=FUDesc
-opList=system.cpu.fuPool.FUList2.opList0 system.cpu.fuPool.FUList2.opList1 system.cpu.fuPool.FUList2.opList2
-count=4
-
-[system.cpu.fuPool.FUList3.opList0]
-type=OpDesc
-opClass=FloatMult
-opLat=4
-issueLat=1
-
-[system.cpu.fuPool.FUList3.opList1]
-type=OpDesc
-opClass=FloatDiv
-opLat=12
-issueLat=12
-
-[system.cpu.fuPool.FUList3.opList2]
-type=OpDesc
-opClass=FloatSqrt
-opLat=24
-issueLat=24
-
-[system.cpu.fuPool.FUList3]
-type=FUDesc
-opList=system.cpu.fuPool.FUList3.opList0 system.cpu.fuPool.FUList3.opList1 system.cpu.fuPool.FUList3.opList2
-count=2
-
-[system.cpu.fuPool.FUList4.opList0]
-type=OpDesc
-opClass=MemRead
-opLat=1
-issueLat=1
-
-[system.cpu.fuPool.FUList4]
-type=FUDesc
-opList=system.cpu.fuPool.FUList4.opList0
-count=0
-
-[system.cpu.fuPool.FUList5.opList0]
-type=OpDesc
-opClass=MemWrite
-opLat=1
-issueLat=1
-
-[system.cpu.fuPool.FUList5]
-type=FUDesc
-opList=system.cpu.fuPool.FUList5.opList0
-count=0
-
-[system.cpu.fuPool.FUList6.opList0]
-type=OpDesc
-opClass=MemRead
-opLat=1
-issueLat=1
-
-[system.cpu.fuPool.FUList6.opList1]
-type=OpDesc
-opClass=MemWrite
-opLat=1
-issueLat=1
-
-[system.cpu.fuPool.FUList6]
-type=FUDesc
-opList=system.cpu.fuPool.FUList6.opList0 system.cpu.fuPool.FUList6.opList1
-count=4
-
-[system.cpu.fuPool.FUList7.opList0]
-type=OpDesc
-opClass=IprAccess
-opLat=3
-issueLat=3
-
-[system.cpu.fuPool.FUList7]
-type=FUDesc
-opList=system.cpu.fuPool.FUList7.opList0
-count=1
-
-[system.cpu.fuPool]
-type=FUPool
-FUList=system.cpu.fuPool.FUList0 system.cpu.fuPool.FUList1 system.cpu.fuPool.FUList2 system.cpu.fuPool.FUList3 system.cpu.fuPool.FUList4 system.cpu.fuPool.FUList5 system.cpu.fuPool.FUList6 system.cpu.fuPool.FUList7
-
-[system.cpu]
-type=DerivO3CPU
-clock=1
-numThreads=1
-activity=0
-workload=system.workload
-mem=system.cpu.mem
-checker=null
-max_insts_any_thread=500000
-max_insts_all_threads=0
-max_loads_any_thread=0
-max_loads_all_threads=0
-cachePorts=200
-decodeToFetchDelay=1
-renameToFetchDelay=1
-iewToFetchDelay=1
-commitToFetchDelay=1
-fetchWidth=8
-renameToDecodeDelay=1
-iewToDecodeDelay=1
-commitToDecodeDelay=1
-fetchToDecodeDelay=1
-decodeWidth=8
-iewToRenameDelay=1
-commitToRenameDelay=1
-decodeToRenameDelay=1
-renameWidth=8
-commitToIEWDelay=1
-renameToIEWDelay=2
-issueToExecuteDelay=1
-dispatchWidth=8
-issueWidth=8
-wbWidth=8
-wbDepth=1
-fuPool=system.cpu.fuPool
-iewToCommitDelay=1
-renameToROBDelay=1
-commitWidth=8
-squashWidth=8
-trapLatency=13
-backComSize=5
-forwardComSize=5
-predType=tournament
-localPredictorSize=2048
-localCtrBits=2
-localHistoryTableSize=2048
-localHistoryBits=11
-globalPredictorSize=8192
-globalCtrBits=2
-globalHistoryBits=13
-choicePredictorSize=8192
-choiceCtrBits=2
-BTBEntries=4096
-BTBTagSize=16
-RASSize=16
-LQEntries=32
-SQEntries=32
-LFSTSize=1024
-SSITSize=1024
-numPhysIntRegs=256
-numPhysFloatRegs=256
-numIQEntries=64
-numROBEntries=192
-smtNumFetchingThreads=1
-smtFetchPolicy=SingleThread
-smtLSQPolicy=Partitioned
-smtLSQThreshold=100
-smtIQPolicy=Partitioned
-smtIQThreshold=100
-smtROBPolicy=Partitioned
-smtROBThreshold=100
-smtCommitPolicy=RoundRobin
-instShiftAmt=2
-defer_registration=false
-function_trace=false
-function_trace_start=0
-
-[trace]
-flags=
-start=0
-bufsize=0
-file=cout
-dump_on_exit=false
-ignore=
-
-[stats]
-descriptions=true
-project_name=test
-simulation_name=test
-simulation_sample=0
-text_file=m5stats.txt
-text_compat=true
-mysql_db=
-mysql_user=
-mysql_password=
-mysql_host=
-events_start=-1
-dump_reset=false
-dump_cycle=0
-dump_period=0
-ignore_events=
-
-[random]
-seed=1
-
-[exetrace]
-speculative=true
-print_cycle=true
-print_opclass=true
-print_thread=true
-print_effaddr=true
-print_data=true
-print_iregs=false
-print_fetchseq=false
-print_cpseq=false
-pc_symbol=true
-intel_format=false
-trace_system=client
-
-[debug]
-break_cycles=
-
diff --git a/tests/quick/20.eio-short/ref/alpha/eio/simple-atomic/config.out b/tests/quick/20.eio-short/ref/alpha/eio/simple-atomic/config.out
deleted file mode 100644
index 73c363bc4..000000000
--- a/tests/quick/20.eio-short/ref/alpha/eio/simple-atomic/config.out
+++ /dev/null
@@ -1,49 +0,0 @@
-[root]
-type=Root
-dummy=0
-
-[system.physmem]
-type=PhysicalMemory
-file=
-range=[0,134217727]
-latency=1
-zero=false
-
-[system]
-type=System
-physmem=system.physmem
-mem_mode=atomic
-
-[system.membus]
-type=Bus
-bus_id=0
-clock=1000
-width=64
-responder_set=false
-block_size=64
-
-[system.cpu.workload]
-type=EioProcess
-file=tests/test-progs/anagram/bin/alpha/eio/anagram-vshort.eio.gz
-chkpt=
-output=cout
-system=system
-
-[system.cpu]
-type=AtomicSimpleCPU
-max_insts_any_thread=500000
-max_insts_all_threads=0
-max_loads_any_thread=0
-max_loads_all_threads=0
-progress_interval=0
-system=system
-cpu_id=0
-workload=system.cpu.workload
-clock=500
-phase=0
-defer_registration=false
-width=1
-function_trace=false
-function_trace_start=0
-simulate_stalls=false
-
diff --git a/tests/quick/20.eio-short/ref/alpha/eio/simple-timing/config.out b/tests/quick/20.eio-short/ref/alpha/eio/simple-timing/config.out
deleted file mode 100644
index e85a0bee1..000000000
--- a/tests/quick/20.eio-short/ref/alpha/eio/simple-timing/config.out
+++ /dev/null
@@ -1,168 +0,0 @@
-[root]
-type=Root
-dummy=0
-
-[system.physmem]
-type=PhysicalMemory
-file=
-range=[0,134217727]
-latency=1
-zero=false
-
-[system]
-type=System
-physmem=system.physmem
-mem_mode=atomic
-
-[system.membus]
-type=Bus
-bus_id=0
-clock=1000
-width=64
-responder_set=false
-block_size=64
-
-[system.cpu.workload]
-type=EioProcess
-file=tests/test-progs/anagram/bin/alpha/eio/anagram-vshort.eio.gz
-chkpt=
-output=cout
-system=system
-
-[system.cpu]
-type=TimingSimpleCPU
-max_insts_any_thread=500000
-max_insts_all_threads=0
-max_loads_any_thread=0
-max_loads_all_threads=0
-progress_interval=0
-system=system
-cpu_id=0
-workload=system.cpu.workload
-clock=500
-phase=0
-defer_registration=false
-// width not specified
-function_trace=false
-function_trace_start=0
-// simulate_stalls not specified
-
-[system.cpu.toL2Bus]
-type=Bus
-bus_id=0
-clock=1000
-width=64
-responder_set=false
-block_size=64
-
-[system.cpu.icache]
-type=BaseCache
-size=131072
-assoc=2
-block_size=64
-latency=1000
-mshrs=10
-tgts_per_mshr=5
-write_buffers=8
-prioritizeRequests=false
-protocol=null
-trace_addr=0
-hash_delay=1
-repl=null
-compressed_bus=false
-store_compressed=false
-adaptive_compression=false
-compression_latency=0
-block_size=64
-max_miss_count=0
-addr_range=[0,18446744073709551615]
-split=false
-split_size=0
-lifo=false
-two_queue=false
-prefetch_miss=false
-prefetch_access=false
-prefetcher_size=100
-prefetch_past_page=false
-prefetch_serial_squash=false
-prefetch_latency=10000
-prefetch_degree=1
-prefetch_policy=none
-prefetch_cache_check_push=true
-prefetch_use_cpu_id=true
-prefetch_data_accesses_only=false
-
-[system.cpu.dcache]
-type=BaseCache
-size=262144
-assoc=2
-block_size=64
-latency=1000
-mshrs=10
-tgts_per_mshr=5
-write_buffers=8
-prioritizeRequests=false
-protocol=null
-trace_addr=0
-hash_delay=1
-repl=null
-compressed_bus=false
-store_compressed=false
-adaptive_compression=false
-compression_latency=0
-block_size=64
-max_miss_count=0
-addr_range=[0,18446744073709551615]
-split=false
-split_size=0
-lifo=false
-two_queue=false
-prefetch_miss=false
-prefetch_access=false
-prefetcher_size=100
-prefetch_past_page=false
-prefetch_serial_squash=false
-prefetch_latency=10000
-prefetch_degree=1
-prefetch_policy=none
-prefetch_cache_check_push=true
-prefetch_use_cpu_id=true
-prefetch_data_accesses_only=false
-
-[system.cpu.l2cache]
-type=BaseCache
-size=2097152
-assoc=2
-block_size=64
-latency=10000
-mshrs=10
-tgts_per_mshr=5
-write_buffers=8
-prioritizeRequests=false
-protocol=null
-trace_addr=0
-hash_delay=1
-repl=null
-compressed_bus=false
-store_compressed=false
-adaptive_compression=false
-compression_latency=0
-block_size=64
-max_miss_count=0
-addr_range=[0,18446744073709551615]
-split=false
-split_size=0
-lifo=false
-two_queue=false
-prefetch_miss=false
-prefetch_access=false
-prefetcher_size=100
-prefetch_past_page=false
-prefetch_serial_squash=false
-prefetch_latency=100000
-prefetch_degree=1
-prefetch_policy=none
-prefetch_cache_check_push=true
-prefetch_use_cpu_id=true
-prefetch_data_accesses_only=false
-
diff --git a/tests/quick/50.memtest/ref/alpha/linux/memtest/config.out b/tests/quick/50.memtest/ref/alpha/linux/memtest/config.out
deleted file mode 100644
index 6bf1f2712..000000000
--- a/tests/quick/50.memtest/ref/alpha/linux/memtest/config.out
+++ /dev/null
@@ -1,516 +0,0 @@
-[root]
-type=Root
-dummy=0
-
-[system.physmem]
-type=PhysicalMemory
-file=
-range=[0,134217727]
-latency=1
-zero=false
-
-[system]
-type=System
-physmem=system.physmem
-mem_mode=timing
-
-[system.membus]
-type=Bus
-bus_id=0
-clock=2
-width=16
-responder_set=false
-block_size=64
-
-[system.l2c]
-type=BaseCache
-size=65536
-assoc=8
-block_size=64
-latency=10000
-mshrs=92
-tgts_per_mshr=16
-write_buffers=8
-prioritizeRequests=false
-protocol=null
-trace_addr=0
-hash_delay=1
-repl=null
-compressed_bus=false
-store_compressed=false
-adaptive_compression=false
-compression_latency=0
-block_size=64
-max_miss_count=0
-addr_range=[0,18446744073709551615]
-split=false
-split_size=0
-lifo=false
-two_queue=false
-prefetch_miss=false
-prefetch_access=false
-prefetcher_size=100
-prefetch_past_page=false
-prefetch_serial_squash=false
-prefetch_latency=100000
-prefetch_degree=1
-prefetch_policy=none
-prefetch_cache_check_push=true
-prefetch_use_cpu_id=true
-prefetch_data_accesses_only=false
-
-[system.cpu6]
-type=MemTest
-memory_size=65536
-percent_reads=65
-percent_functional=50
-percent_uncacheable=10
-progress_interval=10000
-percent_source_unaligned=50
-percent_dest_unaligned=50
-trace_addr=0
-max_loads=100000
-atomic=false
-
-[system.cpu6.l1c.protocol]
-type=CoherenceProtocol
-protocol=moesi
-do_upgrades=true
-
-[system.cpu6.l1c]
-type=BaseCache
-size=32768
-assoc=4
-block_size=64
-latency=1000
-mshrs=12
-tgts_per_mshr=8
-write_buffers=8
-prioritizeRequests=false
-protocol=system.cpu6.l1c.protocol
-trace_addr=0
-hash_delay=1
-repl=null
-compressed_bus=false
-store_compressed=false
-adaptive_compression=false
-compression_latency=0
-block_size=64
-max_miss_count=0
-addr_range=[0,18446744073709551615]
-split=false
-split_size=0
-lifo=false
-two_queue=false
-prefetch_miss=false
-prefetch_access=false
-prefetcher_size=100
-prefetch_past_page=false
-prefetch_serial_squash=false
-prefetch_latency=10000
-prefetch_degree=1
-prefetch_policy=none
-prefetch_cache_check_push=true
-prefetch_use_cpu_id=true
-prefetch_data_accesses_only=false
-
-[system.cpu4]
-type=MemTest
-memory_size=65536
-percent_reads=65
-percent_functional=50
-percent_uncacheable=10
-progress_interval=10000
-percent_source_unaligned=50
-percent_dest_unaligned=50
-trace_addr=0
-max_loads=100000
-atomic=false
-
-[system.cpu4.l1c.protocol]
-type=CoherenceProtocol
-protocol=moesi
-do_upgrades=true
-
-[system.cpu4.l1c]
-type=BaseCache
-size=32768
-assoc=4
-block_size=64
-latency=1000
-mshrs=12
-tgts_per_mshr=8
-write_buffers=8
-prioritizeRequests=false
-protocol=system.cpu4.l1c.protocol
-trace_addr=0
-hash_delay=1
-repl=null
-compressed_bus=false
-store_compressed=false
-adaptive_compression=false
-compression_latency=0
-block_size=64
-max_miss_count=0
-addr_range=[0,18446744073709551615]
-split=false
-split_size=0
-lifo=false
-two_queue=false
-prefetch_miss=false
-prefetch_access=false
-prefetcher_size=100
-prefetch_past_page=false
-prefetch_serial_squash=false
-prefetch_latency=10000
-prefetch_degree=1
-prefetch_policy=none
-prefetch_cache_check_push=true
-prefetch_use_cpu_id=true
-prefetch_data_accesses_only=false
-
-[system.cpu5]
-type=MemTest
-memory_size=65536
-percent_reads=65
-percent_functional=50
-percent_uncacheable=10
-progress_interval=10000
-percent_source_unaligned=50
-percent_dest_unaligned=50
-trace_addr=0
-max_loads=100000
-atomic=false
-
-[system.cpu5.l1c.protocol]
-type=CoherenceProtocol
-protocol=moesi
-do_upgrades=true
-
-[system.cpu5.l1c]
-type=BaseCache
-size=32768
-assoc=4
-block_size=64
-latency=1000
-mshrs=12
-tgts_per_mshr=8
-write_buffers=8
-prioritizeRequests=false
-protocol=system.cpu5.l1c.protocol
-trace_addr=0
-hash_delay=1
-repl=null
-compressed_bus=false
-store_compressed=false
-adaptive_compression=false
-compression_latency=0
-block_size=64
-max_miss_count=0
-addr_range=[0,18446744073709551615]
-split=false
-split_size=0
-lifo=false
-two_queue=false
-prefetch_miss=false
-prefetch_access=false
-prefetcher_size=100
-prefetch_past_page=false
-prefetch_serial_squash=false
-prefetch_latency=10000
-prefetch_degree=1
-prefetch_policy=none
-prefetch_cache_check_push=true
-prefetch_use_cpu_id=true
-prefetch_data_accesses_only=false
-
-[system.cpu2]
-type=MemTest
-memory_size=65536
-percent_reads=65
-percent_functional=50
-percent_uncacheable=10
-progress_interval=10000
-percent_source_unaligned=50
-percent_dest_unaligned=50
-trace_addr=0
-max_loads=100000
-atomic=false
-
-[system.cpu2.l1c.protocol]
-type=CoherenceProtocol
-protocol=moesi
-do_upgrades=true
-
-[system.cpu2.l1c]
-type=BaseCache
-size=32768
-assoc=4
-block_size=64
-latency=1000
-mshrs=12
-tgts_per_mshr=8
-write_buffers=8
-prioritizeRequests=false
-protocol=system.cpu2.l1c.protocol
-trace_addr=0
-hash_delay=1
-repl=null
-compressed_bus=false
-store_compressed=false
-adaptive_compression=false
-compression_latency=0
-block_size=64
-max_miss_count=0
-addr_range=[0,18446744073709551615]
-split=false
-split_size=0
-lifo=false
-two_queue=false
-prefetch_miss=false
-prefetch_access=false
-prefetcher_size=100
-prefetch_past_page=false
-prefetch_serial_squash=false
-prefetch_latency=10000
-prefetch_degree=1
-prefetch_policy=none
-prefetch_cache_check_push=true
-prefetch_use_cpu_id=true
-prefetch_data_accesses_only=false
-
-[system.cpu3]
-type=MemTest
-memory_size=65536
-percent_reads=65
-percent_functional=50
-percent_uncacheable=10
-progress_interval=10000
-percent_source_unaligned=50
-percent_dest_unaligned=50
-trace_addr=0
-max_loads=100000
-atomic=false
-
-[system.cpu3.l1c.protocol]
-type=CoherenceProtocol
-protocol=moesi
-do_upgrades=true
-
-[system.cpu3.l1c]
-type=BaseCache
-size=32768
-assoc=4
-block_size=64
-latency=1000
-mshrs=12
-tgts_per_mshr=8
-write_buffers=8
-prioritizeRequests=false
-protocol=system.cpu3.l1c.protocol
-trace_addr=0
-hash_delay=1
-repl=null
-compressed_bus=false
-store_compressed=false
-adaptive_compression=false
-compression_latency=0
-block_size=64
-max_miss_count=0
-addr_range=[0,18446744073709551615]
-split=false
-split_size=0
-lifo=false
-two_queue=false
-prefetch_miss=false
-prefetch_access=false
-prefetcher_size=100
-prefetch_past_page=false
-prefetch_serial_squash=false
-prefetch_latency=10000
-prefetch_degree=1
-prefetch_policy=none
-prefetch_cache_check_push=true
-prefetch_use_cpu_id=true
-prefetch_data_accesses_only=false
-
-[system.cpu0]
-type=MemTest
-memory_size=65536
-percent_reads=65
-percent_functional=50
-percent_uncacheable=10
-progress_interval=10000
-percent_source_unaligned=50
-percent_dest_unaligned=50
-trace_addr=0
-max_loads=100000
-atomic=false
-
-[system.cpu0.l1c.protocol]
-type=CoherenceProtocol
-protocol=moesi
-do_upgrades=true
-
-[system.cpu0.l1c]
-type=BaseCache
-size=32768
-assoc=4
-block_size=64
-latency=1000
-mshrs=12
-tgts_per_mshr=8
-write_buffers=8
-prioritizeRequests=false
-protocol=system.cpu0.l1c.protocol
-trace_addr=0
-hash_delay=1
-repl=null
-compressed_bus=false
-store_compressed=false
-adaptive_compression=false
-compression_latency=0
-block_size=64
-max_miss_count=0
-addr_range=[0,18446744073709551615]
-split=false
-split_size=0
-lifo=false
-two_queue=false
-prefetch_miss=false
-prefetch_access=false
-prefetcher_size=100
-prefetch_past_page=false
-prefetch_serial_squash=false
-prefetch_latency=10000
-prefetch_degree=1
-prefetch_policy=none
-prefetch_cache_check_push=true
-prefetch_use_cpu_id=true
-prefetch_data_accesses_only=false
-
-[system.cpu1]
-type=MemTest
-memory_size=65536
-percent_reads=65
-percent_functional=50
-percent_uncacheable=10
-progress_interval=10000
-percent_source_unaligned=50
-percent_dest_unaligned=50
-trace_addr=0
-max_loads=100000
-atomic=false
-
-[system.cpu1.l1c.protocol]
-type=CoherenceProtocol
-protocol=moesi
-do_upgrades=true
-
-[system.cpu1.l1c]
-type=BaseCache
-size=32768
-assoc=4
-block_size=64
-latency=1000
-mshrs=12
-tgts_per_mshr=8
-write_buffers=8
-prioritizeRequests=false
-protocol=system.cpu1.l1c.protocol
-trace_addr=0
-hash_delay=1
-repl=null
-compressed_bus=false
-store_compressed=false
-adaptive_compression=false
-compression_latency=0
-block_size=64
-max_miss_count=0
-addr_range=[0,18446744073709551615]
-split=false
-split_size=0
-lifo=false
-two_queue=false
-prefetch_miss=false
-prefetch_access=false
-prefetcher_size=100
-prefetch_past_page=false
-prefetch_serial_squash=false
-prefetch_latency=10000
-prefetch_degree=1
-prefetch_policy=none
-prefetch_cache_check_push=true
-prefetch_use_cpu_id=true
-prefetch_data_accesses_only=false
-
-[system.funcmem]
-type=PhysicalMemory
-file=
-range=[0,134217727]
-latency=1
-zero=false
-
-[system.cpu7]
-type=MemTest
-memory_size=65536
-percent_reads=65
-percent_functional=50
-percent_uncacheable=10
-progress_interval=10000
-percent_source_unaligned=50
-percent_dest_unaligned=50
-trace_addr=0
-max_loads=100000
-atomic=false
-
-[system.cpu7.l1c.protocol]
-type=CoherenceProtocol
-protocol=moesi
-do_upgrades=true
-
-[system.cpu7.l1c]
-type=BaseCache
-size=32768
-assoc=4
-block_size=64
-latency=1000
-mshrs=12
-tgts_per_mshr=8
-write_buffers=8
-prioritizeRequests=false
-protocol=system.cpu7.l1c.protocol
-trace_addr=0
-hash_delay=1
-repl=null
-compressed_bus=false
-store_compressed=false
-adaptive_compression=false
-compression_latency=0
-block_size=64
-max_miss_count=0
-addr_range=[0,18446744073709551615]
-split=false
-split_size=0
-lifo=false
-two_queue=false
-prefetch_miss=false
-prefetch_access=false
-prefetcher_size=100
-prefetch_past_page=false
-prefetch_serial_squash=false
-prefetch_latency=10000
-prefetch_degree=1
-prefetch_policy=none
-prefetch_cache_check_push=true
-prefetch_use_cpu_id=true
-prefetch_data_accesses_only=false
-
-[system.toL2Bus]
-type=Bus
-bus_id=0
-clock=2
-width=16
-responder_set=false
-block_size=64
-
diff --git a/tests/quick/80.netperf-stream/ref/alpha/linux/twosys-tsunami-simple-atomic/config.out b/tests/quick/80.netperf-stream/ref/alpha/linux/twosys-tsunami-simple-atomic/config.out
deleted file mode 100644
index 1ed581be9..000000000
--- a/tests/quick/80.netperf-stream/ref/alpha/linux/twosys-tsunami-simple-atomic/config.out
+++ /dev/null
@@ -1,1268 +0,0 @@
-[root]
-type=Root
-dummy=0
-
-[testsys.physmem]
-type=PhysicalMemory
-file=
-range=[0,134217727]
-latency=1
-zero=false
-
-[testsys]
-type=LinuxAlphaSystem
-boot_cpu_frequency=1
-physmem=testsys.physmem
-mem_mode=atomic
-kernel=/dist/m5/system/binaries/vmlinux
-console=/dist/m5/system/binaries/console
-pal=/dist/m5/system/binaries/ts_osfpal
-boot_osflags=root=/dev/hda1 console=ttyS0
-readfile=/Users/nate/work/m5/outgoing/configs/boot/netperf-stream-client.rcS
-symbolfile=
-init_param=0
-system_type=34
-system_rev=1024
-
-[testsys.membus]
-type=Bus
-bus_id=1
-clock=1000
-width=64
-responder_set=false
-block_size=64
-
-[testsys.intrctrl]
-type=IntrControl
-sys=testsys
-
-[testsys.tsunami]
-type=Tsunami
-system=testsys
-intrctrl=testsys.intrctrl
-
-[testsys.membus.responder]
-type=IsaFake
-pio_addr=0
-pio_latency=1
-pio_size=8
-ret_bad_addr=true
-update_data=false
-warn_access=
-ret_data8=255
-ret_data16=65535
-ret_data32=4294967295
-ret_data64=18446744073709551615
-platform=testsys.tsunami
-system=testsys
-
-[testsys.bridge]
-type=Bridge
-req_size_a=16
-req_size_b=16
-resp_size_a=16
-resp_size_b=16
-delay=50000
-nack_delay=4000
-write_ack=false
-fix_partial_write_a=false
-fix_partial_write_b=true
-
-[testsys.disk0.image.child]
-type=RawDiskImage
-image_file=/dist/m5/system/disks/linux-latest.img
-read_only=true
-
-[testsys.disk0.image]
-type=CowDiskImage
-child=testsys.disk0.image.child
-image_file=
-table_size=65536
-read_only=false
-
-[testsys.disk0]
-type=IdeDisk
-image=testsys.disk0.image
-driveID=master
-delay=1000000
-
-[testsys.disk2.image.child]
-type=RawDiskImage
-image_file=/dist/m5/system/disks/linux-bigswap2.img
-read_only=true
-
-[testsys.disk2.image]
-type=CowDiskImage
-child=testsys.disk2.image.child
-image_file=
-table_size=65536
-read_only=false
-
-[testsys.disk2]
-type=IdeDisk
-image=testsys.disk2.image
-driveID=master
-delay=1000000
-
-[testsys.simple_disk.disk]
-type=RawDiskImage
-image_file=/dist/m5/system/disks/linux-latest.img
-read_only=true
-
-[testsys.simple_disk]
-type=SimpleDisk
-system=testsys
-disk=testsys.simple_disk.disk
-
-[testsys.tsunami.fake_uart1]
-type=IsaFake
-pio_addr=8804615848696
-pio_latency=1000
-pio_size=8
-ret_bad_addr=false
-update_data=false
-warn_access=
-ret_data8=255
-ret_data16=65535
-ret_data32=4294967295
-ret_data64=18446744073709551615
-platform=testsys.tsunami
-system=testsys
-
-[testsys.tsunami.fake_uart2]
-type=IsaFake
-pio_addr=8804615848936
-pio_latency=1000
-pio_size=8
-ret_bad_addr=false
-update_data=false
-warn_access=
-ret_data8=255
-ret_data16=65535
-ret_data32=4294967295
-ret_data64=18446744073709551615
-platform=testsys.tsunami
-system=testsys
-
-[testsys.tsunami.fake_uart3]
-type=IsaFake
-pio_addr=8804615848680
-pio_latency=1000
-pio_size=8
-ret_bad_addr=false
-update_data=false
-warn_access=
-ret_data8=255
-ret_data16=65535
-ret_data32=4294967295
-ret_data64=18446744073709551615
-platform=testsys.tsunami
-system=testsys
-
-[testsys.tsunami.fake_uart4]
-type=IsaFake
-pio_addr=8804615848944
-pio_latency=1000
-pio_size=8
-ret_bad_addr=false
-update_data=false
-warn_access=
-ret_data8=255
-ret_data16=65535
-ret_data32=4294967295
-ret_data64=18446744073709551615
-platform=testsys.tsunami
-system=testsys
-
-[testsys.tsunami.fake_ppc]
-type=IsaFake
-pio_addr=8804615848891
-pio_latency=1000
-pio_size=8
-ret_bad_addr=false
-update_data=false
-warn_access=
-ret_data8=255
-ret_data16=65535
-ret_data32=4294967295
-ret_data64=18446744073709551615
-platform=testsys.tsunami
-system=testsys
-
-[testsys.tsunami.cchip]
-type=TsunamiCChip
-pio_addr=8803072344064
-pio_latency=1000
-platform=testsys.tsunami
-system=testsys
-tsunami=testsys.tsunami
-
-[testsys.tsunami.io]
-type=TsunamiIO
-pio_addr=8804615847936
-pio_latency=1000
-frequency=976562500
-platform=testsys.tsunami
-system=testsys
-time=2009 1 1 0 0 0 3 1
-year_is_bcd=false
-tsunami=testsys.tsunami
-
-[]
-type=PciConfigAll
-pio_latency=1
-bus=0
-size=16777216
-platform=testsys.tsunami
-system=testsys
-
-[testsys.sim_console]
-type=SimConsole
-intr_control=testsys.intrctrl
-output=console
-port=3456
-append_name=true
-number=0
-
-[testsys.cpu.itb]
-type=AlphaITB
-size=48
-
-[testsys.cpu.dtb]
-type=AlphaDTB
-size=64
-
-[testsys.cpu]
-type=AtomicSimpleCPU
-max_insts_any_thread=0
-max_insts_all_threads=0
-max_loads_any_thread=0
-max_loads_all_threads=0
-progress_interval=0
-system=testsys
-cpu_id=0
-itb=testsys.cpu.itb
-dtb=testsys.cpu.dtb
-profile=0
-do_quiesce=true
-do_checkpoint_insts=true
-do_statistics_insts=true
-clock=1
-phase=0
-defer_registration=false
-width=1
-function_trace=false
-function_trace_start=0
-simulate_stalls=false
-
-[testsys.tsunami.console]
-type=AlphaConsole
-sim_console=testsys.sim_console
-disk=testsys.simple_disk
-pio_addr=8804682956800
-system=testsys
-cpu=testsys.cpu
-platform=testsys.tsunami
-pio_latency=1000
-
-[testsys.tsunami.fake_ata1]
-type=IsaFake
-pio_addr=8804615848304
-pio_latency=1000
-pio_size=8
-ret_bad_addr=false
-update_data=false
-warn_access=
-ret_data8=255
-ret_data16=65535
-ret_data32=4294967295
-ret_data64=18446744073709551615
-platform=testsys.tsunami
-system=testsys
-
-[testsys.tsunami.fake_ata0]
-type=IsaFake
-pio_addr=8804615848432
-pio_latency=1000
-pio_size=8
-ret_bad_addr=false
-update_data=false
-warn_access=
-ret_data8=255
-ret_data16=65535
-ret_data32=4294967295
-ret_data64=18446744073709551615
-platform=testsys.tsunami
-system=testsys
-
-[testsys.tsunami.pchip]
-type=TsunamiPChip
-pio_addr=8802535473152
-pio_latency=1000
-platform=testsys.tsunami
-system=testsys
-tsunami=testsys.tsunami
-
-[testsys.tsunami.fake_pnp_read3]
-type=IsaFake
-pio_addr=8804615848643
-pio_latency=1000
-pio_size=8
-ret_bad_addr=false
-update_data=false
-warn_access=
-ret_data8=255
-ret_data16=65535
-ret_data32=4294967295
-ret_data64=18446744073709551615
-platform=testsys.tsunami
-system=testsys
-
-[testsys.tsunami.fake_pnp_read2]
-type=IsaFake
-pio_addr=8804615848579
-pio_latency=1000
-pio_size=8
-ret_bad_addr=false
-update_data=false
-warn_access=
-ret_data8=255
-ret_data16=65535
-ret_data32=4294967295
-ret_data64=18446744073709551615
-platform=testsys.tsunami
-system=testsys
-
-[testsys.tsunami.fake_pnp_read1]
-type=IsaFake
-pio_addr=8804615848515
-pio_latency=1000
-pio_size=8
-ret_bad_addr=false
-update_data=false
-warn_access=
-ret_data8=255
-ret_data16=65535
-ret_data32=4294967295
-ret_data64=18446744073709551615
-platform=testsys.tsunami
-system=testsys
-
-[testsys.tsunami.fake_pnp_read0]
-type=IsaFake
-pio_addr=8804615848451
-pio_latency=1000
-pio_size=8
-ret_bad_addr=false
-update_data=false
-warn_access=
-ret_data8=255
-ret_data16=65535
-ret_data32=4294967295
-ret_data64=18446744073709551615
-platform=testsys.tsunami
-system=testsys
-
-[testsys.tsunami.fake_pnp_read7]
-type=IsaFake
-pio_addr=8804615848899
-pio_latency=1000
-pio_size=8
-ret_bad_addr=false
-update_data=false
-warn_access=
-ret_data8=255
-ret_data16=65535
-ret_data32=4294967295
-ret_data64=18446744073709551615
-platform=testsys.tsunami
-system=testsys
-
-[testsys.tsunami.fake_pnp_read6]
-type=IsaFake
-pio_addr=8804615848835
-pio_latency=1000
-pio_size=8
-ret_bad_addr=false
-update_data=false
-warn_access=
-ret_data8=255
-ret_data16=65535
-ret_data32=4294967295
-ret_data64=18446744073709551615
-platform=testsys.tsunami
-system=testsys
-
-[testsys.tsunami.fake_pnp_read5]
-type=IsaFake
-pio_addr=8804615848771
-pio_latency=1000
-pio_size=8
-ret_bad_addr=false
-update_data=false
-warn_access=
-ret_data8=255
-ret_data16=65535
-ret_data32=4294967295
-ret_data64=18446744073709551615
-platform=testsys.tsunami
-system=testsys
-
-[testsys.tsunami.fake_pnp_read4]
-type=IsaFake
-pio_addr=8804615848707
-pio_latency=1000
-pio_size=8
-ret_bad_addr=false
-update_data=false
-warn_access=
-ret_data8=255
-ret_data16=65535
-ret_data32=4294967295
-ret_data64=18446744073709551615
-platform=testsys.tsunami
-system=testsys
-
-[testsys.tsunami.fake_pnp_write]
-type=IsaFake
-pio_addr=8804615850617
-pio_latency=1000
-pio_size=8
-ret_bad_addr=false
-update_data=false
-warn_access=
-ret_data8=255
-ret_data16=65535
-ret_data32=4294967295
-ret_data64=18446744073709551615
-platform=testsys.tsunami
-system=testsys
-
-[testsys.tsunami.fb]
-type=BadDevice
-devicename=FrameBuffer
-pio_addr=8804615848912
-system=testsys
-platform=testsys.tsunami
-pio_latency=1000
-
-[testsys.tsunami.ethernet.configdata]
-type=PciConfigData
-VendorID=4107
-DeviceID=34
-Command=0
-Status=656
-Revision=0
-ProgIF=0
-SubClassCode=0
-ClassCode=2
-CacheLineSize=0
-LatencyTimer=0
-HeaderType=0
-BIST=0
-BAR0=1
-BAR1=0
-BAR2=0
-BAR3=0
-BAR4=0
-BAR5=0
-CardbusCIS=0
-SubsystemVendorID=0
-SubsystemID=0
-ExpansionROM=0
-InterruptLine=30
-InterruptPin=1
-MinimumGrant=176
-MaximumLatency=52
-BAR0Size=256
-BAR1Size=4096
-BAR2Size=0
-BAR3Size=0
-BAR4Size=0
-BAR5Size=0
-
-[testsys.tsunami.ethernet]
-type=NSGigE
-system=testsys
-platform=testsys.tsunami
-min_backoff_delay=4000
-max_backoff_delay=10000000
-configdata=testsys.tsunami.ethernet.configdata
-pci_bus=0
-pci_dev=1
-pci_func=0
-pio_latency=1000
-config_latency=20000
-clock=0
-dma_desc_free=false
-dma_data_free=false
-dma_read_delay=0
-dma_write_delay=0
-dma_read_factor=0
-dma_write_factor=0
-dma_no_allocate=true
-intr_delay=10000000
-rx_delay=1000000
-tx_delay=1000000
-rx_fifo_size=524288
-tx_fifo_size=524288
-rx_filter=true
-hardware_address=00:90:00:00:00:02
-rx_thread=false
-tx_thread=false
-rss=false
-
-[testsys.tsunami.etherint]
-type=NSGigEInt
-peer=null
-device=testsys.tsunami.ethernet
-
-[testsys.tsunami.fake_OROM]
-type=IsaFake
-pio_addr=8796093677568
-pio_latency=1000
-pio_size=393216
-ret_bad_addr=false
-update_data=false
-warn_access=
-ret_data8=255
-ret_data16=65535
-ret_data32=4294967295
-ret_data64=18446744073709551615
-platform=testsys.tsunami
-system=testsys
-
-[testsys.tsunami.uart]
-type=Uart8250
-pio_addr=8804615848952
-pio_latency=1000
-platform=testsys.tsunami
-sim_console=testsys.sim_console
-system=testsys
-
-[testsys.tsunami.fake_sm_chip]
-type=IsaFake
-pio_addr=8804615848816
-pio_latency=1000
-pio_size=8
-ret_bad_addr=false
-update_data=false
-warn_access=
-ret_data8=255
-ret_data16=65535
-ret_data32=4294967295
-ret_data64=18446744073709551615
-platform=testsys.tsunami
-system=testsys
-
-[testsys.tsunami.fake_pnp_addr]
-type=IsaFake
-pio_addr=8804615848569
-pio_latency=1000
-pio_size=8
-ret_bad_addr=false
-update_data=false
-warn_access=
-ret_data8=255
-ret_data16=65535
-ret_data32=4294967295
-ret_data64=18446744073709551615
-platform=testsys.tsunami
-system=testsys
-
-[testsys.tsunami.ide.configdata]
-type=PciConfigData
-VendorID=32902
-DeviceID=28945
-Command=0
-Status=640
-Revision=0
-ProgIF=133
-SubClassCode=1
-ClassCode=1
-CacheLineSize=0
-LatencyTimer=0
-HeaderType=0
-BIST=0
-BAR0=1
-BAR1=1
-BAR2=1
-BAR3=1
-BAR4=1
-BAR5=1
-CardbusCIS=0
-SubsystemVendorID=0
-SubsystemID=0
-ExpansionROM=0
-InterruptLine=31
-InterruptPin=1
-MinimumGrant=0
-MaximumLatency=0
-BAR0Size=8
-BAR1Size=4
-BAR2Size=8
-BAR3Size=4
-BAR4Size=16
-BAR5Size=0
-
-[testsys.tsunami.ide]
-type=IdeController
-system=testsys
-platform=testsys.tsunami
-min_backoff_delay=4000
-max_backoff_delay=10000000
-configdata=testsys.tsunami.ide.configdata
-pci_bus=0
-pci_dev=0
-pci_func=0
-pio_latency=1000
-config_latency=20000
-disks=testsys.disk0 testsys.disk2
-
-[testsys.iobus]
-type=Bus
-bus_id=0
-clock=1000
-width=64
-responder_set=true
-block_size=64
-
-[drivesys.physmem]
-type=PhysicalMemory
-file=
-range=[0,134217727]
-latency=1
-zero=false
-
-[drivesys]
-type=LinuxAlphaSystem
-boot_cpu_frequency=1
-physmem=drivesys.physmem
-mem_mode=atomic
-kernel=/dist/m5/system/binaries/vmlinux
-console=/dist/m5/system/binaries/console
-pal=/dist/m5/system/binaries/ts_osfpal
-boot_osflags=root=/dev/hda1 console=ttyS0
-readfile=/Users/nate/work/m5/outgoing/configs/boot/netperf-server.rcS
-symbolfile=
-init_param=0
-system_type=34
-system_rev=1024
-
-[drivesys.intrctrl]
-type=IntrControl
-sys=drivesys
-
-[drivesys.tsunami]
-type=Tsunami
-system=drivesys
-intrctrl=drivesys.intrctrl
-
-[drivesys.tsunami.ethernet.configdata]
-type=PciConfigData
-VendorID=4107
-DeviceID=34
-Command=0
-Status=656
-Revision=0
-ProgIF=0
-SubClassCode=0
-ClassCode=2
-CacheLineSize=0
-LatencyTimer=0
-HeaderType=0
-BIST=0
-BAR0=1
-BAR1=0
-BAR2=0
-BAR3=0
-BAR4=0
-BAR5=0
-CardbusCIS=0
-SubsystemVendorID=0
-SubsystemID=0
-ExpansionROM=0
-InterruptLine=30
-InterruptPin=1
-MinimumGrant=176
-MaximumLatency=52
-BAR0Size=256
-BAR1Size=4096
-BAR2Size=0
-BAR3Size=0
-BAR4Size=0
-BAR5Size=0
-
-[drivesys.tsunami.ethernet]
-type=NSGigE
-system=drivesys
-platform=drivesys.tsunami
-min_backoff_delay=4000
-max_backoff_delay=10000000
-configdata=drivesys.tsunami.ethernet.configdata
-pci_bus=0
-pci_dev=1
-pci_func=0
-pio_latency=1000
-config_latency=20000
-clock=0
-dma_desc_free=false
-dma_data_free=false
-dma_read_delay=0
-dma_write_delay=0
-dma_read_factor=0
-dma_write_factor=0
-dma_no_allocate=true
-intr_delay=10000000
-rx_delay=1000000
-tx_delay=1000000
-rx_fifo_size=524288
-tx_fifo_size=524288
-rx_filter=true
-hardware_address=00:90:00:00:00:01
-rx_thread=false
-tx_thread=false
-rss=false
-
-[drivesys.tsunami.etherint]
-type=NSGigEInt
-peer=null
-device=drivesys.tsunami.ethernet
-
-[etherdump]
-type=EtherDump
-file=ethertrace
-maxlen=96
-
-[etherlink]
-type=EtherLink
-int1=testsys.tsunami.etherint
-int2=drivesys.tsunami.etherint
-speed=8000
-delay=0
-delay_var=0
-dump=etherdump
-
-[drivesys.membus]
-type=Bus
-bus_id=1
-clock=1000
-width=64
-responder_set=false
-block_size=64
-
-[drivesys.membus.responder]
-type=IsaFake
-pio_addr=0
-pio_latency=1
-pio_size=8
-ret_bad_addr=true
-update_data=false
-warn_access=
-ret_data8=255
-ret_data16=65535
-ret_data32=4294967295
-ret_data64=18446744073709551615
-platform=drivesys.tsunami
-system=drivesys
-
-[drivesys.bridge]
-type=Bridge
-req_size_a=16
-req_size_b=16
-resp_size_a=16
-resp_size_b=16
-delay=50000
-nack_delay=4000
-write_ack=false
-fix_partial_write_a=false
-fix_partial_write_b=true
-
-[drivesys.disk0.image.child]
-type=RawDiskImage
-image_file=/dist/m5/system/disks/linux-latest.img
-read_only=true
-
-[drivesys.disk0.image]
-type=CowDiskImage
-child=drivesys.disk0.image.child
-image_file=
-table_size=65536
-read_only=false
-
-[drivesys.disk0]
-type=IdeDisk
-image=drivesys.disk0.image
-driveID=master
-delay=1000000
-
-[drivesys.disk2.image.child]
-type=RawDiskImage
-image_file=/dist/m5/system/disks/linux-bigswap2.img
-read_only=true
-
-[drivesys.disk2.image]
-type=CowDiskImage
-child=drivesys.disk2.image.child
-image_file=
-table_size=65536
-read_only=false
-
-[drivesys.disk2]
-type=IdeDisk
-image=drivesys.disk2.image
-driveID=master
-delay=1000000
-
-[drivesys.simple_disk.disk]
-type=RawDiskImage
-image_file=/dist/m5/system/disks/linux-latest.img
-read_only=true
-
-[drivesys.simple_disk]
-type=SimpleDisk
-system=drivesys
-disk=drivesys.simple_disk.disk
-
-[drivesys.tsunami.fake_uart1]
-type=IsaFake
-pio_addr=8804615848696
-pio_latency=1000
-pio_size=8
-ret_bad_addr=false
-update_data=false
-warn_access=
-ret_data8=255
-ret_data16=65535
-ret_data32=4294967295
-ret_data64=18446744073709551615
-platform=drivesys.tsunami
-system=drivesys
-
-[drivesys.tsunami.fake_uart2]
-type=IsaFake
-pio_addr=8804615848936
-pio_latency=1000
-pio_size=8
-ret_bad_addr=false
-update_data=false
-warn_access=
-ret_data8=255
-ret_data16=65535
-ret_data32=4294967295
-ret_data64=18446744073709551615
-platform=drivesys.tsunami
-system=drivesys
-
-[drivesys.tsunami.fake_uart3]
-type=IsaFake
-pio_addr=8804615848680
-pio_latency=1000
-pio_size=8
-ret_bad_addr=false
-update_data=false
-warn_access=
-ret_data8=255
-ret_data16=65535
-ret_data32=4294967295
-ret_data64=18446744073709551615
-platform=drivesys.tsunami
-system=drivesys
-
-[drivesys.tsunami.fake_uart4]
-type=IsaFake
-pio_addr=8804615848944
-pio_latency=1000
-pio_size=8
-ret_bad_addr=false
-update_data=false
-warn_access=
-ret_data8=255
-ret_data16=65535
-ret_data32=4294967295
-ret_data64=18446744073709551615
-platform=drivesys.tsunami
-system=drivesys
-
-[drivesys.tsunami.fake_ppc]
-type=IsaFake
-pio_addr=8804615848891
-pio_latency=1000
-pio_size=8
-ret_bad_addr=false
-update_data=false
-warn_access=
-ret_data8=255
-ret_data16=65535
-ret_data32=4294967295
-ret_data64=18446744073709551615
-platform=drivesys.tsunami
-system=drivesys
-
-[drivesys.tsunami.cchip]
-type=TsunamiCChip
-pio_addr=8803072344064
-pio_latency=1000
-platform=drivesys.tsunami
-system=drivesys
-tsunami=drivesys.tsunami
-
-[drivesys.tsunami.io]
-type=TsunamiIO
-pio_addr=8804615847936
-pio_latency=1000
-frequency=976562500
-platform=drivesys.tsunami
-system=drivesys
-time=2009 1 1 0 0 0 3 1
-year_is_bcd=false
-tsunami=drivesys.tsunami
-
-[]
-type=PciConfigAll
-pio_latency=1
-bus=0
-size=16777216
-platform=drivesys.tsunami
-system=drivesys
-
-[drivesys.sim_console]
-type=SimConsole
-intr_control=drivesys.intrctrl
-output=console
-port=3456
-append_name=true
-number=0
-
-[drivesys.cpu.itb]
-type=AlphaITB
-size=48
-
-[drivesys.cpu.dtb]
-type=AlphaDTB
-size=64
-
-[drivesys.cpu]
-type=AtomicSimpleCPU
-max_insts_any_thread=0
-max_insts_all_threads=0
-max_loads_any_thread=0
-max_loads_all_threads=0
-progress_interval=0
-system=drivesys
-cpu_id=0
-itb=drivesys.cpu.itb
-dtb=drivesys.cpu.dtb
-profile=0
-do_quiesce=true
-do_checkpoint_insts=true
-do_statistics_insts=true
-clock=1
-phase=0
-defer_registration=false
-width=1
-function_trace=false
-function_trace_start=0
-simulate_stalls=false
-
-[drivesys.tsunami.console]
-type=AlphaConsole
-sim_console=drivesys.sim_console
-disk=drivesys.simple_disk
-pio_addr=8804682956800
-system=drivesys
-cpu=drivesys.cpu
-platform=drivesys.tsunami
-pio_latency=1000
-
-[drivesys.tsunami.fake_ata1]
-type=IsaFake
-pio_addr=8804615848304
-pio_latency=1000
-pio_size=8
-ret_bad_addr=false
-update_data=false
-warn_access=
-ret_data8=255
-ret_data16=65535
-ret_data32=4294967295
-ret_data64=18446744073709551615
-platform=drivesys.tsunami
-system=drivesys
-
-[drivesys.tsunami.fake_ata0]
-type=IsaFake
-pio_addr=8804615848432
-pio_latency=1000
-pio_size=8
-ret_bad_addr=false
-update_data=false
-warn_access=
-ret_data8=255
-ret_data16=65535
-ret_data32=4294967295
-ret_data64=18446744073709551615
-platform=drivesys.tsunami
-system=drivesys
-
-[drivesys.tsunami.pchip]
-type=TsunamiPChip
-pio_addr=8802535473152
-pio_latency=1000
-platform=drivesys.tsunami
-system=drivesys
-tsunami=drivesys.tsunami
-
-[drivesys.tsunami.fake_pnp_read3]
-type=IsaFake
-pio_addr=8804615848643
-pio_latency=1000
-pio_size=8
-ret_bad_addr=false
-update_data=false
-warn_access=
-ret_data8=255
-ret_data16=65535
-ret_data32=4294967295
-ret_data64=18446744073709551615
-platform=drivesys.tsunami
-system=drivesys
-
-[drivesys.tsunami.fake_pnp_read2]
-type=IsaFake
-pio_addr=8804615848579
-pio_latency=1000
-pio_size=8
-ret_bad_addr=false
-update_data=false
-warn_access=
-ret_data8=255
-ret_data16=65535
-ret_data32=4294967295
-ret_data64=18446744073709551615
-platform=drivesys.tsunami
-system=drivesys
-
-[drivesys.tsunami.fake_pnp_read1]
-type=IsaFake
-pio_addr=8804615848515
-pio_latency=1000
-pio_size=8
-ret_bad_addr=false
-update_data=false
-warn_access=
-ret_data8=255
-ret_data16=65535
-ret_data32=4294967295
-ret_data64=18446744073709551615
-platform=drivesys.tsunami
-system=drivesys
-
-[drivesys.tsunami.fake_pnp_read0]
-type=IsaFake
-pio_addr=8804615848451
-pio_latency=1000
-pio_size=8
-ret_bad_addr=false
-update_data=false
-warn_access=
-ret_data8=255
-ret_data16=65535
-ret_data32=4294967295
-ret_data64=18446744073709551615
-platform=drivesys.tsunami
-system=drivesys
-
-[drivesys.tsunami.fake_pnp_read7]
-type=IsaFake
-pio_addr=8804615848899
-pio_latency=1000
-pio_size=8
-ret_bad_addr=false
-update_data=false
-warn_access=
-ret_data8=255
-ret_data16=65535
-ret_data32=4294967295
-ret_data64=18446744073709551615
-platform=drivesys.tsunami
-system=drivesys
-
-[drivesys.tsunami.fake_pnp_read6]
-type=IsaFake
-pio_addr=8804615848835
-pio_latency=1000
-pio_size=8
-ret_bad_addr=false
-update_data=false
-warn_access=
-ret_data8=255
-ret_data16=65535
-ret_data32=4294967295
-ret_data64=18446744073709551615
-platform=drivesys.tsunami
-system=drivesys
-
-[drivesys.tsunami.fake_pnp_read5]
-type=IsaFake
-pio_addr=8804615848771
-pio_latency=1000
-pio_size=8
-ret_bad_addr=false
-update_data=false
-warn_access=
-ret_data8=255
-ret_data16=65535
-ret_data32=4294967295
-ret_data64=18446744073709551615
-platform=drivesys.tsunami
-system=drivesys
-
-[drivesys.tsunami.fake_pnp_read4]
-type=IsaFake
-pio_addr=8804615848707
-pio_latency=1000
-pio_size=8
-ret_bad_addr=false
-update_data=false
-warn_access=
-ret_data8=255
-ret_data16=65535
-ret_data32=4294967295
-ret_data64=18446744073709551615
-platform=drivesys.tsunami
-system=drivesys
-
-[drivesys.tsunami.fake_pnp_write]
-type=IsaFake
-pio_addr=8804615850617
-pio_latency=1000
-pio_size=8
-ret_bad_addr=false
-update_data=false
-warn_access=
-ret_data8=255
-ret_data16=65535
-ret_data32=4294967295
-ret_data64=18446744073709551615
-platform=drivesys.tsunami
-system=drivesys
-
-[drivesys.tsunami.fb]
-type=BadDevice
-devicename=FrameBuffer
-pio_addr=8804615848912
-system=drivesys
-platform=drivesys.tsunami
-pio_latency=1000
-
-[drivesys.tsunami.fake_OROM]
-type=IsaFake
-pio_addr=8796093677568
-pio_latency=1000
-pio_size=393216
-ret_bad_addr=false
-update_data=false
-warn_access=
-ret_data8=255
-ret_data16=65535
-ret_data32=4294967295
-ret_data64=18446744073709551615
-platform=drivesys.tsunami
-system=drivesys
-
-[drivesys.tsunami.uart]
-type=Uart8250
-pio_addr=8804615848952
-pio_latency=1000
-platform=drivesys.tsunami
-sim_console=drivesys.sim_console
-system=drivesys
-
-[drivesys.tsunami.fake_sm_chip]
-type=IsaFake
-pio_addr=8804615848816
-pio_latency=1000
-pio_size=8
-ret_bad_addr=false
-update_data=false
-warn_access=
-ret_data8=255
-ret_data16=65535
-ret_data32=4294967295
-ret_data64=18446744073709551615
-platform=drivesys.tsunami
-system=drivesys
-
-[drivesys.tsunami.fake_pnp_addr]
-type=IsaFake
-pio_addr=8804615848569
-pio_latency=1000
-pio_size=8
-ret_bad_addr=false
-update_data=false
-warn_access=
-ret_data8=255
-ret_data16=65535
-ret_data32=4294967295
-ret_data64=18446744073709551615
-platform=drivesys.tsunami
-system=drivesys
-
-[drivesys.tsunami.ide.configdata]
-type=PciConfigData
-VendorID=32902
-DeviceID=28945
-Command=0
-Status=640
-Revision=0
-ProgIF=133
-SubClassCode=1
-ClassCode=1
-CacheLineSize=0
-LatencyTimer=0
-HeaderType=0
-BIST=0
-BAR0=1
-BAR1=1
-BAR2=1
-BAR3=1
-BAR4=1
-BAR5=1
-CardbusCIS=0
-SubsystemVendorID=0
-SubsystemID=0
-ExpansionROM=0
-InterruptLine=31
-InterruptPin=1
-MinimumGrant=0
-MaximumLatency=0
-BAR0Size=8
-BAR1Size=4
-BAR2Size=8
-BAR3Size=4
-BAR4Size=16
-BAR5Size=0
-
-[drivesys.tsunami.ide]
-type=IdeController
-system=drivesys
-platform=drivesys.tsunami
-min_backoff_delay=4000
-max_backoff_delay=10000000
-configdata=drivesys.tsunami.ide.configdata
-pci_bus=0
-pci_dev=0
-pci_func=0
-pio_latency=1000
-config_latency=20000
-disks=drivesys.disk0 drivesys.disk2
-
-[drivesys.iobus]
-type=Bus
-bus_id=0
-clock=1000
-width=64
-responder_set=true
-block_size=64
-
-- 
cgit v1.2.3


From 851e3c852be4eb031293ed271502a0e14ca9273f Mon Sep 17 00:00:00 2001
From: Steve Reinhardt <stever@gmail.com>
Date: Fri, 3 Aug 2007 18:04:26 -0400
Subject: tests: replace all dest ref files on upgrade (if possible).
 Originally we were copying all source files in, but this caused problems when
 (large) inputs were copied along with outputs. Then we switched to just
 copying the standard files (m5stats.txt, etc.) but that was missing things
 like the *.console files. This fix should catch all the non-standard files
 too as long as they are copied in manually once when the test is set up. Also
 get a lot nicer about warning when files are ignored, and warn when expected
 files are missing. Those new Python sets sure are handy.

--HG--
extra : convert_revision : 55c046de124522499af74a471968677c020bbf38
---
 tests/SConscript | 37 ++++++++++++++++++++++++++++---------
 1 file changed, 28 insertions(+), 9 deletions(-)

diff --git a/tests/SConscript b/tests/SConscript
index 812ce8c11..62c4d0508 100644
--- a/tests/SConscript
+++ b/tests/SConscript
@@ -102,6 +102,19 @@ def print_test(target, source, env):
 
 printAction = env.Action(print_test, strfunction = None)
 
+# Static vars for update_test:
+# - long-winded message about ignored sources
+ignore_msg = '''
+Note: The following file(s) will not be copied.  New non-standard
+      output files must be copied manually once before update_ref will
+      recognize them as outputs.  Otherwise they are assumed to be
+      inputs and are ignored.
+'''
+# - reference files always needed
+needed_files = set(['stdout', 'stderr', 'm5stats.txt', 'config.ini'])
+# - source files we always want to ignore
+known_ignores = set(['status', 'outdiff', 'statsdiff'])
+
 def update_test(target, source, env):
     """Update reference test outputs.
 
@@ -112,9 +125,21 @@ def update_test(target, source, env):
     """
     dest_dir = str(source[0].get_dir())
     src_dir = str(source[1].get_dir())
-    dest_files = os.listdir(dest_dir)
-    src_files = os.listdir(src_dir)
-    for f in ('stdout', 'stderr', 'm5stats.txt', 'config.ini'):
+    dest_files = set(os.listdir(dest_dir))
+    src_files = set(os.listdir(src_dir))
+    # Copy all of the required files plus any existing dest files.
+    wanted_files = needed_files | dest_files
+    missing_files = wanted_files - src_files
+    if len(missing_files) > 0:
+        print "  WARNING: the following file(s) are missing " \
+              "and will not be updated:"
+        print "    ", " ,".join(missing_files)
+    copy_files = wanted_files - missing_files
+    warn_ignored_files = (src_files - copy_files) - known_ignores
+    if len(warn_ignored_files) > 0:
+        print ignore_msg,
+        print "       ", ", ".join(warn_ignored_files)
+    for f in copy_files:
         if f in dest_files:
             print "  Replacing file", f
             dest_files.remove(f)
@@ -123,12 +148,6 @@ def update_test(target, source, env):
         copyAction = Copy(os.path.join(dest_dir, f), os.path.join(src_dir, f))
         copyAction.strfunction = None
         Execute(copyAction)
-    # warn about any files in dest not overwritten (other than SCCS dir)
-    if 'SCCS' in dest_files:
-        dest_files.remove('SCCS')
-    if dest_files:
-        print "Warning: file(s) in", dest_dir, "not updated:",
-        print ', '.join(dest_files)
     return 0
 
 def update_test_string(target, source, env):
-- 
cgit v1.2.3


From bb3f7dc83b9a4c7b20aeb893fea447854c855225 Mon Sep 17 00:00:00 2001
From: Steve Reinhardt <stever@gmail.com>
Date: Fri, 3 Aug 2007 18:04:30 -0400
Subject: tests: new ref outputs for new cache model

--HG--
extra : convert_revision : 244749072f97e425c2ca1cf296f2b95f37e99eb6
---
 .../00.hello/ref/alpha/linux/o3-timing/config.ini  |   45 +-
 .../00.hello/ref/alpha/linux/o3-timing/m5stats.txt |  517 ++++----
 .../00.hello/ref/alpha/linux/o3-timing/stdout      |    6 +-
 .../ref/alpha/linux/simple-atomic/config.ini       |    6 +-
 .../ref/alpha/linux/simple-atomic/m5stats.txt      |    7 +-
 .../00.hello/ref/alpha/linux/simple-atomic/stdout  |    8 +-
 .../ref/alpha/linux/simple-timing/config.ini       |   24 +-
 .../ref/alpha/linux/simple-timing/m5stats.txt      |  148 ++-
 .../00.hello/ref/alpha/linux/simple-timing/stdout  |   10 +-
 .../00.hello/ref/alpha/tru64/o3-timing/config.ini  |   45 +-
 .../00.hello/ref/alpha/tru64/o3-timing/m5stats.txt |  478 +++----
 .../00.hello/ref/alpha/tru64/o3-timing/stdout      |    6 +-
 .../ref/alpha/tru64/simple-atomic/config.ini       |    6 +-
 .../ref/alpha/tru64/simple-atomic/m5stats.txt      |    7 +-
 .../00.hello/ref/alpha/tru64/simple-atomic/stdout  |    8 +-
 .../ref/alpha/tru64/simple-timing/config.ini       |   24 +-
 .../ref/alpha/tru64/simple-timing/m5stats.txt      |  101 +-
 .../00.hello/ref/alpha/tru64/simple-timing/stdout  |   10 +-
 .../ref/mips/linux/simple-atomic/config.ini        |    8 +-
 .../ref/mips/linux/simple-atomic/m5stats.txt       |    8 +-
 .../00.hello/ref/mips/linux/simple-atomic/stdout   |    4 +-
 .../ref/mips/linux/simple-timing/config.ini        |   32 +-
 .../ref/mips/linux/simple-timing/m5stats.txt       |  108 +-
 .../00.hello/ref/mips/linux/simple-timing/stdout   |    6 +-
 .../ref/sparc/linux/simple-atomic/config.ini       |    8 +-
 .../ref/sparc/linux/simple-atomic/m5stats.txt      |    8 +-
 .../00.hello/ref/sparc/linux/simple-atomic/stdout  |    4 +-
 .../ref/sparc/linux/simple-timing/config.ini       |   32 +-
 .../ref/sparc/linux/simple-timing/m5stats.txt      |  165 +--
 .../00.hello/ref/sparc/linux/simple-timing/stdout  |    6 +-
 .../ref/alpha/linux/o3-timing/config.ini           |   45 +-
 .../ref/alpha/linux/o3-timing/m5stats.txt          |  732 +++++------
 .../ref/alpha/linux/o3-timing/stdout               |    6 +-
 .../linux/tsunami-simple-atomic-dual/config.ini    |   68 +-
 .../linux/tsunami-simple-atomic-dual/m5stats.txt   |  242 ++--
 .../alpha/linux/tsunami-simple-atomic-dual/stderr  |    6 +-
 .../alpha/linux/tsunami-simple-atomic-dual/stdout  |   11 +-
 .../alpha/linux/tsunami-simple-atomic/config.ini   |   38 +-
 .../alpha/linux/tsunami-simple-atomic/m5stats.txt  |  184 ++-
 .../ref/alpha/linux/tsunami-simple-atomic/stderr   |    4 +-
 .../ref/alpha/linux/tsunami-simple-atomic/stdout   |   11 +-
 .../linux/tsunami-simple-timing-dual/config.ini    |   68 +-
 .../linux/tsunami-simple-timing-dual/m5stats.txt   | 1036 ++++++++--------
 .../alpha/linux/tsunami-simple-timing-dual/stderr  |    8 +-
 .../alpha/linux/tsunami-simple-timing-dual/stdout  |   11 +-
 .../alpha/linux/tsunami-simple-timing/config.ini   |   38 +-
 .../alpha/linux/tsunami-simple-timing/m5stats.txt  |  474 ++++---
 .../ref/alpha/linux/tsunami-simple-timing/stderr   |    4 +-
 .../ref/alpha/linux/tsunami-simple-timing/stdout   |   11 +-
 .../50.memtest/ref/alpha/linux/memtest/config.ini  |  102 +-
 .../50.memtest/ref/alpha/linux/memtest/m5stats.txt | 1299 ++++++++------------
 .../50.memtest/ref/alpha/linux/memtest/stderr      |  146 +--
 .../50.memtest/ref/alpha/linux/memtest/stdout      |   10 +-
 .../linux/twosys-tsunami-simple-atomic/config.ini  |   20 +-
 .../linux/twosys-tsunami-simple-atomic/m5stats.txt |   12 +-
 .../linux/twosys-tsunami-simple-atomic/stderr      |    8 +-
 .../linux/twosys-tsunami-simple-atomic/stdout      |   10 +-
 57 files changed, 2963 insertions(+), 3486 deletions(-)

diff --git a/tests/quick/00.hello/ref/alpha/linux/o3-timing/config.ini b/tests/quick/00.hello/ref/alpha/linux/o3-timing/config.ini
index f112ef506..f145eee43 100644
--- a/tests/quick/00.hello/ref/alpha/linux/o3-timing/config.ini
+++ b/tests/quick/00.hello/ref/alpha/linux/o3-timing/config.ini
@@ -11,7 +11,7 @@ physmem=system.physmem
 
 [system.cpu]
 type=DerivO3CPU
-children=dcache fuPool icache l2cache toL2Bus workload
+children=dcache fuPool icache l2cache toL2Bus tracer workload
 BTBEntries=4096
 BTBTagSize=16
 LFSTSize=1024
@@ -86,6 +86,7 @@ smtROBPolicy=Partitioned
 smtROBThreshold=100
 squashWidth=8
 system=system
+tracer=system.cpu.tracer
 trapLatency=13
 wbDepth=1
 wbWidth=8
@@ -95,12 +96,9 @@ icache_port=system.cpu.icache.cpu_side
 
 [system.cpu.dcache]
 type=BaseCache
-adaptive_compression=false
 addr_range=0:18446744073709551615
 assoc=2
 block_size=64
-compressed_bus=false
-compression_latency=0
 hash_delay=1
 latency=1000
 lifo=false
@@ -118,12 +116,10 @@ prefetch_serial_squash=false
 prefetch_use_cpu_id=true
 prefetcher_size=100
 prioritizeRequests=false
-protocol=Null
 repl=Null
 size=262144
 split=false
 split_size=0
-store_compressed=false
 subblock_size=0
 tgts_per_mshr=20
 trace_addr=0
@@ -139,11 +135,11 @@ FUList=system.cpu.fuPool.FUList0 system.cpu.fuPool.FUList1 system.cpu.fuPool.FUL
 
 [system.cpu.fuPool.FUList0]
 type=FUDesc
-children=opList0
+children=opList
 count=6
-opList=system.cpu.fuPool.FUList0.opList0
+opList=system.cpu.fuPool.FUList0.opList
 
-[system.cpu.fuPool.FUList0.opList0]
+[system.cpu.fuPool.FUList0.opList]
 type=OpDesc
 issueLat=1
 opClass=IntAlu
@@ -217,11 +213,11 @@ opLat=24
 
 [system.cpu.fuPool.FUList4]
 type=FUDesc
-children=opList0
+children=opList
 count=0
-opList=system.cpu.fuPool.FUList4.opList0
+opList=system.cpu.fuPool.FUList4.opList
 
-[system.cpu.fuPool.FUList4.opList0]
+[system.cpu.fuPool.FUList4.opList]
 type=OpDesc
 issueLat=1
 opClass=MemRead
@@ -229,11 +225,11 @@ opLat=1
 
 [system.cpu.fuPool.FUList5]
 type=FUDesc
-children=opList0
+children=opList
 count=0
-opList=system.cpu.fuPool.FUList5.opList0
+opList=system.cpu.fuPool.FUList5.opList
 
-[system.cpu.fuPool.FUList5.opList0]
+[system.cpu.fuPool.FUList5.opList]
 type=OpDesc
 issueLat=1
 opClass=MemWrite
@@ -259,11 +255,11 @@ opLat=1
 
 [system.cpu.fuPool.FUList7]
 type=FUDesc
-children=opList0
+children=opList
 count=1
-opList=system.cpu.fuPool.FUList7.opList0
+opList=system.cpu.fuPool.FUList7.opList
 
-[system.cpu.fuPool.FUList7.opList0]
+[system.cpu.fuPool.FUList7.opList]
 type=OpDesc
 issueLat=3
 opClass=IprAccess
@@ -271,12 +267,9 @@ opLat=3
 
 [system.cpu.icache]
 type=BaseCache
-adaptive_compression=false
 addr_range=0:18446744073709551615
 assoc=2
 block_size=64
-compressed_bus=false
-compression_latency=0
 hash_delay=1
 latency=1000
 lifo=false
@@ -294,12 +287,10 @@ prefetch_serial_squash=false
 prefetch_use_cpu_id=true
 prefetcher_size=100
 prioritizeRequests=false
-protocol=Null
 repl=Null
 size=131072
 split=false
 split_size=0
-store_compressed=false
 subblock_size=0
 tgts_per_mshr=20
 trace_addr=0
@@ -310,12 +301,9 @@ mem_side=system.cpu.toL2Bus.port[0]
 
 [system.cpu.l2cache]
 type=BaseCache
-adaptive_compression=false
 addr_range=0:18446744073709551615
 assoc=2
 block_size=64
-compressed_bus=false
-compression_latency=0
 hash_delay=1
 latency=1000
 lifo=false
@@ -333,12 +321,10 @@ prefetch_serial_squash=false
 prefetch_use_cpu_id=true
 prefetcher_size=100
 prioritizeRequests=false
-protocol=Null
 repl=Null
 size=2097152
 split=false
 split_size=0
-store_compressed=false
 subblock_size=0
 tgts_per_mshr=5
 trace_addr=0
@@ -356,6 +342,9 @@ responder_set=false
 width=64
 port=system.cpu.icache.mem_side system.cpu.dcache.mem_side system.cpu.l2cache.cpu_side
 
+[system.cpu.tracer]
+type=ExeTracer
+
 [system.cpu.workload]
 type=LiveProcess
 cmd=hello
diff --git a/tests/quick/00.hello/ref/alpha/linux/o3-timing/m5stats.txt b/tests/quick/00.hello/ref/alpha/linux/o3-timing/m5stats.txt
index 2ac86dd84..02095a557 100644
--- a/tests/quick/00.hello/ref/alpha/linux/o3-timing/m5stats.txt
+++ b/tests/quick/00.hello/ref/alpha/linux/o3-timing/m5stats.txt
@@ -1,40 +1,40 @@
 
 ---------- Begin Simulation Statistics ----------
 global.BPredUnit.BTBCorrect                         0                       # Number of correct BTB predictions (this stat may not work properly.
-global.BPredUnit.BTBHits                          522                       # Number of BTB hits
-global.BPredUnit.BTBLookups                      1584                       # Number of BTB lookups
-global.BPredUnit.RASInCorrect                      57                       # Number of incorrect RAS predictions.
-global.BPredUnit.condIncorrect                    422                       # Number of conditional branches incorrect
-global.BPredUnit.condPredicted                   1088                       # Number of conditional branches predicted
-global.BPredUnit.lookups                         1837                       # Number of BP lookups
-global.BPredUnit.usedRAS                          241                       # Number of times the RAS was used to get a target.
-host_inst_rate                                  39303                       # Simulator instruction rate (inst/s)
-host_mem_usage                                 153768                       # Number of bytes of host memory used
-host_seconds                                     0.14                       # Real time elapsed on the host
-host_tick_rate                               32016268                       # Simulator tick rate (ticks/s)
-memdepunit.memDep.conflictingLoads                 17                       # Number of conflicting loads.
-memdepunit.memDep.conflictingStores               127                       # Number of conflicting stores.
-memdepunit.memDep.insertedLoads                  1874                       # Number of loads inserted to the mem dependence unit.
-memdepunit.memDep.insertedStores                 1142                       # Number of stores inserted to the mem dependence unit.
+global.BPredUnit.BTBHits                          543                       # Number of BTB hits
+global.BPredUnit.BTBLookups                      1720                       # Number of BTB lookups
+global.BPredUnit.RASInCorrect                      59                       # Number of incorrect RAS predictions.
+global.BPredUnit.condIncorrect                    423                       # Number of conditional branches incorrect
+global.BPredUnit.condPredicted                   1175                       # Number of conditional branches predicted
+global.BPredUnit.lookups                         2025                       # Number of BP lookups
+global.BPredUnit.usedRAS                          277                       # Number of times the RAS was used to get a target.
+host_inst_rate                                  29843                       # Simulator instruction rate (inst/s)
+host_mem_usage                                 154572                       # Number of bytes of host memory used
+host_seconds                                     0.19                       # Real time elapsed on the host
+host_tick_rate                               22095832                       # Simulator tick rate (ticks/s)
+memdepunit.memDep.conflictingLoads                 31                       # Number of conflicting loads.
+memdepunit.memDep.conflictingStores               133                       # Number of conflicting stores.
+memdepunit.memDep.insertedLoads                  1967                       # Number of loads inserted to the mem dependence unit.
+memdepunit.memDep.insertedStores                 1200                       # Number of stores inserted to the mem dependence unit.
 sim_freq                                 1000000000000                       # Frequency of simulated ticks
 sim_insts                                        5623                       # Number of instructions simulated
-sim_seconds                                  0.000005                       # Number of seconds simulated
-sim_ticks                                     4589500                       # Number of ticks simulated
+sim_seconds                                  0.000004                       # Number of seconds simulated
+sim_ticks                                     4170500                       # Number of ticks simulated
 system.cpu.commit.COM:branches                    862                       # Number of branches committed
-system.cpu.commit.COM:bw_lim_events               104                       # number cycles where commit BW limit reached
+system.cpu.commit.COM:bw_lim_events               105                       # number cycles where commit BW limit reached
 system.cpu.commit.COM:bw_limited                    0                       # number of insts not committed due to BW limits
 system.cpu.commit.COM:committed_per_cycle.start_dist                     # Number of insts commited each cycle
-system.cpu.commit.COM:committed_per_cycle.samples         8521                      
+system.cpu.commit.COM:committed_per_cycle.samples         7614                      
 system.cpu.commit.COM:committed_per_cycle.min_value            0                      
-                               0         6200   7276.14%           
-                               1         1160   1361.34%           
-                               2          469    550.40%           
-                               3          177    207.72%           
-                               4          131    153.74%           
-                               5           98    115.01%           
-                               6          109    127.92%           
-                               7           73     85.67%           
-                               8          104    122.05%           
+                               0         5315   6980.56%           
+                               1         1182   1552.40%           
+                               2          399    524.03%           
+                               3          192    252.17%           
+                               4          125    164.17%           
+                               5           99    130.02%           
+                               6          130    170.74%           
+                               7           67     88.00%           
+                               8          105    137.90%           
 system.cpu.commit.COM:committed_per_cycle.max_value            8                      
 system.cpu.commit.COM:committed_per_cycle.end_dist
 
@@ -43,70 +43,70 @@ system.cpu.commit.COM:loads                       979                       # Nu
 system.cpu.commit.COM:membars                       0                       # Number of memory barriers committed
 system.cpu.commit.COM:refs                       1791                       # Number of memory references committed
 system.cpu.commit.COM:swp_count                     0                       # Number of s/w prefetches committed
-system.cpu.commit.branchMispredicts               350                       # The number of times a branch was mispredicted
+system.cpu.commit.branchMispredicts               349                       # The number of times a branch was mispredicted
 system.cpu.commit.commitCommittedInsts           5640                       # The number of committed instructions
 system.cpu.commit.commitNonSpecStalls              17                       # The number of times commit has been forced to stall to communicate backwards
-system.cpu.commit.commitSquashedInsts            3571                       # The number of squashed insts skipped by commit
+system.cpu.commit.commitSquashedInsts            3957                       # The number of squashed insts skipped by commit
 system.cpu.committedInsts                        5623                       # Number of Instructions Simulated
 system.cpu.committedInsts_total                  5623                       # Number of Instructions Simulated
-system.cpu.cpi                               1.636315                       # CPI: Cycles Per Instruction
-system.cpu.cpi_total                         1.636315                       # CPI: Total CPI of All Threads
-system.cpu.dcache.ReadReq_accesses               1470                       # number of ReadReq accesses(hits+misses)
-system.cpu.dcache.ReadReq_avg_miss_latency  5932.330827                       # average ReadReq miss latency
-system.cpu.dcache.ReadReq_avg_mshr_miss_latency         5380                       # average ReadReq mshr miss latency
-system.cpu.dcache.ReadReq_hits                   1337                       # number of ReadReq hits
-system.cpu.dcache.ReadReq_miss_latency         789000                       # number of ReadReq miss cycles
-system.cpu.dcache.ReadReq_miss_rate          0.090476                       # miss rate for ReadReq accesses
-system.cpu.dcache.ReadReq_misses                  133                       # number of ReadReq misses
-system.cpu.dcache.ReadReq_mshr_hits                33                       # number of ReadReq MSHR hits
-system.cpu.dcache.ReadReq_mshr_miss_latency       538000                       # number of ReadReq MSHR miss cycles
-system.cpu.dcache.ReadReq_mshr_miss_rate     0.068027                       # mshr miss rate for ReadReq accesses
-system.cpu.dcache.ReadReq_mshr_misses             100                       # number of ReadReq MSHR misses
-system.cpu.dcache.WriteReq_accesses               812                       # number of WriteReq accesses(hits+misses)
-system.cpu.dcache.WriteReq_avg_miss_latency  4504.373178                       # average WriteReq miss latency
-system.cpu.dcache.WriteReq_avg_mshr_miss_latency  5116.438356                       # average WriteReq mshr miss latency
-system.cpu.dcache.WriteReq_hits                   469                       # number of WriteReq hits
-system.cpu.dcache.WriteReq_miss_latency       1545000                       # number of WriteReq miss cycles
-system.cpu.dcache.WriteReq_miss_rate         0.422414                       # miss rate for WriteReq accesses
-system.cpu.dcache.WriteReq_misses                 343                       # number of WriteReq misses
-system.cpu.dcache.WriteReq_mshr_hits              270                       # number of WriteReq MSHR hits
-system.cpu.dcache.WriteReq_mshr_miss_latency       373500                       # number of WriteReq MSHR miss cycles
-system.cpu.dcache.WriteReq_mshr_miss_rate     0.089901                       # mshr miss rate for WriteReq accesses
-system.cpu.dcache.WriteReq_mshr_misses             73                       # number of WriteReq MSHR misses
+system.cpu.cpi                               1.483372                       # CPI: Cycles Per Instruction
+system.cpu.cpi_total                         1.483372                       # CPI: Total CPI of All Threads
+system.cpu.dcache.ReadReq_accesses               1487                       # number of ReadReq accesses(hits+misses)
+system.cpu.dcache.ReadReq_avg_miss_latency  8188.118812                       # average ReadReq miss latency
+system.cpu.dcache.ReadReq_avg_mshr_miss_latency  5495.049505                       # average ReadReq mshr miss latency
+system.cpu.dcache.ReadReq_hits                   1386                       # number of ReadReq hits
+system.cpu.dcache.ReadReq_miss_latency         827000                       # number of ReadReq miss cycles
+system.cpu.dcache.ReadReq_miss_rate          0.067922                       # miss rate for ReadReq accesses
+system.cpu.dcache.ReadReq_misses                  101                       # number of ReadReq misses
+system.cpu.dcache.ReadReq_mshr_hits                34                       # number of ReadReq MSHR hits
+system.cpu.dcache.ReadReq_mshr_miss_latency       555000                       # number of ReadReq MSHR miss cycles
+system.cpu.dcache.ReadReq_mshr_miss_rate     0.067922                       # mshr miss rate for ReadReq accesses
+system.cpu.dcache.ReadReq_mshr_misses             101                       # number of ReadReq MSHR misses
+system.cpu.dcache.WriteReq_accesses               561                       # number of WriteReq accesses(hits+misses)
+system.cpu.dcache.WriteReq_avg_miss_latency 18316.091954                       # average WriteReq miss latency
+system.cpu.dcache.WriteReq_avg_mshr_miss_latency  5068.965517                       # average WriteReq mshr miss latency
+system.cpu.dcache.WriteReq_hits                   474                       # number of WriteReq hits
+system.cpu.dcache.WriteReq_miss_latency       1593500                       # number of WriteReq miss cycles
+system.cpu.dcache.WriteReq_miss_rate         0.155080                       # miss rate for WriteReq accesses
+system.cpu.dcache.WriteReq_misses                  87                       # number of WriteReq misses
+system.cpu.dcache.WriteReq_mshr_hits              251                       # number of WriteReq MSHR hits
+system.cpu.dcache.WriteReq_mshr_miss_latency       441000                       # number of WriteReq MSHR miss cycles
+system.cpu.dcache.WriteReq_mshr_miss_rate     0.155080                       # mshr miss rate for WriteReq accesses
+system.cpu.dcache.WriteReq_mshr_misses             87                       # number of WriteReq MSHR misses
 system.cpu.dcache.avg_blocked_cycles_no_mshrs <err: div-0>                       # average number of cycles each access was blocked
 system.cpu.dcache.avg_blocked_cycles_no_targets <err: div-0>                       # average number of cycles each access was blocked
-system.cpu.dcache.avg_refs                  10.439306                       # Average number of references to valid blocks.
+system.cpu.dcache.avg_refs                  10.770115                       # Average number of references to valid blocks.
 system.cpu.dcache.blocked_no_mshrs                  0                       # number of cycles access was blocked
 system.cpu.dcache.blocked_no_targets                0                       # number of cycles access was blocked
 system.cpu.dcache.blocked_cycles_no_mshrs            0                       # number of cycles access was blocked
 system.cpu.dcache.blocked_cycles_no_targets            0                       # number of cycles access was blocked
 system.cpu.dcache.cache_copies                      0                       # number of cache copies performed
-system.cpu.dcache.demand_accesses                2282                       # number of demand (read+write) accesses
-system.cpu.dcache.demand_avg_miss_latency  4903.361345                       # average overall miss latency
-system.cpu.dcache.demand_avg_mshr_miss_latency  5268.786127                       # average overall mshr miss latency
-system.cpu.dcache.demand_hits                    1806                       # number of demand (read+write) hits
-system.cpu.dcache.demand_miss_latency         2334000                       # number of demand (read+write) miss cycles
-system.cpu.dcache.demand_miss_rate           0.208589                       # miss rate for demand accesses
-system.cpu.dcache.demand_misses                   476                       # number of demand (read+write) misses
-system.cpu.dcache.demand_mshr_hits                303                       # number of demand (read+write) MSHR hits
-system.cpu.dcache.demand_mshr_miss_latency       911500                       # number of demand (read+write) MSHR miss cycles
-system.cpu.dcache.demand_mshr_miss_rate      0.075811                       # mshr miss rate for demand accesses
-system.cpu.dcache.demand_mshr_misses              173                       # number of demand (read+write) MSHR misses
+system.cpu.dcache.demand_accesses                2048                       # number of demand (read+write) accesses
+system.cpu.dcache.demand_avg_miss_latency        12875                       # average overall miss latency
+system.cpu.dcache.demand_avg_mshr_miss_latency  5297.872340                       # average overall mshr miss latency
+system.cpu.dcache.demand_hits                    1860                       # number of demand (read+write) hits
+system.cpu.dcache.demand_miss_latency         2420500                       # number of demand (read+write) miss cycles
+system.cpu.dcache.demand_miss_rate           0.091797                       # miss rate for demand accesses
+system.cpu.dcache.demand_misses                   188                       # number of demand (read+write) misses
+system.cpu.dcache.demand_mshr_hits                285                       # number of demand (read+write) MSHR hits
+system.cpu.dcache.demand_mshr_miss_latency       996000                       # number of demand (read+write) MSHR miss cycles
+system.cpu.dcache.demand_mshr_miss_rate      0.091797                       # mshr miss rate for demand accesses
+system.cpu.dcache.demand_mshr_misses              188                       # number of demand (read+write) MSHR misses
 system.cpu.dcache.fast_writes                       0                       # number of fast writes performed
 system.cpu.dcache.mshr_cap_events                   0                       # number of times MSHR cap was activated
 system.cpu.dcache.no_allocate_misses                0                       # Number of misses that were no-allocate
-system.cpu.dcache.overall_accesses               2282                       # number of overall (read+write) accesses
-system.cpu.dcache.overall_avg_miss_latency  4903.361345                       # average overall miss latency
-system.cpu.dcache.overall_avg_mshr_miss_latency  5268.786127                       # average overall mshr miss latency
+system.cpu.dcache.overall_accesses               2048                       # number of overall (read+write) accesses
+system.cpu.dcache.overall_avg_miss_latency        12875                       # average overall miss latency
+system.cpu.dcache.overall_avg_mshr_miss_latency  5297.872340                       # average overall mshr miss latency
 system.cpu.dcache.overall_avg_mshr_uncacheable_latency <err: div-0>                       # average overall mshr uncacheable latency
-system.cpu.dcache.overall_hits                   1806                       # number of overall hits
-system.cpu.dcache.overall_miss_latency        2334000                       # number of overall miss cycles
-system.cpu.dcache.overall_miss_rate          0.208589                       # miss rate for overall accesses
-system.cpu.dcache.overall_misses                  476                       # number of overall misses
-system.cpu.dcache.overall_mshr_hits               303                       # number of overall MSHR hits
-system.cpu.dcache.overall_mshr_miss_latency       911500                       # number of overall MSHR miss cycles
-system.cpu.dcache.overall_mshr_miss_rate     0.075811                       # mshr miss rate for overall accesses
-system.cpu.dcache.overall_mshr_misses             173                       # number of overall MSHR misses
+system.cpu.dcache.overall_hits                   1860                       # number of overall hits
+system.cpu.dcache.overall_miss_latency        2420500                       # number of overall miss cycles
+system.cpu.dcache.overall_miss_rate          0.091797                       # miss rate for overall accesses
+system.cpu.dcache.overall_misses                  188                       # number of overall misses
+system.cpu.dcache.overall_mshr_hits               285                       # number of overall MSHR hits
+system.cpu.dcache.overall_mshr_miss_latency       996000                       # number of overall MSHR miss cycles
+system.cpu.dcache.overall_mshr_miss_rate     0.091797                       # mshr miss rate for overall accesses
+system.cpu.dcache.overall_mshr_misses             188                       # number of overall MSHR misses
 system.cpu.dcache.overall_mshr_uncacheable_latency            0                       # number of overall MSHR uncacheable cycles
 system.cpu.dcache.overall_mshr_uncacheable_misses            0                       # number of overall MSHR uncacheable misses
 system.cpu.dcache.prefetcher.num_hwpf_already_in_cache            0                       # number of hwpf that were already in the cache
@@ -119,91 +119,91 @@ system.cpu.dcache.prefetcher.num_hwpf_removed_MSHR_hit            0
 system.cpu.dcache.prefetcher.num_hwpf_span_page            0                       # number of hwpf spanning a virtual page
 system.cpu.dcache.prefetcher.num_hwpf_squashed_from_miss            0                       # number of hwpf that got squashed due to a miss aborting calculation time
 system.cpu.dcache.replacements                      0                       # number of replacements
-system.cpu.dcache.sampled_refs                    173                       # Sample count of references to valid blocks.
+system.cpu.dcache.sampled_refs                    174                       # Sample count of references to valid blocks.
 system.cpu.dcache.soft_prefetch_mshr_full            0                       # number of mshr full events for SW prefetching instrutions
-system.cpu.dcache.tagsinuse                112.669258                       # Cycle average of tags in use
-system.cpu.dcache.total_refs                     1806                       # Total number of references to valid blocks.
+system.cpu.dcache.tagsinuse                112.600183                       # Cycle average of tags in use
+system.cpu.dcache.total_refs                     1874                       # Total number of references to valid blocks.
 system.cpu.dcache.warmup_cycle                      0                       # Cycle when the warmup percentage was hit.
 system.cpu.dcache.writebacks                        0                       # number of writebacks
-system.cpu.decode.DECODE:BlockedCycles            389                       # Number of cycles decode is blocked
-system.cpu.decode.DECODE:BranchMispred             75                       # Number of times decode detected a branch misprediction
-system.cpu.decode.DECODE:BranchResolved           143                       # Number of times decode resolved a branch
-system.cpu.decode.DECODE:DecodedInsts           10466                       # Number of instructions handled by decode
-system.cpu.decode.DECODE:IdleCycles              6230                       # Number of cycles decode is idle
-system.cpu.decode.DECODE:RunCycles               1855                       # Number of cycles decode is running
-system.cpu.decode.DECODE:SquashCycles             679                       # Number of cycles decode is squashing
-system.cpu.decode.DECODE:SquashedInsts            228                       # Number of squashed instructions handled by decode
+system.cpu.decode.DECODE:BlockedCycles            391                       # Number of cycles decode is blocked
+system.cpu.decode.DECODE:BranchMispred             82                       # Number of times decode detected a branch misprediction
+system.cpu.decode.DECODE:BranchResolved           164                       # Number of times decode resolved a branch
+system.cpu.decode.DECODE:DecodedInsts           11387                       # Number of instructions handled by decode
+system.cpu.decode.DECODE:IdleCycles              5174                       # Number of cycles decode is idle
+system.cpu.decode.DECODE:RunCycles               2002                       # Number of cycles decode is running
+system.cpu.decode.DECODE:SquashCycles             726                       # Number of cycles decode is squashing
+system.cpu.decode.DECODE:SquashedInsts            244                       # Number of squashed instructions handled by decode
 system.cpu.decode.DECODE:UnblockCycles             48                       # Number of cycles decode is unblocking
-system.cpu.fetch.Branches                        1837                       # Number of branches that fetch encountered
-system.cpu.fetch.CacheLines                      1469                       # Number of cache lines fetched
-system.cpu.fetch.Cycles                          3456                       # Number of cycles fetch has run and was not squashing or blocked
-system.cpu.fetch.IcacheSquashes                   267                       # Number of outstanding Icache misses that were squashed
-system.cpu.fetch.Insts                          11417                       # Number of instructions fetch has processed
-system.cpu.fetch.SquashCycles                     455                       # Number of cycles fetch has spent squashing
-system.cpu.fetch.branchRate                  0.199652                       # Number of branch fetches per cycle
-system.cpu.fetch.icacheStallCycles               1469                       # Number of cycles fetch is stalled on an Icache miss
-system.cpu.fetch.predictedBranches                763                       # Number of branches that fetch has predicted taken
-system.cpu.fetch.rate                        1.240843                       # Number of inst fetches per cycle
+system.cpu.fetch.Branches                        2025                       # Number of branches that fetch encountered
+system.cpu.fetch.CacheLines                      1529                       # Number of cache lines fetched
+system.cpu.fetch.Cycles                          3690                       # Number of cycles fetch has run and was not squashing or blocked
+system.cpu.fetch.IcacheSquashes                   212                       # Number of outstanding Icache misses that were squashed
+system.cpu.fetch.Insts                          12463                       # Number of instructions fetch has processed
+system.cpu.fetch.SquashCycles                     457                       # Number of cycles fetch has spent squashing
+system.cpu.fetch.branchRate                  0.242777                       # Number of branch fetches per cycle
+system.cpu.fetch.icacheStallCycles               1529                       # Number of cycles fetch is stalled on an Icache miss
+system.cpu.fetch.predictedBranches                820                       # Number of branches that fetch has predicted taken
+system.cpu.fetch.rate                        1.494185                       # Number of inst fetches per cycle
 system.cpu.fetch.rateDist.start_dist                           # Number of instructions fetched each cycle (Total)
-system.cpu.fetch.rateDist.samples                9201                      
+system.cpu.fetch.rateDist.samples                8341                      
 system.cpu.fetch.rateDist.min_value                 0                      
-                               0         7216   7842.63%           
-                               1          168    182.59%           
-                               2          148    160.85%           
-                               3          136    147.81%           
-                               4          214    232.58%           
-                               5          138    149.98%           
-                               6          177    192.37%           
-                               7           95    103.25%           
-                               8          909    987.94%           
+                               0         6181   7410.38%           
+                               1          173    207.41%           
+                               2          174    208.61%           
+                               3          151    181.03%           
+                               4          219    262.56%           
+                               5          157    188.23%           
+                               6          179    214.60%           
+                               7          102    122.29%           
+                               8         1005   1204.89%           
 system.cpu.fetch.rateDist.max_value                 8                      
 system.cpu.fetch.rateDist.end_dist
 
-system.cpu.icache.ReadReq_accesses               1469                       # number of ReadReq accesses(hits+misses)
-system.cpu.icache.ReadReq_avg_miss_latency  5381.818182                       # average ReadReq miss latency
-system.cpu.icache.ReadReq_avg_mshr_miss_latency  4530.448718                       # average ReadReq mshr miss latency
-system.cpu.icache.ReadReq_hits                   1139                       # number of ReadReq hits
-system.cpu.icache.ReadReq_miss_latency        1776000                       # number of ReadReq miss cycles
-system.cpu.icache.ReadReq_miss_rate          0.224643                       # miss rate for ReadReq accesses
-system.cpu.icache.ReadReq_misses                  330                       # number of ReadReq misses
+system.cpu.icache.ReadReq_accesses               1511                       # number of ReadReq accesses(hits+misses)
+system.cpu.icache.ReadReq_avg_miss_latency  5621.019108                       # average ReadReq miss latency
+system.cpu.icache.ReadReq_avg_mshr_miss_latency  4464.968153                       # average ReadReq mshr miss latency
+system.cpu.icache.ReadReq_hits                   1197                       # number of ReadReq hits
+system.cpu.icache.ReadReq_miss_latency        1765000                       # number of ReadReq miss cycles
+system.cpu.icache.ReadReq_miss_rate          0.207809                       # miss rate for ReadReq accesses
+system.cpu.icache.ReadReq_misses                  314                       # number of ReadReq misses
 system.cpu.icache.ReadReq_mshr_hits                18                       # number of ReadReq MSHR hits
-system.cpu.icache.ReadReq_mshr_miss_latency      1413500                       # number of ReadReq MSHR miss cycles
-system.cpu.icache.ReadReq_mshr_miss_rate     0.212389                       # mshr miss rate for ReadReq accesses
-system.cpu.icache.ReadReq_mshr_misses             312                       # number of ReadReq MSHR misses
+system.cpu.icache.ReadReq_mshr_miss_latency      1402000                       # number of ReadReq MSHR miss cycles
+system.cpu.icache.ReadReq_mshr_miss_rate     0.207809                       # mshr miss rate for ReadReq accesses
+system.cpu.icache.ReadReq_mshr_misses             314                       # number of ReadReq MSHR misses
 system.cpu.icache.avg_blocked_cycles_no_mshrs <err: div-0>                       # average number of cycles each access was blocked
 system.cpu.icache.avg_blocked_cycles_no_targets <err: div-0>                       # average number of cycles each access was blocked
-system.cpu.icache.avg_refs                   3.650641                       # Average number of references to valid blocks.
+system.cpu.icache.avg_refs                   3.812102                       # Average number of references to valid blocks.
 system.cpu.icache.blocked_no_mshrs                  0                       # number of cycles access was blocked
 system.cpu.icache.blocked_no_targets                0                       # number of cycles access was blocked
 system.cpu.icache.blocked_cycles_no_mshrs            0                       # number of cycles access was blocked
 system.cpu.icache.blocked_cycles_no_targets            0                       # number of cycles access was blocked
 system.cpu.icache.cache_copies                      0                       # number of cache copies performed
-system.cpu.icache.demand_accesses                1469                       # number of demand (read+write) accesses
-system.cpu.icache.demand_avg_miss_latency  5381.818182                       # average overall miss latency
-system.cpu.icache.demand_avg_mshr_miss_latency  4530.448718                       # average overall mshr miss latency
-system.cpu.icache.demand_hits                    1139                       # number of demand (read+write) hits
-system.cpu.icache.demand_miss_latency         1776000                       # number of demand (read+write) miss cycles
-system.cpu.icache.demand_miss_rate           0.224643                       # miss rate for demand accesses
-system.cpu.icache.demand_misses                   330                       # number of demand (read+write) misses
+system.cpu.icache.demand_accesses                1511                       # number of demand (read+write) accesses
+system.cpu.icache.demand_avg_miss_latency  5621.019108                       # average overall miss latency
+system.cpu.icache.demand_avg_mshr_miss_latency  4464.968153                       # average overall mshr miss latency
+system.cpu.icache.demand_hits                    1197                       # number of demand (read+write) hits
+system.cpu.icache.demand_miss_latency         1765000                       # number of demand (read+write) miss cycles
+system.cpu.icache.demand_miss_rate           0.207809                       # miss rate for demand accesses
+system.cpu.icache.demand_misses                   314                       # number of demand (read+write) misses
 system.cpu.icache.demand_mshr_hits                 18                       # number of demand (read+write) MSHR hits
-system.cpu.icache.demand_mshr_miss_latency      1413500                       # number of demand (read+write) MSHR miss cycles
-system.cpu.icache.demand_mshr_miss_rate      0.212389                       # mshr miss rate for demand accesses
-system.cpu.icache.demand_mshr_misses              312                       # number of demand (read+write) MSHR misses
+system.cpu.icache.demand_mshr_miss_latency      1402000                       # number of demand (read+write) MSHR miss cycles
+system.cpu.icache.demand_mshr_miss_rate      0.207809                       # mshr miss rate for demand accesses
+system.cpu.icache.demand_mshr_misses              314                       # number of demand (read+write) MSHR misses
 system.cpu.icache.fast_writes                       0                       # number of fast writes performed
 system.cpu.icache.mshr_cap_events                   0                       # number of times MSHR cap was activated
 system.cpu.icache.no_allocate_misses                0                       # Number of misses that were no-allocate
-system.cpu.icache.overall_accesses               1469                       # number of overall (read+write) accesses
-system.cpu.icache.overall_avg_miss_latency  5381.818182                       # average overall miss latency
-system.cpu.icache.overall_avg_mshr_miss_latency  4530.448718                       # average overall mshr miss latency
+system.cpu.icache.overall_accesses               1511                       # number of overall (read+write) accesses
+system.cpu.icache.overall_avg_miss_latency  5621.019108                       # average overall miss latency
+system.cpu.icache.overall_avg_mshr_miss_latency  4464.968153                       # average overall mshr miss latency
 system.cpu.icache.overall_avg_mshr_uncacheable_latency <err: div-0>                       # average overall mshr uncacheable latency
-system.cpu.icache.overall_hits                   1139                       # number of overall hits
-system.cpu.icache.overall_miss_latency        1776000                       # number of overall miss cycles
-system.cpu.icache.overall_miss_rate          0.224643                       # miss rate for overall accesses
-system.cpu.icache.overall_misses                  330                       # number of overall misses
+system.cpu.icache.overall_hits                   1197                       # number of overall hits
+system.cpu.icache.overall_miss_latency        1765000                       # number of overall miss cycles
+system.cpu.icache.overall_miss_rate          0.207809                       # miss rate for overall accesses
+system.cpu.icache.overall_misses                  314                       # number of overall misses
 system.cpu.icache.overall_mshr_hits                18                       # number of overall MSHR hits
-system.cpu.icache.overall_mshr_miss_latency      1413500                       # number of overall MSHR miss cycles
-system.cpu.icache.overall_mshr_miss_rate     0.212389                       # mshr miss rate for overall accesses
-system.cpu.icache.overall_mshr_misses             312                       # number of overall MSHR misses
+system.cpu.icache.overall_mshr_miss_latency      1402000                       # number of overall MSHR miss cycles
+system.cpu.icache.overall_mshr_miss_rate     0.207809                       # mshr miss rate for overall accesses
+system.cpu.icache.overall_mshr_misses             314                       # number of overall MSHR misses
 system.cpu.icache.overall_mshr_uncacheable_latency            0                       # number of overall MSHR uncacheable cycles
 system.cpu.icache.overall_mshr_uncacheable_misses            0                       # number of overall MSHR uncacheable misses
 system.cpu.icache.prefetcher.num_hwpf_already_in_cache            0                       # number of hwpf that were already in the cache
@@ -216,76 +216,76 @@ system.cpu.icache.prefetcher.num_hwpf_removed_MSHR_hit            0
 system.cpu.icache.prefetcher.num_hwpf_span_page            0                       # number of hwpf spanning a virtual page
 system.cpu.icache.prefetcher.num_hwpf_squashed_from_miss            0                       # number of hwpf that got squashed due to a miss aborting calculation time
 system.cpu.icache.replacements                      0                       # number of replacements
-system.cpu.icache.sampled_refs                    312                       # Sample count of references to valid blocks.
+system.cpu.icache.sampled_refs                    314                       # Sample count of references to valid blocks.
 system.cpu.icache.soft_prefetch_mshr_full            0                       # number of mshr full events for SW prefetching instrutions
-system.cpu.icache.tagsinuse                165.921810                       # Cycle average of tags in use
-system.cpu.icache.total_refs                     1139                       # Total number of references to valid blocks.
+system.cpu.icache.tagsinuse                167.838424                       # Cycle average of tags in use
+system.cpu.icache.total_refs                     1197                       # Total number of references to valid blocks.
 system.cpu.icache.warmup_cycle                      0                       # Cycle when the warmup percentage was hit.
 system.cpu.icache.writebacks                        0                       # number of writebacks
-system.cpu.idleCycles                            2474                       # Total number of cycles that the CPU has spent unscheduled due to idling
-system.cpu.iew.EXEC:branches                     1144                       # Number of branches executed
-system.cpu.iew.EXEC:nop                            40                       # number of nop insts executed
-system.cpu.iew.EXEC:rate                     0.835018                       # Inst execution rate
-system.cpu.iew.EXEC:refs                         2519                       # number of memory reference insts executed
-system.cpu.iew.EXEC:stores                        977                       # Number of stores executed
+system.cpu.idleCycles                            1997                       # Total number of cycles that the CPU has spent unscheduled due to idling
+system.cpu.iew.EXEC:branches                     1159                       # Number of branches executed
+system.cpu.iew.EXEC:nop                            43                       # number of nop insts executed
+system.cpu.iew.EXEC:rate                     0.933581                       # Inst execution rate
+system.cpu.iew.EXEC:refs                         2561                       # number of memory reference insts executed
+system.cpu.iew.EXEC:stores                        971                       # Number of stores executed
 system.cpu.iew.EXEC:swp                             0                       # number of swp insts executed
-system.cpu.iew.WB:consumers                      5193                       # num instructions consuming a value
-system.cpu.iew.WB:count                          7387                       # cumulative count of insts written-back
-system.cpu.iew.WB:fanout                     0.742923                       # average fanout of values written-back
+system.cpu.iew.WB:consumers                      5329                       # num instructions consuming a value
+system.cpu.iew.WB:count                          7480                       # cumulative count of insts written-back
+system.cpu.iew.WB:fanout                     0.739351                       # average fanout of values written-back
 system.cpu.iew.WB:penalized                         0                       # number of instrctions required to write to 'other' IQ
 system.cpu.iew.WB:penalized_rate                    0                       # fraction of instructions written-back that wrote to 'other' IQ
-system.cpu.iew.WB:producers                      3858                       # num instructions producing a value
-system.cpu.iew.WB:rate                       0.802848                       # insts written-back per cycle
-system.cpu.iew.WB:sent                           7452                       # cumulative count of insts sent to commit
-system.cpu.iew.branchMispredicts                  373                       # Number of branch mispredicts detected at execute
+system.cpu.iew.WB:producers                      3940                       # num instructions producing a value
+system.cpu.iew.WB:rate                       0.896775                       # insts written-back per cycle
+system.cpu.iew.WB:sent                           7559                       # cumulative count of insts sent to commit
+system.cpu.iew.branchMispredicts                  402                       # Number of branch mispredicts detected at execute
 system.cpu.iew.iewBlockCycles                       4                       # Number of cycles IEW is blocking
-system.cpu.iew.iewDispLoadInsts                  1874                       # Number of dispatched load instructions
-system.cpu.iew.iewDispNonSpecInsts                 22                       # Number of dispatched non-speculative instructions
-system.cpu.iew.iewDispSquashedInsts               302                       # Number of squashed instructions skipped by dispatch
-system.cpu.iew.iewDispStoreInsts                 1142                       # Number of dispatched store instructions
-system.cpu.iew.iewDispatchedInsts                9228                       # Number of instructions dispatched to IQ
-system.cpu.iew.iewExecLoadInsts                  1542                       # Number of load instructions executed
-system.cpu.iew.iewExecSquashedInsts               285                       # Number of squashed instructions skipped in execute
-system.cpu.iew.iewExecutedInsts                  7683                       # Number of executed instructions
+system.cpu.iew.iewDispLoadInsts                  1967                       # Number of dispatched load instructions
+system.cpu.iew.iewDispNonSpecInsts                 23                       # Number of dispatched non-speculative instructions
+system.cpu.iew.iewDispSquashedInsts               240                       # Number of squashed instructions skipped by dispatch
+system.cpu.iew.iewDispStoreInsts                 1200                       # Number of dispatched store instructions
+system.cpu.iew.iewDispatchedInsts                9614                       # Number of instructions dispatched to IQ
+system.cpu.iew.iewExecLoadInsts                  1590                       # Number of load instructions executed
+system.cpu.iew.iewExecSquashedInsts               364                       # Number of squashed instructions skipped in execute
+system.cpu.iew.iewExecutedInsts                  7787                       # Number of executed instructions
 system.cpu.iew.iewIQFullEvents                      0                       # Number of times the IQ has become full, causing a stall
 system.cpu.iew.iewIdleCycles                        0                       # Number of cycles IEW is idle
 system.cpu.iew.iewLSQFullEvents                     0                       # Number of times the LSQ has become full, causing a stall
-system.cpu.iew.iewSquashCycles                    679                       # Number of cycles IEW is squashing
+system.cpu.iew.iewSquashCycles                    726                       # Number of cycles IEW is squashing
 system.cpu.iew.iewUnblockCycles                     0                       # Number of cycles IEW is unblocking
 system.cpu.iew.lsq.thread.0.blockedLoads            0                       # Number of blocked loads due to partial load-store forwarding
 system.cpu.iew.lsq.thread.0.cacheBlocked            0                       # Number of times an access to memory failed due to the cache being blocked
-system.cpu.iew.lsq.thread.0.forwLoads              50                       # Number of loads that had data forwarded from stores
-system.cpu.iew.lsq.thread.0.ignoredResponses            4                       # Number of memory responses ignored because the instruction is squashed
+system.cpu.iew.lsq.thread.0.forwLoads              48                       # Number of loads that had data forwarded from stores
+system.cpu.iew.lsq.thread.0.ignoredResponses            3                       # Number of memory responses ignored because the instruction is squashed
 system.cpu.iew.lsq.thread.0.invAddrLoads            0                       # Number of loads ignored due to an invalid address
 system.cpu.iew.lsq.thread.0.invAddrSwpfs            0                       # Number of software prefetches ignored due to an invalid address
-system.cpu.iew.lsq.thread.0.memOrderViolation           63                       # Number of memory ordering violations
+system.cpu.iew.lsq.thread.0.memOrderViolation           70                       # Number of memory ordering violations
 system.cpu.iew.lsq.thread.0.rescheduledLoads            1                       # Number of loads that were rescheduled
-system.cpu.iew.lsq.thread.0.squashedLoads          895                       # Number of loads squashed
-system.cpu.iew.lsq.thread.0.squashedStores          330                       # Number of stores squashed
-system.cpu.iew.memOrderViolationEvents             63                       # Number of memory order violations
-system.cpu.iew.predictedNotTakenIncorrect          262                       # Number of branches that were predicted not taken incorrectly
-system.cpu.iew.predictedTakenIncorrect            111                       # Number of branches that were predicted taken incorrectly
-system.cpu.ipc                               0.611129                       # IPC: Instructions Per Cycle
-system.cpu.ipc_total                         0.611129                       # IPC: Total IPC of All Threads
-system.cpu.iq.ISSUE:FU_type_0                    7968                       # Type of FU issued
+system.cpu.iew.lsq.thread.0.squashedLoads          988                       # Number of loads squashed
+system.cpu.iew.lsq.thread.0.squashedStores          388                       # Number of stores squashed
+system.cpu.iew.memOrderViolationEvents             70                       # Number of memory order violations
+system.cpu.iew.predictedNotTakenIncorrect          287                       # Number of branches that were predicted not taken incorrectly
+system.cpu.iew.predictedTakenIncorrect            115                       # Number of branches that were predicted taken incorrectly
+system.cpu.ipc                               0.674140                       # IPC: Instructions Per Cycle
+system.cpu.ipc_total                         0.674140                       # IPC: Total IPC of All Threads
+system.cpu.iq.ISSUE:FU_type_0                    8151                       # Type of FU issued
 system.cpu.iq.ISSUE:FU_type_0.start_dist
-                      No_OpClass            2      0.03%            # Type of FU issued
-                          IntAlu         5314     66.69%            # Type of FU issued
+                      No_OpClass            2      0.02%            # Type of FU issued
+                          IntAlu         5422     66.52%            # Type of FU issued
                          IntMult            1      0.01%            # Type of FU issued
                           IntDiv            0      0.00%            # Type of FU issued
-                        FloatAdd            2      0.03%            # Type of FU issued
+                        FloatAdd            2      0.02%            # Type of FU issued
                         FloatCmp            0      0.00%            # Type of FU issued
                         FloatCvt            0      0.00%            # Type of FU issued
                        FloatMult            0      0.00%            # Type of FU issued
                         FloatDiv            0      0.00%            # Type of FU issued
                        FloatSqrt            0      0.00%            # Type of FU issued
-                         MemRead         1659     20.82%            # Type of FU issued
-                        MemWrite          990     12.42%            # Type of FU issued
+                         MemRead         1720     21.10%            # Type of FU issued
+                        MemWrite         1004     12.32%            # Type of FU issued
                        IprAccess            0      0.00%            # Type of FU issued
                     InstPrefetch            0      0.00%            # Type of FU issued
 system.cpu.iq.ISSUE:FU_type_0.end_dist
-system.cpu.iq.ISSUE:fu_busy_cnt                   105                       # FU busy when requested
-system.cpu.iq.ISSUE:fu_busy_rate             0.013178                       # FU busy rate (busy events/executed inst)
+system.cpu.iq.ISSUE:fu_busy_cnt                   104                       # FU busy when requested
+system.cpu.iq.ISSUE:fu_busy_rate             0.012759                       # FU busy rate (busy events/executed inst)
 system.cpu.iq.ISSUE:fu_full.start_dist
                       No_OpClass            0      0.00%            # attempts to use FU when none available
                           IntAlu            0      0.00%            # attempts to use FU when none available
@@ -297,77 +297,96 @@ system.cpu.iq.ISSUE:fu_full.start_dist
                        FloatMult            0      0.00%            # attempts to use FU when none available
                         FloatDiv            0      0.00%            # attempts to use FU when none available
                        FloatSqrt            0      0.00%            # attempts to use FU when none available
-                         MemRead           70     66.67%            # attempts to use FU when none available
-                        MemWrite           35     33.33%            # attempts to use FU when none available
+                         MemRead           70     67.31%            # attempts to use FU when none available
+                        MemWrite           34     32.69%            # attempts to use FU when none available
                        IprAccess            0      0.00%            # attempts to use FU when none available
                     InstPrefetch            0      0.00%            # attempts to use FU when none available
 system.cpu.iq.ISSUE:fu_full.end_dist
 system.cpu.iq.ISSUE:issued_per_cycle.start_dist                     # Number of insts issued each cycle
-system.cpu.iq.ISSUE:issued_per_cycle.samples         9201                      
+system.cpu.iq.ISSUE:issued_per_cycle.samples         8341                      
 system.cpu.iq.ISSUE:issued_per_cycle.min_value            0                      
-                               0         5952   6468.86%           
-                               1         1111   1207.48%           
-                               2          928   1008.59%           
-                               3          433    470.60%           
-                               4          378    410.82%           
-                               5          251    272.80%           
-                               6          111    120.64%           
-                               7           27     29.34%           
-                               8           10     10.87%           
+                               0         5104   6119.17%           
+                               1         1084   1299.60%           
+                               2          829    993.89%           
+                               3          533    639.01%           
+                               4          366    438.80%           
+                               5          258    309.32%           
+                               6          126    151.06%           
+                               7           28     33.57%           
+                               8           13     15.59%           
 system.cpu.iq.ISSUE:issued_per_cycle.max_value            8                      
 system.cpu.iq.ISSUE:issued_per_cycle.end_dist
 
-system.cpu.iq.ISSUE:rate                     0.865993                       # Inst issue rate
-system.cpu.iq.iqInstsAdded                       9166                       # Number of instructions added to the IQ (excludes non-spec)
-system.cpu.iq.iqInstsIssued                      7968                       # Number of instructions issued
-system.cpu.iq.iqNonSpecInstsAdded                  22                       # Number of non-speculative instructions added to the IQ
-system.cpu.iq.iqSquashedInstsExamined            3154                       # Number of squashed instructions iterated over during squash; mainly for profiling
+system.cpu.iq.ISSUE:rate                     0.977221                       # Inst issue rate
+system.cpu.iq.iqInstsAdded                       9548                       # Number of instructions added to the IQ (excludes non-spec)
+system.cpu.iq.iqInstsIssued                      8151                       # Number of instructions issued
+system.cpu.iq.iqNonSpecInstsAdded                  23                       # Number of non-speculative instructions added to the IQ
+system.cpu.iq.iqSquashedInstsExamined            3578                       # Number of squashed instructions iterated over during squash; mainly for profiling
 system.cpu.iq.iqSquashedInstsIssued                22                       # Number of squashed instructions issued
-system.cpu.iq.iqSquashedNonSpecRemoved              5                       # Number of squashed non-spec instructions that were removed
-system.cpu.iq.iqSquashedOperandsExamined         2035                       # Number of squashed operands that are examined and possibly removed from graph
-system.cpu.l2cache.ReadReq_accesses               483                       # number of ReadReq accesses(hits+misses)
-system.cpu.l2cache.ReadReq_avg_miss_latency  4644.927536                       # average ReadReq miss latency
-system.cpu.l2cache.ReadReq_avg_mshr_miss_latency  2467.908903                       # average ReadReq mshr miss latency
-system.cpu.l2cache.ReadReq_miss_latency       2243500                       # number of ReadReq miss cycles
-system.cpu.l2cache.ReadReq_miss_rate                1                       # miss rate for ReadReq accesses
-system.cpu.l2cache.ReadReq_misses                 483                       # number of ReadReq misses
-system.cpu.l2cache.ReadReq_mshr_miss_latency      1192000                       # number of ReadReq MSHR miss cycles
-system.cpu.l2cache.ReadReq_mshr_miss_rate            1                       # mshr miss rate for ReadReq accesses
-system.cpu.l2cache.ReadReq_mshr_misses            483                       # number of ReadReq MSHR misses
+system.cpu.iq.iqSquashedNonSpecRemoved              6                       # Number of squashed non-spec instructions that were removed
+system.cpu.iq.iqSquashedOperandsExamined         2360                       # Number of squashed operands that are examined and possibly removed from graph
+system.cpu.l2cache.ReadExReq_accesses              73                       # number of ReadExReq accesses(hits+misses)
+system.cpu.l2cache.ReadExReq_avg_miss_latency  3643.835616                       # average ReadExReq miss latency
+system.cpu.l2cache.ReadExReq_avg_mshr_miss_latency  2643.835616                       # average ReadExReq mshr miss latency
+system.cpu.l2cache.ReadExReq_miss_latency       266000                       # number of ReadExReq miss cycles
+system.cpu.l2cache.ReadExReq_miss_rate              1                       # miss rate for ReadExReq accesses
+system.cpu.l2cache.ReadExReq_misses                73                       # number of ReadExReq misses
+system.cpu.l2cache.ReadExReq_mshr_miss_latency       193000                       # number of ReadExReq MSHR miss cycles
+system.cpu.l2cache.ReadExReq_mshr_miss_rate            1                       # mshr miss rate for ReadExReq accesses
+system.cpu.l2cache.ReadExReq_mshr_misses           73                       # number of ReadExReq MSHR misses
+system.cpu.l2cache.ReadReq_accesses               415                       # number of ReadReq accesses(hits+misses)
+system.cpu.l2cache.ReadReq_avg_miss_latency  3406.779661                       # average ReadReq miss latency
+system.cpu.l2cache.ReadReq_avg_mshr_miss_latency  2406.779661                       # average ReadReq mshr miss latency
+system.cpu.l2cache.ReadReq_hits                     2                       # number of ReadReq hits
+system.cpu.l2cache.ReadReq_miss_latency       1407000                       # number of ReadReq miss cycles
+system.cpu.l2cache.ReadReq_miss_rate         0.995181                       # miss rate for ReadReq accesses
+system.cpu.l2cache.ReadReq_misses                 413                       # number of ReadReq misses
+system.cpu.l2cache.ReadReq_mshr_miss_latency       994000                       # number of ReadReq MSHR miss cycles
+system.cpu.l2cache.ReadReq_mshr_miss_rate     0.995181                       # mshr miss rate for ReadReq accesses
+system.cpu.l2cache.ReadReq_mshr_misses            413                       # number of ReadReq MSHR misses
+system.cpu.l2cache.UpgradeReq_accesses             14                       # number of UpgradeReq accesses(hits+misses)
+system.cpu.l2cache.UpgradeReq_avg_miss_latency  3392.857143                       # average UpgradeReq miss latency
+system.cpu.l2cache.UpgradeReq_avg_mshr_miss_latency  2392.857143                       # average UpgradeReq mshr miss latency
+system.cpu.l2cache.UpgradeReq_miss_latency        47500                       # number of UpgradeReq miss cycles
+system.cpu.l2cache.UpgradeReq_miss_rate             1                       # miss rate for UpgradeReq accesses
+system.cpu.l2cache.UpgradeReq_misses               14                       # number of UpgradeReq misses
+system.cpu.l2cache.UpgradeReq_mshr_miss_latency        33500                       # number of UpgradeReq MSHR miss cycles
+system.cpu.l2cache.UpgradeReq_mshr_miss_rate            1                       # mshr miss rate for UpgradeReq accesses
+system.cpu.l2cache.UpgradeReq_mshr_misses           14                       # number of UpgradeReq MSHR misses
 system.cpu.l2cache.avg_blocked_cycles_no_mshrs <err: div-0>                       # average number of cycles each access was blocked
 system.cpu.l2cache.avg_blocked_cycles_no_targets <err: div-0>                       # average number of cycles each access was blocked
-system.cpu.l2cache.avg_refs                         0                       # Average number of references to valid blocks.
+system.cpu.l2cache.avg_refs                  0.005013                       # Average number of references to valid blocks.
 system.cpu.l2cache.blocked_no_mshrs                 0                       # number of cycles access was blocked
 system.cpu.l2cache.blocked_no_targets               0                       # number of cycles access was blocked
 system.cpu.l2cache.blocked_cycles_no_mshrs            0                       # number of cycles access was blocked
 system.cpu.l2cache.blocked_cycles_no_targets            0                       # number of cycles access was blocked
 system.cpu.l2cache.cache_copies                     0                       # number of cache copies performed
-system.cpu.l2cache.demand_accesses                483                       # number of demand (read+write) accesses
-system.cpu.l2cache.demand_avg_miss_latency  4644.927536                       # average overall miss latency
-system.cpu.l2cache.demand_avg_mshr_miss_latency  2467.908903                       # average overall mshr miss latency
-system.cpu.l2cache.demand_hits                      0                       # number of demand (read+write) hits
-system.cpu.l2cache.demand_miss_latency        2243500                       # number of demand (read+write) miss cycles
-system.cpu.l2cache.demand_miss_rate                 1                       # miss rate for demand accesses
-system.cpu.l2cache.demand_misses                  483                       # number of demand (read+write) misses
+system.cpu.l2cache.demand_accesses                488                       # number of demand (read+write) accesses
+system.cpu.l2cache.demand_avg_miss_latency  3442.386831                       # average overall miss latency
+system.cpu.l2cache.demand_avg_mshr_miss_latency  2442.386831                       # average overall mshr miss latency
+system.cpu.l2cache.demand_hits                      2                       # number of demand (read+write) hits
+system.cpu.l2cache.demand_miss_latency        1673000                       # number of demand (read+write) miss cycles
+system.cpu.l2cache.demand_miss_rate          0.995902                       # miss rate for demand accesses
+system.cpu.l2cache.demand_misses                  486                       # number of demand (read+write) misses
 system.cpu.l2cache.demand_mshr_hits                 0                       # number of demand (read+write) MSHR hits
-system.cpu.l2cache.demand_mshr_miss_latency      1192000                       # number of demand (read+write) MSHR miss cycles
-system.cpu.l2cache.demand_mshr_miss_rate            1                       # mshr miss rate for demand accesses
-system.cpu.l2cache.demand_mshr_misses             483                       # number of demand (read+write) MSHR misses
+system.cpu.l2cache.demand_mshr_miss_latency      1187000                       # number of demand (read+write) MSHR miss cycles
+system.cpu.l2cache.demand_mshr_miss_rate     0.995902                       # mshr miss rate for demand accesses
+system.cpu.l2cache.demand_mshr_misses             486                       # number of demand (read+write) MSHR misses
 system.cpu.l2cache.fast_writes                      0                       # number of fast writes performed
 system.cpu.l2cache.mshr_cap_events                  0                       # number of times MSHR cap was activated
 system.cpu.l2cache.no_allocate_misses               0                       # Number of misses that were no-allocate
-system.cpu.l2cache.overall_accesses               483                       # number of overall (read+write) accesses
-system.cpu.l2cache.overall_avg_miss_latency  4644.927536                       # average overall miss latency
-system.cpu.l2cache.overall_avg_mshr_miss_latency  2467.908903                       # average overall mshr miss latency
+system.cpu.l2cache.overall_accesses               488                       # number of overall (read+write) accesses
+system.cpu.l2cache.overall_avg_miss_latency  3442.386831                       # average overall miss latency
+system.cpu.l2cache.overall_avg_mshr_miss_latency  2442.386831                       # average overall mshr miss latency
 system.cpu.l2cache.overall_avg_mshr_uncacheable_latency <err: div-0>                       # average overall mshr uncacheable latency
-system.cpu.l2cache.overall_hits                     0                       # number of overall hits
-system.cpu.l2cache.overall_miss_latency       2243500                       # number of overall miss cycles
-system.cpu.l2cache.overall_miss_rate                1                       # miss rate for overall accesses
-system.cpu.l2cache.overall_misses                 483                       # number of overall misses
+system.cpu.l2cache.overall_hits                     2                       # number of overall hits
+system.cpu.l2cache.overall_miss_latency       1673000                       # number of overall miss cycles
+system.cpu.l2cache.overall_miss_rate         0.995902                       # miss rate for overall accesses
+system.cpu.l2cache.overall_misses                 486                       # number of overall misses
 system.cpu.l2cache.overall_mshr_hits                0                       # number of overall MSHR hits
-system.cpu.l2cache.overall_mshr_miss_latency      1192000                       # number of overall MSHR miss cycles
-system.cpu.l2cache.overall_mshr_miss_rate            1                       # mshr miss rate for overall accesses
-system.cpu.l2cache.overall_mshr_misses            483                       # number of overall MSHR misses
+system.cpu.l2cache.overall_mshr_miss_latency      1187000                       # number of overall MSHR miss cycles
+system.cpu.l2cache.overall_mshr_miss_rate     0.995902                       # mshr miss rate for overall accesses
+system.cpu.l2cache.overall_mshr_misses            486                       # number of overall MSHR misses
 system.cpu.l2cache.overall_mshr_uncacheable_latency            0                       # number of overall MSHR uncacheable cycles
 system.cpu.l2cache.overall_mshr_uncacheable_misses            0                       # number of overall MSHR uncacheable misses
 system.cpu.l2cache.prefetcher.num_hwpf_already_in_cache            0                       # number of hwpf that were already in the cache
@@ -380,29 +399,29 @@ system.cpu.l2cache.prefetcher.num_hwpf_removed_MSHR_hit            0
 system.cpu.l2cache.prefetcher.num_hwpf_span_page            0                       # number of hwpf spanning a virtual page
 system.cpu.l2cache.prefetcher.num_hwpf_squashed_from_miss            0                       # number of hwpf that got squashed due to a miss aborting calculation time
 system.cpu.l2cache.replacements                     0                       # number of replacements
-system.cpu.l2cache.sampled_refs                   483                       # Sample count of references to valid blocks.
+system.cpu.l2cache.sampled_refs                   399                       # Sample count of references to valid blocks.
 system.cpu.l2cache.soft_prefetch_mshr_full            0                       # number of mshr full events for SW prefetching instrutions
-system.cpu.l2cache.tagsinuse               278.204751                       # Cycle average of tags in use
-system.cpu.l2cache.total_refs                       0                       # Total number of references to valid blocks.
+system.cpu.l2cache.tagsinuse               223.758944                       # Cycle average of tags in use
+system.cpu.l2cache.total_refs                       2                       # Total number of references to valid blocks.
 system.cpu.l2cache.warmup_cycle                     0                       # Cycle when the warmup percentage was hit.
 system.cpu.l2cache.writebacks                       0                       # number of writebacks
-system.cpu.numCycles                             9201                       # number of cpu cycles simulated
-system.cpu.rename.RENAME:BlockCycles               15                       # Number of cycles rename is blocking
+system.cpu.numCycles                             8341                       # number of cpu cycles simulated
+system.cpu.rename.RENAME:BlockCycles               14                       # Number of cycles rename is blocking
 system.cpu.rename.RENAME:CommittedMaps           4051                       # Number of HB maps that are committed
-system.cpu.rename.RENAME:IdleCycles              6382                       # Number of cycles rename is idle
-system.cpu.rename.RENAME:LSQFullEvents             70                       # Number of times rename has blocked due to LSQ full
-system.cpu.rename.RENAME:RenameLookups          12837                       # Number of register rename lookups that rename has made
-system.cpu.rename.RENAME:RenamedInsts           10018                       # Number of instructions processed by rename
-system.cpu.rename.RENAME:RenamedOperands         7477                       # Number of destination operands rename has renamed
-system.cpu.rename.RENAME:RunCycles               1754                       # Number of cycles rename is running
-system.cpu.rename.RENAME:SquashCycles             679                       # Number of cycles rename is squashing
-system.cpu.rename.RENAME:UnblockCycles            101                       # Number of cycles rename is unblocking
-system.cpu.rename.RENAME:UndoneMaps              3426                       # Number of HB maps that are undone due to squashing
-system.cpu.rename.RENAME:serializeStallCycles          270                       # count of cycles rename stalled for serializing inst
+system.cpu.rename.RENAME:IdleCycles              5339                       # Number of cycles rename is idle
+system.cpu.rename.RENAME:LSQFullEvents             71                       # Number of times rename has blocked due to LSQ full
+system.cpu.rename.RENAME:RenameLookups          13891                       # Number of register rename lookups that rename has made
+system.cpu.rename.RENAME:RenamedInsts           10852                       # Number of instructions processed by rename
+system.cpu.rename.RENAME:RenamedOperands         8114                       # Number of destination operands rename has renamed
+system.cpu.rename.RENAME:RunCycles               1888                       # Number of cycles rename is running
+system.cpu.rename.RENAME:SquashCycles             726                       # Number of cycles rename is squashing
+system.cpu.rename.RENAME:UnblockCycles            102                       # Number of cycles rename is unblocking
+system.cpu.rename.RENAME:UndoneMaps              4063                       # Number of HB maps that are undone due to squashing
+system.cpu.rename.RENAME:serializeStallCycles          272                       # count of cycles rename stalled for serializing inst
 system.cpu.rename.RENAME:serializingInsts           26                       # count of serializing insts renamed
-system.cpu.rename.RENAME:skidInsts                380                       # count of insts added to the skid buffer
-system.cpu.rename.RENAME:tempSerializingInsts           20                       # count of temporary serializing insts renamed
-system.cpu.timesIdled                              26                       # Number of times that the entire CPU went into an idle state and unscheduled itself
+system.cpu.rename.RENAME:skidInsts                382                       # count of insts added to the skid buffer
+system.cpu.rename.RENAME:tempSerializingInsts           21                       # count of temporary serializing insts renamed
+system.cpu.timesIdled                               3                       # Number of times that the entire CPU went into an idle state and unscheduled itself
 system.cpu.workload.PROG:num_syscalls              17                       # Number of system calls
 
 ---------- End Simulation Statistics   ----------
diff --git a/tests/quick/00.hello/ref/alpha/linux/o3-timing/stdout b/tests/quick/00.hello/ref/alpha/linux/o3-timing/stdout
index 142cb9695..22c3e0435 100644
--- a/tests/quick/00.hello/ref/alpha/linux/o3-timing/stdout
+++ b/tests/quick/00.hello/ref/alpha/linux/o3-timing/stdout
@@ -6,9 +6,9 @@ The Regents of The University of Michigan
 All Rights Reserved
 
 
-M5 compiled Jun 21 2007 21:25:27
-M5 started Fri Jun 22 00:04:38 2007
+M5 compiled Aug  3 2007 03:56:47
+M5 started Fri Aug  3 04:17:12 2007
 M5 executing on zizzer.eecs.umich.edu
 command line: build/ALPHA_SE/m5.fast -d build/ALPHA_SE/tests/fast/quick/00.hello/alpha/linux/o3-timing tests/run.py quick/00.hello/alpha/linux/o3-timing
 Global frequency set at 1000000000000 ticks per second
-Exiting @ tick 4589500 because target called exit()
+Exiting @ tick 4170500 because target called exit()
diff --git a/tests/quick/00.hello/ref/alpha/linux/simple-atomic/config.ini b/tests/quick/00.hello/ref/alpha/linux/simple-atomic/config.ini
index e4dfe86d3..d025afdec 100644
--- a/tests/quick/00.hello/ref/alpha/linux/simple-atomic/config.ini
+++ b/tests/quick/00.hello/ref/alpha/linux/simple-atomic/config.ini
@@ -11,7 +11,7 @@ physmem=system.physmem
 
 [system.cpu]
 type=AtomicSimpleCPU
-children=workload
+children=tracer workload
 clock=500
 cpu_id=0
 defer_registration=false
@@ -25,11 +25,15 @@ phase=0
 progress_interval=0
 simulate_stalls=false
 system=system
+tracer=system.cpu.tracer
 width=1
 workload=system.cpu.workload
 dcache_port=system.membus.port[2]
 icache_port=system.membus.port[1]
 
+[system.cpu.tracer]
+type=ExeTracer
+
 [system.cpu.workload]
 type=LiveProcess
 cmd=hello
diff --git a/tests/quick/00.hello/ref/alpha/linux/simple-atomic/m5stats.txt b/tests/quick/00.hello/ref/alpha/linux/simple-atomic/m5stats.txt
index f1c7bd968..f87ad2cd6 100644
--- a/tests/quick/00.hello/ref/alpha/linux/simple-atomic/m5stats.txt
+++ b/tests/quick/00.hello/ref/alpha/linux/simple-atomic/m5stats.txt
@@ -1,8 +1,9 @@
 
 ---------- Begin Simulation Statistics ----------
-host_inst_rate                                  93019                       # Simulator instruction rate (inst/s)
-host_seconds                                     0.06                       # Real time elapsed on the host
-host_tick_rate                               46199079                       # Simulator tick rate (ticks/s)
+host_inst_rate                                 109073                       # Simulator instruction rate (inst/s)
+host_mem_usage                                 148564                       # Number of bytes of host memory used
+host_seconds                                     0.05                       # Real time elapsed on the host
+host_tick_rate                               54123810                       # Simulator tick rate (ticks/s)
 sim_freq                                 1000000000000                       # Frequency of simulated ticks
 sim_insts                                        5642                       # Number of instructions simulated
 sim_seconds                                  0.000003                       # Number of seconds simulated
diff --git a/tests/quick/00.hello/ref/alpha/linux/simple-atomic/stdout b/tests/quick/00.hello/ref/alpha/linux/simple-atomic/stdout
index 58fc0e374..0bec3d18f 100644
--- a/tests/quick/00.hello/ref/alpha/linux/simple-atomic/stdout
+++ b/tests/quick/00.hello/ref/alpha/linux/simple-atomic/stdout
@@ -6,9 +6,9 @@ The Regents of The University of Michigan
 All Rights Reserved
 
 
-M5 compiled Jun 10 2007 14:06:20
-M5 started Sun Jun 10 14:22:34 2007
-M5 executing on iceaxe
-command line: /Users/nate/build/outgoing/build/ALPHA_SE/m5.debug -d /Users/nate/build/outgoing/build/ALPHA_SE/tests/debug/quick/00.hello/alpha/linux/simple-atomic tests/run.py quick/00.hello/alpha/linux/simple-atomic
+M5 compiled Aug  3 2007 03:56:47
+M5 started Fri Aug  3 04:17:12 2007
+M5 executing on zizzer.eecs.umich.edu
+command line: build/ALPHA_SE/m5.fast -d build/ALPHA_SE/tests/fast/quick/00.hello/alpha/linux/simple-atomic tests/run.py quick/00.hello/alpha/linux/simple-atomic
 Global frequency set at 1000000000000 ticks per second
 Exiting @ tick 2820500 because target called exit()
diff --git a/tests/quick/00.hello/ref/alpha/linux/simple-timing/config.ini b/tests/quick/00.hello/ref/alpha/linux/simple-timing/config.ini
index 47315cc1d..5ae318852 100644
--- a/tests/quick/00.hello/ref/alpha/linux/simple-timing/config.ini
+++ b/tests/quick/00.hello/ref/alpha/linux/simple-timing/config.ini
@@ -11,7 +11,7 @@ physmem=system.physmem
 
 [system.cpu]
 type=TimingSimpleCPU
-children=dcache icache l2cache toL2Bus workload
+children=dcache icache l2cache toL2Bus tracer workload
 clock=500
 cpu_id=0
 defer_registration=false
@@ -24,17 +24,16 @@ max_loads_any_thread=0
 phase=0
 progress_interval=0
 system=system
+tracer=system.cpu.tracer
 workload=system.cpu.workload
 dcache_port=system.cpu.dcache.cpu_side
 icache_port=system.cpu.icache.cpu_side
 
 [system.cpu.dcache]
 type=BaseCache
-adaptive_compression=false
+addr_range=0:18446744073709551615
 assoc=2
 block_size=64
-compressed_bus=false
-compression_latency=0
 hash_delay=1
 latency=1000
 lifo=false
@@ -52,12 +51,10 @@ prefetch_serial_squash=false
 prefetch_use_cpu_id=true
 prefetcher_size=100
 prioritizeRequests=false
-protocol=Null
 repl=Null
 size=262144
 split=false
 split_size=0
-store_compressed=false
 subblock_size=0
 tgts_per_mshr=5
 trace_addr=0
@@ -68,11 +65,9 @@ mem_side=system.cpu.toL2Bus.port[1]
 
 [system.cpu.icache]
 type=BaseCache
-adaptive_compression=false
+addr_range=0:18446744073709551615
 assoc=2
 block_size=64
-compressed_bus=false
-compression_latency=0
 hash_delay=1
 latency=1000
 lifo=false
@@ -90,12 +85,10 @@ prefetch_serial_squash=false
 prefetch_use_cpu_id=true
 prefetcher_size=100
 prioritizeRequests=false
-protocol=Null
 repl=Null
 size=131072
 split=false
 split_size=0
-store_compressed=false
 subblock_size=0
 tgts_per_mshr=5
 trace_addr=0
@@ -106,11 +99,9 @@ mem_side=system.cpu.toL2Bus.port[0]
 
 [system.cpu.l2cache]
 type=BaseCache
-adaptive_compression=false
+addr_range=0:18446744073709551615
 assoc=2
 block_size=64
-compressed_bus=false
-compression_latency=0
 hash_delay=1
 latency=10000
 lifo=false
@@ -128,12 +119,10 @@ prefetch_serial_squash=false
 prefetch_use_cpu_id=true
 prefetcher_size=100
 prioritizeRequests=false
-protocol=Null
 repl=Null
 size=2097152
 split=false
 split_size=0
-store_compressed=false
 subblock_size=0
 tgts_per_mshr=5
 trace_addr=0
@@ -151,6 +140,9 @@ responder_set=false
 width=64
 port=system.cpu.icache.mem_side system.cpu.dcache.mem_side system.cpu.l2cache.cpu_side
 
+[system.cpu.tracer]
+type=ExeTracer
+
 [system.cpu.workload]
 type=LiveProcess
 cmd=hello
diff --git a/tests/quick/00.hello/ref/alpha/linux/simple-timing/m5stats.txt b/tests/quick/00.hello/ref/alpha/linux/simple-timing/m5stats.txt
index 1b70f10b3..db7224c2e 100644
--- a/tests/quick/00.hello/ref/alpha/linux/simple-timing/m5stats.txt
+++ b/tests/quick/00.hello/ref/alpha/linux/simple-timing/m5stats.txt
@@ -1,12 +1,13 @@
 
 ---------- Begin Simulation Statistics ----------
-host_inst_rate                                  54390                       # Simulator instruction rate (inst/s)
-host_seconds                                     0.10                       # Real time elapsed on the host
-host_tick_rate                              126525357                       # Simulator tick rate (ticks/s)
+host_inst_rate                                  65923                       # Simulator instruction rate (inst/s)
+host_mem_usage                                 154180                       # Number of bytes of host memory used
+host_seconds                                     0.09                       # Real time elapsed on the host
+host_tick_rate                              155349854                       # Simulator tick rate (ticks/s)
 sim_freq                                 1000000000000                       # Frequency of simulated ticks
 sim_insts                                        5642                       # Number of instructions simulated
 sim_seconds                                  0.000013                       # Number of seconds simulated
-sim_ticks                                    13168000                       # Number of ticks simulated
+sim_ticks                                    13359000                       # Number of ticks simulated
 system.cpu.dcache.ReadReq_accesses                979                       # number of ReadReq accesses(hits+misses)
 system.cpu.dcache.ReadReq_avg_miss_latency        14000                       # average ReadReq miss latency
 system.cpu.dcache.ReadReq_avg_mshr_miss_latency        13000                       # average ReadReq mshr miss latency
@@ -20,13 +21,13 @@ system.cpu.dcache.ReadReq_mshr_misses              92                       # nu
 system.cpu.dcache.WriteReq_accesses               812                       # number of WriteReq accesses(hits+misses)
 system.cpu.dcache.WriteReq_avg_miss_latency        14000                       # average WriteReq miss latency
 system.cpu.dcache.WriteReq_avg_mshr_miss_latency        13000                       # average WriteReq mshr miss latency
-system.cpu.dcache.WriteReq_hits                   739                       # number of WriteReq hits
-system.cpu.dcache.WriteReq_miss_latency       1022000                       # number of WriteReq miss cycles
-system.cpu.dcache.WriteReq_miss_rate         0.089901                       # miss rate for WriteReq accesses
-system.cpu.dcache.WriteReq_misses                  73                       # number of WriteReq misses
-system.cpu.dcache.WriteReq_mshr_miss_latency       949000                       # number of WriteReq MSHR miss cycles
-system.cpu.dcache.WriteReq_mshr_miss_rate     0.089901                       # mshr miss rate for WriteReq accesses
-system.cpu.dcache.WriteReq_mshr_misses             73                       # number of WriteReq MSHR misses
+system.cpu.dcache.WriteReq_hits                   725                       # number of WriteReq hits
+system.cpu.dcache.WriteReq_miss_latency       1218000                       # number of WriteReq miss cycles
+system.cpu.dcache.WriteReq_miss_rate         0.107143                       # miss rate for WriteReq accesses
+system.cpu.dcache.WriteReq_misses                  87                       # number of WriteReq misses
+system.cpu.dcache.WriteReq_mshr_miss_latency      1131000                       # number of WriteReq MSHR miss cycles
+system.cpu.dcache.WriteReq_mshr_miss_rate     0.107143                       # mshr miss rate for WriteReq accesses
+system.cpu.dcache.WriteReq_mshr_misses             87                       # number of WriteReq MSHR misses
 system.cpu.dcache.avg_blocked_cycles_no_mshrs <err: div-0>                       # average number of cycles each access was blocked
 system.cpu.dcache.avg_blocked_cycles_no_targets <err: div-0>                       # average number of cycles each access was blocked
 system.cpu.dcache.avg_refs                   9.854545                       # Average number of references to valid blocks.
@@ -38,14 +39,14 @@ system.cpu.dcache.cache_copies                      0                       # nu
 system.cpu.dcache.demand_accesses                1791                       # number of demand (read+write) accesses
 system.cpu.dcache.demand_avg_miss_latency        14000                       # average overall miss latency
 system.cpu.dcache.demand_avg_mshr_miss_latency        13000                       # average overall mshr miss latency
-system.cpu.dcache.demand_hits                    1626                       # number of demand (read+write) hits
-system.cpu.dcache.demand_miss_latency         2310000                       # number of demand (read+write) miss cycles
-system.cpu.dcache.demand_miss_rate           0.092127                       # miss rate for demand accesses
-system.cpu.dcache.demand_misses                   165                       # number of demand (read+write) misses
+system.cpu.dcache.demand_hits                    1612                       # number of demand (read+write) hits
+system.cpu.dcache.demand_miss_latency         2506000                       # number of demand (read+write) miss cycles
+system.cpu.dcache.demand_miss_rate           0.099944                       # miss rate for demand accesses
+system.cpu.dcache.demand_misses                   179                       # number of demand (read+write) misses
 system.cpu.dcache.demand_mshr_hits                  0                       # number of demand (read+write) MSHR hits
-system.cpu.dcache.demand_mshr_miss_latency      2145000                       # number of demand (read+write) MSHR miss cycles
-system.cpu.dcache.demand_mshr_miss_rate      0.092127                       # mshr miss rate for demand accesses
-system.cpu.dcache.demand_mshr_misses              165                       # number of demand (read+write) MSHR misses
+system.cpu.dcache.demand_mshr_miss_latency      2327000                       # number of demand (read+write) MSHR miss cycles
+system.cpu.dcache.demand_mshr_miss_rate      0.099944                       # mshr miss rate for demand accesses
+system.cpu.dcache.demand_mshr_misses              179                       # number of demand (read+write) MSHR misses
 system.cpu.dcache.fast_writes                       0                       # number of fast writes performed
 system.cpu.dcache.mshr_cap_events                   0                       # number of times MSHR cap was activated
 system.cpu.dcache.no_allocate_misses                0                       # Number of misses that were no-allocate
@@ -53,14 +54,14 @@ system.cpu.dcache.overall_accesses               1791                       # nu
 system.cpu.dcache.overall_avg_miss_latency        14000                       # average overall miss latency
 system.cpu.dcache.overall_avg_mshr_miss_latency        13000                       # average overall mshr miss latency
 system.cpu.dcache.overall_avg_mshr_uncacheable_latency <err: div-0>                       # average overall mshr uncacheable latency
-system.cpu.dcache.overall_hits                   1626                       # number of overall hits
-system.cpu.dcache.overall_miss_latency        2310000                       # number of overall miss cycles
-system.cpu.dcache.overall_miss_rate          0.092127                       # miss rate for overall accesses
-system.cpu.dcache.overall_misses                  165                       # number of overall misses
+system.cpu.dcache.overall_hits                   1612                       # number of overall hits
+system.cpu.dcache.overall_miss_latency        2506000                       # number of overall miss cycles
+system.cpu.dcache.overall_miss_rate          0.099944                       # miss rate for overall accesses
+system.cpu.dcache.overall_misses                  179                       # number of overall misses
 system.cpu.dcache.overall_mshr_hits                 0                       # number of overall MSHR hits
-system.cpu.dcache.overall_mshr_miss_latency      2145000                       # number of overall MSHR miss cycles
-system.cpu.dcache.overall_mshr_miss_rate     0.092127                       # mshr miss rate for overall accesses
-system.cpu.dcache.overall_mshr_misses             165                       # number of overall MSHR misses
+system.cpu.dcache.overall_mshr_miss_latency      2327000                       # number of overall MSHR miss cycles
+system.cpu.dcache.overall_mshr_miss_rate     0.099944                       # mshr miss rate for overall accesses
+system.cpu.dcache.overall_mshr_misses             179                       # number of overall MSHR misses
 system.cpu.dcache.overall_mshr_uncacheable_latency            0                       # number of overall MSHR uncacheable cycles
 system.cpu.dcache.overall_mshr_uncacheable_misses            0                       # number of overall MSHR uncacheable misses
 system.cpu.dcache.prefetcher.num_hwpf_already_in_cache            0                       # number of hwpf that were already in the cache
@@ -75,18 +76,18 @@ system.cpu.dcache.prefetcher.num_hwpf_squashed_from_miss            0
 system.cpu.dcache.replacements                      0                       # number of replacements
 system.cpu.dcache.sampled_refs                    165                       # Sample count of references to valid blocks.
 system.cpu.dcache.soft_prefetch_mshr_full            0                       # number of mshr full events for SW prefetching instrutions
-system.cpu.dcache.tagsinuse                103.640117                       # Cycle average of tags in use
+system.cpu.dcache.tagsinuse                103.895955                       # Cycle average of tags in use
 system.cpu.dcache.total_refs                     1626                       # Total number of references to valid blocks.
 system.cpu.dcache.warmup_cycle                      0                       # Cycle when the warmup percentage was hit.
 system.cpu.dcache.writebacks                        0                       # number of writebacks
 system.cpu.icache.ReadReq_accesses               5643                       # number of ReadReq accesses(hits+misses)
-system.cpu.icache.ReadReq_avg_miss_latency 13960.288809                       # average ReadReq miss latency
-system.cpu.icache.ReadReq_avg_mshr_miss_latency 12960.288809                       # average ReadReq mshr miss latency
+system.cpu.icache.ReadReq_avg_miss_latency 13992.779783                       # average ReadReq miss latency
+system.cpu.icache.ReadReq_avg_mshr_miss_latency 12992.779783                       # average ReadReq mshr miss latency
 system.cpu.icache.ReadReq_hits                   5366                       # number of ReadReq hits
-system.cpu.icache.ReadReq_miss_latency        3867000                       # number of ReadReq miss cycles
+system.cpu.icache.ReadReq_miss_latency        3876000                       # number of ReadReq miss cycles
 system.cpu.icache.ReadReq_miss_rate          0.049087                       # miss rate for ReadReq accesses
 system.cpu.icache.ReadReq_misses                  277                       # number of ReadReq misses
-system.cpu.icache.ReadReq_mshr_miss_latency      3590000                       # number of ReadReq MSHR miss cycles
+system.cpu.icache.ReadReq_mshr_miss_latency      3599000                       # number of ReadReq MSHR miss cycles
 system.cpu.icache.ReadReq_mshr_miss_rate     0.049087                       # mshr miss rate for ReadReq accesses
 system.cpu.icache.ReadReq_mshr_misses             277                       # number of ReadReq MSHR misses
 system.cpu.icache.avg_blocked_cycles_no_mshrs <err: div-0>                       # average number of cycles each access was blocked
@@ -98,29 +99,29 @@ system.cpu.icache.blocked_cycles_no_mshrs            0                       # n
 system.cpu.icache.blocked_cycles_no_targets            0                       # number of cycles access was blocked
 system.cpu.icache.cache_copies                      0                       # number of cache copies performed
 system.cpu.icache.demand_accesses                5643                       # number of demand (read+write) accesses
-system.cpu.icache.demand_avg_miss_latency 13960.288809                       # average overall miss latency
-system.cpu.icache.demand_avg_mshr_miss_latency 12960.288809                       # average overall mshr miss latency
+system.cpu.icache.demand_avg_miss_latency 13992.779783                       # average overall miss latency
+system.cpu.icache.demand_avg_mshr_miss_latency 12992.779783                       # average overall mshr miss latency
 system.cpu.icache.demand_hits                    5366                       # number of demand (read+write) hits
-system.cpu.icache.demand_miss_latency         3867000                       # number of demand (read+write) miss cycles
+system.cpu.icache.demand_miss_latency         3876000                       # number of demand (read+write) miss cycles
 system.cpu.icache.demand_miss_rate           0.049087                       # miss rate for demand accesses
 system.cpu.icache.demand_misses                   277                       # number of demand (read+write) misses
 system.cpu.icache.demand_mshr_hits                  0                       # number of demand (read+write) MSHR hits
-system.cpu.icache.demand_mshr_miss_latency      3590000                       # number of demand (read+write) MSHR miss cycles
+system.cpu.icache.demand_mshr_miss_latency      3599000                       # number of demand (read+write) MSHR miss cycles
 system.cpu.icache.demand_mshr_miss_rate      0.049087                       # mshr miss rate for demand accesses
 system.cpu.icache.demand_mshr_misses              277                       # number of demand (read+write) MSHR misses
 system.cpu.icache.fast_writes                       0                       # number of fast writes performed
 system.cpu.icache.mshr_cap_events                   0                       # number of times MSHR cap was activated
 system.cpu.icache.no_allocate_misses                0                       # Number of misses that were no-allocate
 system.cpu.icache.overall_accesses               5643                       # number of overall (read+write) accesses
-system.cpu.icache.overall_avg_miss_latency 13960.288809                       # average overall miss latency
-system.cpu.icache.overall_avg_mshr_miss_latency 12960.288809                       # average overall mshr miss latency
+system.cpu.icache.overall_avg_miss_latency 13992.779783                       # average overall miss latency
+system.cpu.icache.overall_avg_mshr_miss_latency 12992.779783                       # average overall mshr miss latency
 system.cpu.icache.overall_avg_mshr_uncacheable_latency <err: div-0>                       # average overall mshr uncacheable latency
 system.cpu.icache.overall_hits                   5366                       # number of overall hits
-system.cpu.icache.overall_miss_latency        3867000                       # number of overall miss cycles
+system.cpu.icache.overall_miss_latency        3876000                       # number of overall miss cycles
 system.cpu.icache.overall_miss_rate          0.049087                       # miss rate for overall accesses
 system.cpu.icache.overall_misses                  277                       # number of overall misses
 system.cpu.icache.overall_mshr_hits                 0                       # number of overall MSHR hits
-system.cpu.icache.overall_mshr_miss_latency      3590000                       # number of overall MSHR miss cycles
+system.cpu.icache.overall_mshr_miss_latency      3599000                       # number of overall MSHR miss cycles
 system.cpu.icache.overall_mshr_miss_rate     0.049087                       # mshr miss rate for overall accesses
 system.cpu.icache.overall_mshr_misses             277                       # number of overall MSHR misses
 system.cpu.icache.overall_mshr_uncacheable_latency            0                       # number of overall MSHR uncacheable cycles
@@ -137,53 +138,72 @@ system.cpu.icache.prefetcher.num_hwpf_squashed_from_miss            0
 system.cpu.icache.replacements                      0                       # number of replacements
 system.cpu.icache.sampled_refs                    277                       # Sample count of references to valid blocks.
 system.cpu.icache.soft_prefetch_mshr_full            0                       # number of mshr full events for SW prefetching instrutions
-system.cpu.icache.tagsinuse                129.241810                       # Cycle average of tags in use
+system.cpu.icache.tagsinuse                129.745202                       # Cycle average of tags in use
 system.cpu.icache.total_refs                     5366                       # Total number of references to valid blocks.
 system.cpu.icache.warmup_cycle                      0                       # Cycle when the warmup percentage was hit.
 system.cpu.icache.writebacks                        0                       # number of writebacks
 system.cpu.idle_fraction                            0                       # Percentage of idle cycles
-system.cpu.l2cache.ReadReq_accesses               441                       # number of ReadReq accesses(hits+misses)
-system.cpu.l2cache.ReadReq_avg_miss_latency        13000                       # average ReadReq miss latency
+system.cpu.l2cache.ReadExReq_accesses              73                       # number of ReadExReq accesses(hits+misses)
+system.cpu.l2cache.ReadExReq_avg_miss_latency        12000                       # average ReadExReq miss latency
+system.cpu.l2cache.ReadExReq_avg_mshr_miss_latency        11000                       # average ReadExReq mshr miss latency
+system.cpu.l2cache.ReadExReq_miss_latency       876000                       # number of ReadExReq miss cycles
+system.cpu.l2cache.ReadExReq_miss_rate              1                       # miss rate for ReadExReq accesses
+system.cpu.l2cache.ReadExReq_misses                73                       # number of ReadExReq misses
+system.cpu.l2cache.ReadExReq_mshr_miss_latency       803000                       # number of ReadExReq MSHR miss cycles
+system.cpu.l2cache.ReadExReq_mshr_miss_rate            1                       # mshr miss rate for ReadExReq accesses
+system.cpu.l2cache.ReadExReq_mshr_misses           73                       # number of ReadExReq MSHR misses
+system.cpu.l2cache.ReadReq_accesses               369                       # number of ReadReq accesses(hits+misses)
+system.cpu.l2cache.ReadReq_avg_miss_latency        12000                       # average ReadReq miss latency
 system.cpu.l2cache.ReadReq_avg_mshr_miss_latency        11000                       # average ReadReq mshr miss latency
-system.cpu.l2cache.ReadReq_miss_latency       5733000                       # number of ReadReq miss cycles
-system.cpu.l2cache.ReadReq_miss_rate                1                       # miss rate for ReadReq accesses
-system.cpu.l2cache.ReadReq_misses                 441                       # number of ReadReq misses
-system.cpu.l2cache.ReadReq_mshr_miss_latency      4851000                       # number of ReadReq MSHR miss cycles
-system.cpu.l2cache.ReadReq_mshr_miss_rate            1                       # mshr miss rate for ReadReq accesses
-system.cpu.l2cache.ReadReq_mshr_misses            441                       # number of ReadReq MSHR misses
+system.cpu.l2cache.ReadReq_hits                     1                       # number of ReadReq hits
+system.cpu.l2cache.ReadReq_miss_latency       4416000                       # number of ReadReq miss cycles
+system.cpu.l2cache.ReadReq_miss_rate         0.997290                       # miss rate for ReadReq accesses
+system.cpu.l2cache.ReadReq_misses                 368                       # number of ReadReq misses
+system.cpu.l2cache.ReadReq_mshr_miss_latency      4048000                       # number of ReadReq MSHR miss cycles
+system.cpu.l2cache.ReadReq_mshr_miss_rate     0.997290                       # mshr miss rate for ReadReq accesses
+system.cpu.l2cache.ReadReq_mshr_misses            368                       # number of ReadReq MSHR misses
+system.cpu.l2cache.UpgradeReq_accesses             14                       # number of UpgradeReq accesses(hits+misses)
+system.cpu.l2cache.UpgradeReq_avg_miss_latency        12000                       # average UpgradeReq miss latency
+system.cpu.l2cache.UpgradeReq_avg_mshr_miss_latency        11000                       # average UpgradeReq mshr miss latency
+system.cpu.l2cache.UpgradeReq_miss_latency       168000                       # number of UpgradeReq miss cycles
+system.cpu.l2cache.UpgradeReq_miss_rate             1                       # miss rate for UpgradeReq accesses
+system.cpu.l2cache.UpgradeReq_misses               14                       # number of UpgradeReq misses
+system.cpu.l2cache.UpgradeReq_mshr_miss_latency       154000                       # number of UpgradeReq MSHR miss cycles
+system.cpu.l2cache.UpgradeReq_mshr_miss_rate            1                       # mshr miss rate for UpgradeReq accesses
+system.cpu.l2cache.UpgradeReq_mshr_misses           14                       # number of UpgradeReq MSHR misses
 system.cpu.l2cache.avg_blocked_cycles_no_mshrs <err: div-0>                       # average number of cycles each access was blocked
 system.cpu.l2cache.avg_blocked_cycles_no_targets <err: div-0>                       # average number of cycles each access was blocked
-system.cpu.l2cache.avg_refs                         0                       # Average number of references to valid blocks.
+system.cpu.l2cache.avg_refs                  0.002825                       # Average number of references to valid blocks.
 system.cpu.l2cache.blocked_no_mshrs                 0                       # number of cycles access was blocked
 system.cpu.l2cache.blocked_no_targets               0                       # number of cycles access was blocked
 system.cpu.l2cache.blocked_cycles_no_mshrs            0                       # number of cycles access was blocked
 system.cpu.l2cache.blocked_cycles_no_targets            0                       # number of cycles access was blocked
 system.cpu.l2cache.cache_copies                     0                       # number of cache copies performed
-system.cpu.l2cache.demand_accesses                441                       # number of demand (read+write) accesses
-system.cpu.l2cache.demand_avg_miss_latency        13000                       # average overall miss latency
+system.cpu.l2cache.demand_accesses                442                       # number of demand (read+write) accesses
+system.cpu.l2cache.demand_avg_miss_latency        12000                       # average overall miss latency
 system.cpu.l2cache.demand_avg_mshr_miss_latency        11000                       # average overall mshr miss latency
-system.cpu.l2cache.demand_hits                      0                       # number of demand (read+write) hits
-system.cpu.l2cache.demand_miss_latency        5733000                       # number of demand (read+write) miss cycles
-system.cpu.l2cache.demand_miss_rate                 1                       # miss rate for demand accesses
+system.cpu.l2cache.demand_hits                      1                       # number of demand (read+write) hits
+system.cpu.l2cache.demand_miss_latency        5292000                       # number of demand (read+write) miss cycles
+system.cpu.l2cache.demand_miss_rate          0.997738                       # miss rate for demand accesses
 system.cpu.l2cache.demand_misses                  441                       # number of demand (read+write) misses
 system.cpu.l2cache.demand_mshr_hits                 0                       # number of demand (read+write) MSHR hits
 system.cpu.l2cache.demand_mshr_miss_latency      4851000                       # number of demand (read+write) MSHR miss cycles
-system.cpu.l2cache.demand_mshr_miss_rate            1                       # mshr miss rate for demand accesses
+system.cpu.l2cache.demand_mshr_miss_rate     0.997738                       # mshr miss rate for demand accesses
 system.cpu.l2cache.demand_mshr_misses             441                       # number of demand (read+write) MSHR misses
 system.cpu.l2cache.fast_writes                      0                       # number of fast writes performed
 system.cpu.l2cache.mshr_cap_events                  0                       # number of times MSHR cap was activated
 system.cpu.l2cache.no_allocate_misses               0                       # Number of misses that were no-allocate
-system.cpu.l2cache.overall_accesses               441                       # number of overall (read+write) accesses
-system.cpu.l2cache.overall_avg_miss_latency        13000                       # average overall miss latency
+system.cpu.l2cache.overall_accesses               442                       # number of overall (read+write) accesses
+system.cpu.l2cache.overall_avg_miss_latency        12000                       # average overall miss latency
 system.cpu.l2cache.overall_avg_mshr_miss_latency        11000                       # average overall mshr miss latency
 system.cpu.l2cache.overall_avg_mshr_uncacheable_latency <err: div-0>                       # average overall mshr uncacheable latency
-system.cpu.l2cache.overall_hits                     0                       # number of overall hits
-system.cpu.l2cache.overall_miss_latency       5733000                       # number of overall miss cycles
-system.cpu.l2cache.overall_miss_rate                1                       # miss rate for overall accesses
+system.cpu.l2cache.overall_hits                     1                       # number of overall hits
+system.cpu.l2cache.overall_miss_latency       5292000                       # number of overall miss cycles
+system.cpu.l2cache.overall_miss_rate         0.997738                       # miss rate for overall accesses
 system.cpu.l2cache.overall_misses                 441                       # number of overall misses
 system.cpu.l2cache.overall_mshr_hits                0                       # number of overall MSHR hits
 system.cpu.l2cache.overall_mshr_miss_latency      4851000                       # number of overall MSHR miss cycles
-system.cpu.l2cache.overall_mshr_miss_rate            1                       # mshr miss rate for overall accesses
+system.cpu.l2cache.overall_mshr_miss_rate     0.997738                       # mshr miss rate for overall accesses
 system.cpu.l2cache.overall_mshr_misses            441                       # number of overall MSHR misses
 system.cpu.l2cache.overall_mshr_uncacheable_latency            0                       # number of overall MSHR uncacheable cycles
 system.cpu.l2cache.overall_mshr_uncacheable_misses            0                       # number of overall MSHR uncacheable misses
@@ -197,14 +217,14 @@ system.cpu.l2cache.prefetcher.num_hwpf_removed_MSHR_hit            0
 system.cpu.l2cache.prefetcher.num_hwpf_span_page            0                       # number of hwpf spanning a virtual page
 system.cpu.l2cache.prefetcher.num_hwpf_squashed_from_miss            0                       # number of hwpf that got squashed due to a miss aborting calculation time
 system.cpu.l2cache.replacements                     0                       # number of replacements
-system.cpu.l2cache.sampled_refs                   441                       # Sample count of references to valid blocks.
+system.cpu.l2cache.sampled_refs                   354                       # Sample count of references to valid blocks.
 system.cpu.l2cache.soft_prefetch_mshr_full            0                       # number of mshr full events for SW prefetching instrutions
-system.cpu.l2cache.tagsinuse               232.802947                       # Cycle average of tags in use
-system.cpu.l2cache.total_refs                       0                       # Total number of references to valid blocks.
+system.cpu.l2cache.tagsinuse               179.464793                       # Cycle average of tags in use
+system.cpu.l2cache.total_refs                       1                       # Total number of references to valid blocks.
 system.cpu.l2cache.warmup_cycle                     0                       # Cycle when the warmup percentage was hit.
 system.cpu.l2cache.writebacks                       0                       # number of writebacks
 system.cpu.not_idle_fraction                        1                       # Percentage of non-idle cycles
-system.cpu.numCycles                         13168000                       # number of cpu cycles simulated
+system.cpu.numCycles                         13359000                       # number of cpu cycles simulated
 system.cpu.num_insts                             5642                       # Number of instructions executed
 system.cpu.num_refs                              1792                       # Number of memory references
 system.cpu.workload.PROG:num_syscalls              17                       # Number of system calls
diff --git a/tests/quick/00.hello/ref/alpha/linux/simple-timing/stdout b/tests/quick/00.hello/ref/alpha/linux/simple-timing/stdout
index 501ba5063..d25a0624e 100644
--- a/tests/quick/00.hello/ref/alpha/linux/simple-timing/stdout
+++ b/tests/quick/00.hello/ref/alpha/linux/simple-timing/stdout
@@ -6,9 +6,9 @@ The Regents of The University of Michigan
 All Rights Reserved
 
 
-M5 compiled Jun 10 2007 14:06:20
-M5 started Sun Jun 10 14:22:35 2007
-M5 executing on iceaxe
-command line: /Users/nate/build/outgoing/build/ALPHA_SE/m5.debug -d /Users/nate/build/outgoing/build/ALPHA_SE/tests/debug/quick/00.hello/alpha/linux/simple-timing tests/run.py quick/00.hello/alpha/linux/simple-timing
+M5 compiled Aug  3 2007 03:56:47
+M5 started Fri Aug  3 04:17:13 2007
+M5 executing on zizzer.eecs.umich.edu
+command line: build/ALPHA_SE/m5.fast -d build/ALPHA_SE/tests/fast/quick/00.hello/alpha/linux/simple-timing tests/run.py quick/00.hello/alpha/linux/simple-timing
 Global frequency set at 1000000000000 ticks per second
-Exiting @ tick 13168000 because target called exit()
+Exiting @ tick 13359000 because target called exit()
diff --git a/tests/quick/00.hello/ref/alpha/tru64/o3-timing/config.ini b/tests/quick/00.hello/ref/alpha/tru64/o3-timing/config.ini
index 36a50c983..ca7690f17 100644
--- a/tests/quick/00.hello/ref/alpha/tru64/o3-timing/config.ini
+++ b/tests/quick/00.hello/ref/alpha/tru64/o3-timing/config.ini
@@ -11,7 +11,7 @@ physmem=system.physmem
 
 [system.cpu]
 type=DerivO3CPU
-children=dcache fuPool icache l2cache toL2Bus workload
+children=dcache fuPool icache l2cache toL2Bus tracer workload
 BTBEntries=4096
 BTBTagSize=16
 LFSTSize=1024
@@ -86,6 +86,7 @@ smtROBPolicy=Partitioned
 smtROBThreshold=100
 squashWidth=8
 system=system
+tracer=system.cpu.tracer
 trapLatency=13
 wbDepth=1
 wbWidth=8
@@ -95,12 +96,9 @@ icache_port=system.cpu.icache.cpu_side
 
 [system.cpu.dcache]
 type=BaseCache
-adaptive_compression=false
 addr_range=0:18446744073709551615
 assoc=2
 block_size=64
-compressed_bus=false
-compression_latency=0
 hash_delay=1
 latency=1000
 lifo=false
@@ -118,12 +116,10 @@ prefetch_serial_squash=false
 prefetch_use_cpu_id=true
 prefetcher_size=100
 prioritizeRequests=false
-protocol=Null
 repl=Null
 size=262144
 split=false
 split_size=0
-store_compressed=false
 subblock_size=0
 tgts_per_mshr=20
 trace_addr=0
@@ -139,11 +135,11 @@ FUList=system.cpu.fuPool.FUList0 system.cpu.fuPool.FUList1 system.cpu.fuPool.FUL
 
 [system.cpu.fuPool.FUList0]
 type=FUDesc
-children=opList0
+children=opList
 count=6
-opList=system.cpu.fuPool.FUList0.opList0
+opList=system.cpu.fuPool.FUList0.opList
 
-[system.cpu.fuPool.FUList0.opList0]
+[system.cpu.fuPool.FUList0.opList]
 type=OpDesc
 issueLat=1
 opClass=IntAlu
@@ -217,11 +213,11 @@ opLat=24
 
 [system.cpu.fuPool.FUList4]
 type=FUDesc
-children=opList0
+children=opList
 count=0
-opList=system.cpu.fuPool.FUList4.opList0
+opList=system.cpu.fuPool.FUList4.opList
 
-[system.cpu.fuPool.FUList4.opList0]
+[system.cpu.fuPool.FUList4.opList]
 type=OpDesc
 issueLat=1
 opClass=MemRead
@@ -229,11 +225,11 @@ opLat=1
 
 [system.cpu.fuPool.FUList5]
 type=FUDesc
-children=opList0
+children=opList
 count=0
-opList=system.cpu.fuPool.FUList5.opList0
+opList=system.cpu.fuPool.FUList5.opList
 
-[system.cpu.fuPool.FUList5.opList0]
+[system.cpu.fuPool.FUList5.opList]
 type=OpDesc
 issueLat=1
 opClass=MemWrite
@@ -259,11 +255,11 @@ opLat=1
 
 [system.cpu.fuPool.FUList7]
 type=FUDesc
-children=opList0
+children=opList
 count=1
-opList=system.cpu.fuPool.FUList7.opList0
+opList=system.cpu.fuPool.FUList7.opList
 
-[system.cpu.fuPool.FUList7.opList0]
+[system.cpu.fuPool.FUList7.opList]
 type=OpDesc
 issueLat=3
 opClass=IprAccess
@@ -271,12 +267,9 @@ opLat=3
 
 [system.cpu.icache]
 type=BaseCache
-adaptive_compression=false
 addr_range=0:18446744073709551615
 assoc=2
 block_size=64
-compressed_bus=false
-compression_latency=0
 hash_delay=1
 latency=1000
 lifo=false
@@ -294,12 +287,10 @@ prefetch_serial_squash=false
 prefetch_use_cpu_id=true
 prefetcher_size=100
 prioritizeRequests=false
-protocol=Null
 repl=Null
 size=131072
 split=false
 split_size=0
-store_compressed=false
 subblock_size=0
 tgts_per_mshr=20
 trace_addr=0
@@ -310,12 +301,9 @@ mem_side=system.cpu.toL2Bus.port[0]
 
 [system.cpu.l2cache]
 type=BaseCache
-adaptive_compression=false
 addr_range=0:18446744073709551615
 assoc=2
 block_size=64
-compressed_bus=false
-compression_latency=0
 hash_delay=1
 latency=1000
 lifo=false
@@ -333,12 +321,10 @@ prefetch_serial_squash=false
 prefetch_use_cpu_id=true
 prefetcher_size=100
 prioritizeRequests=false
-protocol=Null
 repl=Null
 size=2097152
 split=false
 split_size=0
-store_compressed=false
 subblock_size=0
 tgts_per_mshr=5
 trace_addr=0
@@ -356,6 +342,9 @@ responder_set=false
 width=64
 port=system.cpu.icache.mem_side system.cpu.dcache.mem_side system.cpu.l2cache.cpu_side
 
+[system.cpu.tracer]
+type=ExeTracer
+
 [system.cpu.workload]
 type=LiveProcess
 cmd=hello
diff --git a/tests/quick/00.hello/ref/alpha/tru64/o3-timing/m5stats.txt b/tests/quick/00.hello/ref/alpha/tru64/o3-timing/m5stats.txt
index d400dcd22..f575843e4 100644
--- a/tests/quick/00.hello/ref/alpha/tru64/o3-timing/m5stats.txt
+++ b/tests/quick/00.hello/ref/alpha/tru64/o3-timing/m5stats.txt
@@ -1,40 +1,40 @@
 
 ---------- Begin Simulation Statistics ----------
 global.BPredUnit.BTBCorrect                         0                       # Number of correct BTB predictions (this stat may not work properly.
-global.BPredUnit.BTBHits                          132                       # Number of BTB hits
-global.BPredUnit.BTBLookups                       584                       # Number of BTB lookups
-global.BPredUnit.RASInCorrect                      28                       # Number of incorrect RAS predictions.
-global.BPredUnit.condIncorrect                    208                       # Number of conditional branches incorrect
-global.BPredUnit.condPredicted                    376                       # Number of conditional branches predicted
-global.BPredUnit.lookups                          738                       # Number of BP lookups
-global.BPredUnit.usedRAS                          140                       # Number of times the RAS was used to get a target.
-host_inst_rate                                  39805                       # Simulator instruction rate (inst/s)
-host_mem_usage                                 153128                       # Number of bytes of host memory used
-host_seconds                                     0.06                       # Real time elapsed on the host
-host_tick_rate                               34110715                       # Simulator tick rate (ticks/s)
-memdepunit.memDep.conflictingLoads                  8                       # Number of conflicting loads.
+global.BPredUnit.BTBHits                          146                       # Number of BTB hits
+global.BPredUnit.BTBLookups                       613                       # Number of BTB lookups
+global.BPredUnit.RASInCorrect                      32                       # Number of incorrect RAS predictions.
+global.BPredUnit.condIncorrect                    212                       # Number of conditional branches incorrect
+global.BPredUnit.condPredicted                    393                       # Number of conditional branches predicted
+global.BPredUnit.lookups                          777                       # Number of BP lookups
+global.BPredUnit.usedRAS                          153                       # Number of times the RAS was used to get a target.
+host_inst_rate                                  24407                       # Simulator instruction rate (inst/s)
+host_mem_usage                                 153952                       # Number of bytes of host memory used
+host_seconds                                     0.10                       # Real time elapsed on the host
+host_tick_rate                               19202153                       # Simulator tick rate (ticks/s)
+memdepunit.memDep.conflictingLoads                  7                       # Number of conflicting loads.
 memdepunit.memDep.conflictingStores                 7                       # Number of conflicting stores.
-memdepunit.memDep.insertedLoads                   608                       # Number of loads inserted to the mem dependence unit.
-memdepunit.memDep.insertedStores                  357                       # Number of stores inserted to the mem dependence unit.
+memdepunit.memDep.insertedLoads                   635                       # Number of loads inserted to the mem dependence unit.
+memdepunit.memDep.insertedStores                  367                       # Number of stores inserted to the mem dependence unit.
 sim_freq                                 1000000000000                       # Frequency of simulated ticks
 sim_insts                                        2387                       # Number of instructions simulated
 sim_seconds                                  0.000002                       # Number of seconds simulated
-sim_ticks                                     2055000                       # Number of ticks simulated
+sim_ticks                                     1884000                       # Number of ticks simulated
 system.cpu.commit.COM:branches                    396                       # Number of branches committed
-system.cpu.commit.COM:bw_lim_events                41                       # number cycles where commit BW limit reached
+system.cpu.commit.COM:bw_lim_events                33                       # number cycles where commit BW limit reached
 system.cpu.commit.COM:bw_limited                    0                       # number of insts not committed due to BW limits
 system.cpu.commit.COM:committed_per_cycle.start_dist                     # Number of insts commited each cycle
-system.cpu.commit.COM:committed_per_cycle.samples         3910                      
+system.cpu.commit.COM:committed_per_cycle.samples         3543                      
 system.cpu.commit.COM:committed_per_cycle.min_value            0                      
-                               0         2950   7544.76%           
-                               1          266    680.31%           
-                               2          336    859.34%           
-                               3          131    335.04%           
-                               4           76    194.37%           
-                               5           65    166.24%           
-                               6           27     69.05%           
-                               7           18     46.04%           
-                               8           41    104.86%           
+                               0         2580   7281.96%           
+                               1          265    747.95%           
+                               2          337    951.17%           
+                               3          138    389.50%           
+                               4           67    189.11%           
+                               5           69    194.75%           
+                               6           32     90.32%           
+                               7           22     62.09%           
+                               8           33     93.14%           
 system.cpu.commit.COM:committed_per_cycle.max_value            8                      
 system.cpu.commit.COM:committed_per_cycle.end_dist
 
@@ -43,70 +43,70 @@ system.cpu.commit.COM:loads                       415                       # Nu
 system.cpu.commit.COM:membars                       0                       # Number of memory barriers committed
 system.cpu.commit.COM:refs                        709                       # Number of memory references committed
 system.cpu.commit.COM:swp_count                     0                       # Number of s/w prefetches committed
-system.cpu.commit.branchMispredicts               128                       # The number of times a branch was mispredicted
+system.cpu.commit.branchMispredicts               131                       # The number of times a branch was mispredicted
 system.cpu.commit.commitCommittedInsts           2576                       # The number of committed instructions
 system.cpu.commit.commitNonSpecStalls               4                       # The number of times commit has been forced to stall to communicate backwards
-system.cpu.commit.commitSquashedInsts             978                       # The number of squashed insts skipped by commit
+system.cpu.commit.commitSquashedInsts            1118                       # The number of squashed insts skipped by commit
 system.cpu.committedInsts                        2387                       # Number of Instructions Simulated
 system.cpu.committedInsts_total                  2387                       # Number of Instructions Simulated
-system.cpu.cpi                               1.723083                       # CPI: Cycles Per Instruction
-system.cpu.cpi_total                         1.723083                       # CPI: Total CPI of All Threads
-system.cpu.dcache.ReadReq_accesses                514                       # number of ReadReq accesses(hits+misses)
-system.cpu.dcache.ReadReq_avg_miss_latency  5391.304348                       # average ReadReq miss latency
-system.cpu.dcache.ReadReq_avg_mshr_miss_latency  4669.491525                       # average ReadReq mshr miss latency
-system.cpu.dcache.ReadReq_hits                    445                       # number of ReadReq hits
-system.cpu.dcache.ReadReq_miss_latency         372000                       # number of ReadReq miss cycles
-system.cpu.dcache.ReadReq_miss_rate          0.134241                       # miss rate for ReadReq accesses
-system.cpu.dcache.ReadReq_misses                   69                       # number of ReadReq misses
+system.cpu.cpi                               1.578969                       # CPI: Cycles Per Instruction
+system.cpu.cpi_total                         1.578969                       # CPI: Total CPI of All Threads
+system.cpu.dcache.ReadReq_accesses                518                       # number of ReadReq accesses(hits+misses)
+system.cpu.dcache.ReadReq_avg_miss_latency  6583.333333                       # average ReadReq miss latency
+system.cpu.dcache.ReadReq_avg_mshr_miss_latency  4891.666667                       # average ReadReq mshr miss latency
+system.cpu.dcache.ReadReq_hits                    458                       # number of ReadReq hits
+system.cpu.dcache.ReadReq_miss_latency         395000                       # number of ReadReq miss cycles
+system.cpu.dcache.ReadReq_miss_rate          0.115830                       # miss rate for ReadReq accesses
+system.cpu.dcache.ReadReq_misses                   60                       # number of ReadReq misses
 system.cpu.dcache.ReadReq_mshr_hits                10                       # number of ReadReq MSHR hits
-system.cpu.dcache.ReadReq_mshr_miss_latency       275500                       # number of ReadReq MSHR miss cycles
-system.cpu.dcache.ReadReq_mshr_miss_rate     0.114786                       # mshr miss rate for ReadReq accesses
-system.cpu.dcache.ReadReq_mshr_misses              59                       # number of ReadReq MSHR misses
-system.cpu.dcache.WriteReq_accesses               294                       # number of WriteReq accesses(hits+misses)
-system.cpu.dcache.WriteReq_avg_miss_latency  5669.014085                       # average WriteReq miss latency
-system.cpu.dcache.WriteReq_avg_mshr_miss_latency         5020                       # average WriteReq mshr miss latency
-system.cpu.dcache.WriteReq_hits                   223                       # number of WriteReq hits
-system.cpu.dcache.WriteReq_miss_latency        402500                       # number of WriteReq miss cycles
-system.cpu.dcache.WriteReq_miss_rate         0.241497                       # miss rate for WriteReq accesses
-system.cpu.dcache.WriteReq_misses                  71                       # number of WriteReq misses
-system.cpu.dcache.WriteReq_mshr_hits               46                       # number of WriteReq MSHR hits
-system.cpu.dcache.WriteReq_mshr_miss_latency       125500                       # number of WriteReq MSHR miss cycles
-system.cpu.dcache.WriteReq_mshr_miss_rate     0.085034                       # mshr miss rate for WriteReq accesses
-system.cpu.dcache.WriteReq_mshr_misses             25                       # number of WriteReq MSHR misses
+system.cpu.dcache.ReadReq_mshr_miss_latency       293500                       # number of ReadReq MSHR miss cycles
+system.cpu.dcache.ReadReq_mshr_miss_rate     0.115830                       # mshr miss rate for ReadReq accesses
+system.cpu.dcache.ReadReq_mshr_misses              60                       # number of ReadReq MSHR misses
+system.cpu.dcache.WriteReq_accesses               239                       # number of WriteReq accesses(hits+misses)
+system.cpu.dcache.WriteReq_avg_miss_latency 14216.216216                       # average WriteReq miss latency
+system.cpu.dcache.WriteReq_avg_mshr_miss_latency  5202.702703                       # average WriteReq mshr miss latency
+system.cpu.dcache.WriteReq_hits                   202                       # number of WriteReq hits
+system.cpu.dcache.WriteReq_miss_latency        526000                       # number of WriteReq miss cycles
+system.cpu.dcache.WriteReq_miss_rate         0.154812                       # miss rate for WriteReq accesses
+system.cpu.dcache.WriteReq_misses                  37                       # number of WriteReq misses
+system.cpu.dcache.WriteReq_mshr_hits               55                       # number of WriteReq MSHR hits
+system.cpu.dcache.WriteReq_mshr_miss_latency       192500                       # number of WriteReq MSHR miss cycles
+system.cpu.dcache.WriteReq_mshr_miss_rate     0.154812                       # mshr miss rate for WriteReq accesses
+system.cpu.dcache.WriteReq_mshr_misses             37                       # number of WriteReq MSHR misses
 system.cpu.dcache.avg_blocked_cycles_no_mshrs <err: div-0>                       # average number of cycles each access was blocked
 system.cpu.dcache.avg_blocked_cycles_no_targets <err: div-0>                       # average number of cycles each access was blocked
-system.cpu.dcache.avg_refs                   7.952381                       # Average number of references to valid blocks.
+system.cpu.dcache.avg_refs                   7.905882                       # Average number of references to valid blocks.
 system.cpu.dcache.blocked_no_mshrs                  0                       # number of cycles access was blocked
 system.cpu.dcache.blocked_no_targets                0                       # number of cycles access was blocked
 system.cpu.dcache.blocked_cycles_no_mshrs            0                       # number of cycles access was blocked
 system.cpu.dcache.blocked_cycles_no_targets            0                       # number of cycles access was blocked
 system.cpu.dcache.cache_copies                      0                       # number of cache copies performed
-system.cpu.dcache.demand_accesses                 808                       # number of demand (read+write) accesses
-system.cpu.dcache.demand_avg_miss_latency  5532.142857                       # average overall miss latency
-system.cpu.dcache.demand_avg_mshr_miss_latency  4773.809524                       # average overall mshr miss latency
-system.cpu.dcache.demand_hits                     668                       # number of demand (read+write) hits
-system.cpu.dcache.demand_miss_latency          774500                       # number of demand (read+write) miss cycles
-system.cpu.dcache.demand_miss_rate           0.173267                       # miss rate for demand accesses
-system.cpu.dcache.demand_misses                   140                       # number of demand (read+write) misses
-system.cpu.dcache.demand_mshr_hits                 56                       # number of demand (read+write) MSHR hits
-system.cpu.dcache.demand_mshr_miss_latency       401000                       # number of demand (read+write) MSHR miss cycles
-system.cpu.dcache.demand_mshr_miss_rate      0.103960                       # mshr miss rate for demand accesses
-system.cpu.dcache.demand_mshr_misses               84                       # number of demand (read+write) MSHR misses
+system.cpu.dcache.demand_accesses                 757                       # number of demand (read+write) accesses
+system.cpu.dcache.demand_avg_miss_latency  9494.845361                       # average overall miss latency
+system.cpu.dcache.demand_avg_mshr_miss_latency  5010.309278                       # average overall mshr miss latency
+system.cpu.dcache.demand_hits                     660                       # number of demand (read+write) hits
+system.cpu.dcache.demand_miss_latency          921000                       # number of demand (read+write) miss cycles
+system.cpu.dcache.demand_miss_rate           0.128137                       # miss rate for demand accesses
+system.cpu.dcache.demand_misses                    97                       # number of demand (read+write) misses
+system.cpu.dcache.demand_mshr_hits                 65                       # number of demand (read+write) MSHR hits
+system.cpu.dcache.demand_mshr_miss_latency       486000                       # number of demand (read+write) MSHR miss cycles
+system.cpu.dcache.demand_mshr_miss_rate      0.128137                       # mshr miss rate for demand accesses
+system.cpu.dcache.demand_mshr_misses               97                       # number of demand (read+write) MSHR misses
 system.cpu.dcache.fast_writes                       0                       # number of fast writes performed
 system.cpu.dcache.mshr_cap_events                   0                       # number of times MSHR cap was activated
 system.cpu.dcache.no_allocate_misses                0                       # Number of misses that were no-allocate
-system.cpu.dcache.overall_accesses                808                       # number of overall (read+write) accesses
-system.cpu.dcache.overall_avg_miss_latency  5532.142857                       # average overall miss latency
-system.cpu.dcache.overall_avg_mshr_miss_latency  4773.809524                       # average overall mshr miss latency
+system.cpu.dcache.overall_accesses                757                       # number of overall (read+write) accesses
+system.cpu.dcache.overall_avg_miss_latency  9494.845361                       # average overall miss latency
+system.cpu.dcache.overall_avg_mshr_miss_latency  5010.309278                       # average overall mshr miss latency
 system.cpu.dcache.overall_avg_mshr_uncacheable_latency <err: div-0>                       # average overall mshr uncacheable latency
-system.cpu.dcache.overall_hits                    668                       # number of overall hits
-system.cpu.dcache.overall_miss_latency         774500                       # number of overall miss cycles
-system.cpu.dcache.overall_miss_rate          0.173267                       # miss rate for overall accesses
-system.cpu.dcache.overall_misses                  140                       # number of overall misses
-system.cpu.dcache.overall_mshr_hits                56                       # number of overall MSHR hits
-system.cpu.dcache.overall_mshr_miss_latency       401000                       # number of overall MSHR miss cycles
-system.cpu.dcache.overall_mshr_miss_rate     0.103960                       # mshr miss rate for overall accesses
-system.cpu.dcache.overall_mshr_misses              84                       # number of overall MSHR misses
+system.cpu.dcache.overall_hits                    660                       # number of overall hits
+system.cpu.dcache.overall_miss_latency         921000                       # number of overall miss cycles
+system.cpu.dcache.overall_miss_rate          0.128137                       # miss rate for overall accesses
+system.cpu.dcache.overall_misses                   97                       # number of overall misses
+system.cpu.dcache.overall_mshr_hits                65                       # number of overall MSHR hits
+system.cpu.dcache.overall_mshr_miss_latency       486000                       # number of overall MSHR miss cycles
+system.cpu.dcache.overall_mshr_miss_rate     0.128137                       # mshr miss rate for overall accesses
+system.cpu.dcache.overall_mshr_misses              97                       # number of overall MSHR misses
 system.cpu.dcache.overall_mshr_uncacheable_latency            0                       # number of overall MSHR uncacheable cycles
 system.cpu.dcache.overall_mshr_uncacheable_misses            0                       # number of overall MSHR uncacheable misses
 system.cpu.dcache.prefetcher.num_hwpf_already_in_cache            0                       # number of hwpf that were already in the cache
@@ -119,89 +119,90 @@ system.cpu.dcache.prefetcher.num_hwpf_removed_MSHR_hit            0
 system.cpu.dcache.prefetcher.num_hwpf_span_page            0                       # number of hwpf spanning a virtual page
 system.cpu.dcache.prefetcher.num_hwpf_squashed_from_miss            0                       # number of hwpf that got squashed due to a miss aborting calculation time
 system.cpu.dcache.replacements                      0                       # number of replacements
-system.cpu.dcache.sampled_refs                     84                       # Sample count of references to valid blocks.
+system.cpu.dcache.sampled_refs                     85                       # Sample count of references to valid blocks.
 system.cpu.dcache.soft_prefetch_mshr_full            0                       # number of mshr full events for SW prefetching instrutions
-system.cpu.dcache.tagsinuse                 51.873008                       # Cycle average of tags in use
-system.cpu.dcache.total_refs                      668                       # Total number of references to valid blocks.
+system.cpu.dcache.tagsinuse                 51.399169                       # Cycle average of tags in use
+system.cpu.dcache.total_refs                      672                       # Total number of references to valid blocks.
 system.cpu.dcache.warmup_cycle                      0                       # Cycle when the warmup percentage was hit.
 system.cpu.dcache.writebacks                        0                       # number of writebacks
-system.cpu.decode.DECODE:BlockedCycles             95                       # Number of cycles decode is blocked
-system.cpu.decode.DECODE:BranchMispred             81                       # Number of times decode detected a branch misprediction
-system.cpu.decode.DECODE:BranchResolved           123                       # Number of times decode resolved a branch
-system.cpu.decode.DECODE:DecodedInsts            4033                       # Number of instructions handled by decode
-system.cpu.decode.DECODE:IdleCycles              3045                       # Number of cycles decode is idle
-system.cpu.decode.DECODE:RunCycles                771                       # Number of cycles decode is running
-system.cpu.decode.DECODE:SquashCycles             202                       # Number of cycles decode is squashing
-system.cpu.decode.DECODE:SquashedInsts            298                       # Number of squashed instructions handled by decode
-system.cpu.fetch.Branches                         738                       # Number of branches that fetch encountered
-system.cpu.fetch.CacheLines                       654                       # Number of cache lines fetched
-system.cpu.fetch.Cycles                          1444                       # Number of cycles fetch has run and was not squashing or blocked
-system.cpu.fetch.IcacheSquashes                   120                       # Number of outstanding Icache misses that were squashed
-system.cpu.fetch.Insts                           4685                       # Number of instructions fetch has processed
-system.cpu.fetch.SquashCycles                     218                       # Number of cycles fetch has spent squashing
-system.cpu.fetch.branchRate                  0.179431                       # Number of branch fetches per cycle
-system.cpu.fetch.icacheStallCycles                654                       # Number of cycles fetch is stalled on an Icache miss
-system.cpu.fetch.predictedBranches                272                       # Number of branches that fetch has predicted taken
-system.cpu.fetch.rate                        1.139071                       # Number of inst fetches per cycle
+system.cpu.decode.DECODE:BlockedCycles             87                       # Number of cycles decode is blocked
+system.cpu.decode.DECODE:BranchMispred             83                       # Number of times decode detected a branch misprediction
+system.cpu.decode.DECODE:BranchResolved           125                       # Number of times decode resolved a branch
+system.cpu.decode.DECODE:DecodedInsts            4218                       # Number of instructions handled by decode
+system.cpu.decode.DECODE:IdleCycles              2648                       # Number of cycles decode is idle
+system.cpu.decode.DECODE:RunCycles                808                       # Number of cycles decode is running
+system.cpu.decode.DECODE:SquashCycles             225                       # Number of cycles decode is squashing
+system.cpu.decode.DECODE:SquashedInsts            304                       # Number of squashed instructions handled by decode
+system.cpu.decode.DECODE:UnblockCycles              1                       # Number of cycles decode is unblocking
+system.cpu.fetch.Branches                         777                       # Number of branches that fetch encountered
+system.cpu.fetch.CacheLines                       686                       # Number of cache lines fetched
+system.cpu.fetch.Cycles                          1528                       # Number of cycles fetch has run and was not squashing or blocked
+system.cpu.fetch.IcacheSquashes                   107                       # Number of outstanding Icache misses that were squashed
+system.cpu.fetch.Insts                           4951                       # Number of instructions fetch has processed
+system.cpu.fetch.SquashCycles                     223                       # Number of cycles fetch has spent squashing
+system.cpu.fetch.branchRate                  0.206155                       # Number of branch fetches per cycle
+system.cpu.fetch.icacheStallCycles                686                       # Number of cycles fetch is stalled on an Icache miss
+system.cpu.fetch.predictedBranches                299                       # Number of branches that fetch has predicted taken
+system.cpu.fetch.rate                        1.313611                       # Number of inst fetches per cycle
 system.cpu.fetch.rateDist.start_dist                           # Number of instructions fetched each cycle (Total)
-system.cpu.fetch.rateDist.samples                4113                      
+system.cpu.fetch.rateDist.samples                3769                      
 system.cpu.fetch.rateDist.min_value                 0                      
-                               0         3325   8084.12%           
-                               1           32     77.80%           
-                               2           80    194.51%           
-                               3           50    121.57%           
-                               4           99    240.70%           
-                               5           52    126.43%           
-                               6           39     94.82%           
-                               7           35     85.10%           
-                               8          401    974.96%           
+                               0         2929   7771.29%           
+                               1           36     95.52%           
+                               2           88    233.48%           
+                               3           54    143.27%           
+                               4          108    286.55%           
+                               5           55    145.93%           
+                               6           40    106.13%           
+                               7           42    111.44%           
+                               8          417   1106.39%           
 system.cpu.fetch.rateDist.max_value                 8                      
 system.cpu.fetch.rateDist.end_dist
 
-system.cpu.icache.ReadReq_accesses                654                       # number of ReadReq accesses(hits+misses)
-system.cpu.icache.ReadReq_avg_miss_latency  5298.507463                       # average ReadReq miss latency
-system.cpu.icache.ReadReq_avg_mshr_miss_latency  4556.451613                       # average ReadReq mshr miss latency
-system.cpu.icache.ReadReq_hits                    453                       # number of ReadReq hits
-system.cpu.icache.ReadReq_miss_latency        1065000                       # number of ReadReq miss cycles
-system.cpu.icache.ReadReq_miss_rate          0.307339                       # miss rate for ReadReq accesses
-system.cpu.icache.ReadReq_misses                  201                       # number of ReadReq misses
-system.cpu.icache.ReadReq_mshr_hits                15                       # number of ReadReq MSHR hits
-system.cpu.icache.ReadReq_mshr_miss_latency       847500                       # number of ReadReq MSHR miss cycles
-system.cpu.icache.ReadReq_mshr_miss_rate     0.284404                       # mshr miss rate for ReadReq accesses
+system.cpu.icache.ReadReq_accesses                676                       # number of ReadReq accesses(hits+misses)
+system.cpu.icache.ReadReq_avg_miss_latency  5629.032258                       # average ReadReq miss latency
+system.cpu.icache.ReadReq_avg_mshr_miss_latency  4489.247312                       # average ReadReq mshr miss latency
+system.cpu.icache.ReadReq_hits                    490                       # number of ReadReq hits
+system.cpu.icache.ReadReq_miss_latency        1047000                       # number of ReadReq miss cycles
+system.cpu.icache.ReadReq_miss_rate          0.275148                       # miss rate for ReadReq accesses
+system.cpu.icache.ReadReq_misses                  186                       # number of ReadReq misses
+system.cpu.icache.ReadReq_mshr_hits                10                       # number of ReadReq MSHR hits
+system.cpu.icache.ReadReq_mshr_miss_latency       835000                       # number of ReadReq MSHR miss cycles
+system.cpu.icache.ReadReq_mshr_miss_rate     0.275148                       # mshr miss rate for ReadReq accesses
 system.cpu.icache.ReadReq_mshr_misses             186                       # number of ReadReq MSHR misses
 system.cpu.icache.avg_blocked_cycles_no_mshrs <err: div-0>                       # average number of cycles each access was blocked
 system.cpu.icache.avg_blocked_cycles_no_targets <err: div-0>                       # average number of cycles each access was blocked
-system.cpu.icache.avg_refs                   2.435484                       # Average number of references to valid blocks.
+system.cpu.icache.avg_refs                   2.634409                       # Average number of references to valid blocks.
 system.cpu.icache.blocked_no_mshrs                  0                       # number of cycles access was blocked
 system.cpu.icache.blocked_no_targets                0                       # number of cycles access was blocked
 system.cpu.icache.blocked_cycles_no_mshrs            0                       # number of cycles access was blocked
 system.cpu.icache.blocked_cycles_no_targets            0                       # number of cycles access was blocked
 system.cpu.icache.cache_copies                      0                       # number of cache copies performed
-system.cpu.icache.demand_accesses                 654                       # number of demand (read+write) accesses
-system.cpu.icache.demand_avg_miss_latency  5298.507463                       # average overall miss latency
-system.cpu.icache.demand_avg_mshr_miss_latency  4556.451613                       # average overall mshr miss latency
-system.cpu.icache.demand_hits                     453                       # number of demand (read+write) hits
-system.cpu.icache.demand_miss_latency         1065000                       # number of demand (read+write) miss cycles
-system.cpu.icache.demand_miss_rate           0.307339                       # miss rate for demand accesses
-system.cpu.icache.demand_misses                   201                       # number of demand (read+write) misses
-system.cpu.icache.demand_mshr_hits                 15                       # number of demand (read+write) MSHR hits
-system.cpu.icache.demand_mshr_miss_latency       847500                       # number of demand (read+write) MSHR miss cycles
-system.cpu.icache.demand_mshr_miss_rate      0.284404                       # mshr miss rate for demand accesses
+system.cpu.icache.demand_accesses                 676                       # number of demand (read+write) accesses
+system.cpu.icache.demand_avg_miss_latency  5629.032258                       # average overall miss latency
+system.cpu.icache.demand_avg_mshr_miss_latency  4489.247312                       # average overall mshr miss latency
+system.cpu.icache.demand_hits                     490                       # number of demand (read+write) hits
+system.cpu.icache.demand_miss_latency         1047000                       # number of demand (read+write) miss cycles
+system.cpu.icache.demand_miss_rate           0.275148                       # miss rate for demand accesses
+system.cpu.icache.demand_misses                   186                       # number of demand (read+write) misses
+system.cpu.icache.demand_mshr_hits                 10                       # number of demand (read+write) MSHR hits
+system.cpu.icache.demand_mshr_miss_latency       835000                       # number of demand (read+write) MSHR miss cycles
+system.cpu.icache.demand_mshr_miss_rate      0.275148                       # mshr miss rate for demand accesses
 system.cpu.icache.demand_mshr_misses              186                       # number of demand (read+write) MSHR misses
 system.cpu.icache.fast_writes                       0                       # number of fast writes performed
 system.cpu.icache.mshr_cap_events                   0                       # number of times MSHR cap was activated
 system.cpu.icache.no_allocate_misses                0                       # Number of misses that were no-allocate
-system.cpu.icache.overall_accesses                654                       # number of overall (read+write) accesses
-system.cpu.icache.overall_avg_miss_latency  5298.507463                       # average overall miss latency
-system.cpu.icache.overall_avg_mshr_miss_latency  4556.451613                       # average overall mshr miss latency
+system.cpu.icache.overall_accesses                676                       # number of overall (read+write) accesses
+system.cpu.icache.overall_avg_miss_latency  5629.032258                       # average overall miss latency
+system.cpu.icache.overall_avg_mshr_miss_latency  4489.247312                       # average overall mshr miss latency
 system.cpu.icache.overall_avg_mshr_uncacheable_latency <err: div-0>                       # average overall mshr uncacheable latency
-system.cpu.icache.overall_hits                    453                       # number of overall hits
-system.cpu.icache.overall_miss_latency        1065000                       # number of overall miss cycles
-system.cpu.icache.overall_miss_rate          0.307339                       # miss rate for overall accesses
-system.cpu.icache.overall_misses                  201                       # number of overall misses
-system.cpu.icache.overall_mshr_hits                15                       # number of overall MSHR hits
-system.cpu.icache.overall_mshr_miss_latency       847500                       # number of overall MSHR miss cycles
-system.cpu.icache.overall_mshr_miss_rate     0.284404                       # mshr miss rate for overall accesses
+system.cpu.icache.overall_hits                    490                       # number of overall hits
+system.cpu.icache.overall_miss_latency        1047000                       # number of overall miss cycles
+system.cpu.icache.overall_miss_rate          0.275148                       # miss rate for overall accesses
+system.cpu.icache.overall_misses                  186                       # number of overall misses
+system.cpu.icache.overall_mshr_hits                10                       # number of overall MSHR hits
+system.cpu.icache.overall_mshr_miss_latency       835000                       # number of overall MSHR miss cycles
+system.cpu.icache.overall_mshr_miss_rate     0.275148                       # mshr miss rate for overall accesses
 system.cpu.icache.overall_mshr_misses             186                       # number of overall MSHR misses
 system.cpu.icache.overall_mshr_uncacheable_latency            0                       # number of overall MSHR uncacheable cycles
 system.cpu.icache.overall_mshr_uncacheable_misses            0                       # number of overall MSHR uncacheable misses
@@ -217,59 +218,59 @@ system.cpu.icache.prefetcher.num_hwpf_squashed_from_miss            0
 system.cpu.icache.replacements                      0                       # number of replacements
 system.cpu.icache.sampled_refs                    186                       # Sample count of references to valid blocks.
 system.cpu.icache.soft_prefetch_mshr_full            0                       # number of mshr full events for SW prefetching instrutions
-system.cpu.icache.tagsinuse                106.293956                       # Cycle average of tags in use
-system.cpu.icache.total_refs                      453                       # Total number of references to valid blocks.
+system.cpu.icache.tagsinuse                104.691657                       # Cycle average of tags in use
+system.cpu.icache.total_refs                      490                       # Total number of references to valid blocks.
 system.cpu.icache.warmup_cycle                      0                       # Cycle when the warmup percentage was hit.
 system.cpu.icache.writebacks                        0                       # number of writebacks
-system.cpu.idleCycles                            2992                       # Total number of cycles that the CPU has spent unscheduled due to idling
-system.cpu.iew.EXEC:branches                      501                       # Number of branches executed
-system.cpu.iew.EXEC:nop                           234                       # number of nop insts executed
-system.cpu.iew.EXEC:rate                     0.726477                       # Inst execution rate
-system.cpu.iew.EXEC:refs                          878                       # number of memory reference insts executed
-system.cpu.iew.EXEC:stores                        333                       # Number of stores executed
+system.cpu.idleCycles                             998                       # Total number of cycles that the CPU has spent unscheduled due to idling
+system.cpu.iew.EXEC:branches                      516                       # Number of branches executed
+system.cpu.iew.EXEC:nop                           242                       # number of nop insts executed
+system.cpu.iew.EXEC:rate                     0.810295                       # Inst execution rate
+system.cpu.iew.EXEC:refs                          894                       # number of memory reference insts executed
+system.cpu.iew.EXEC:stores                        334                       # Number of stores executed
 system.cpu.iew.EXEC:swp                             0                       # number of swp insts executed
-system.cpu.iew.WB:consumers                      1652                       # num instructions consuming a value
-system.cpu.iew.WB:count                          2914                       # cumulative count of insts written-back
-system.cpu.iew.WB:fanout                     0.799637                       # average fanout of values written-back
+system.cpu.iew.WB:consumers                      1725                       # num instructions consuming a value
+system.cpu.iew.WB:count                          2987                       # cumulative count of insts written-back
+system.cpu.iew.WB:fanout                     0.794203                       # average fanout of values written-back
 system.cpu.iew.WB:penalized                         0                       # number of instrctions required to write to 'other' IQ
 system.cpu.iew.WB:penalized_rate                    0                       # fraction of instructions written-back that wrote to 'other' IQ
-system.cpu.iew.WB:producers                      1321                       # num instructions producing a value
-system.cpu.iew.WB:rate                       0.708485                       # insts written-back per cycle
-system.cpu.iew.WB:sent                           2931                       # cumulative count of insts sent to commit
-system.cpu.iew.branchMispredicts                  135                       # Number of branch mispredicts detected at execute
+system.cpu.iew.WB:producers                      1370                       # num instructions producing a value
+system.cpu.iew.WB:rate                       0.792518                       # insts written-back per cycle
+system.cpu.iew.WB:sent                           3007                       # cumulative count of insts sent to commit
+system.cpu.iew.branchMispredicts                  146                       # Number of branch mispredicts detected at execute
 system.cpu.iew.iewBlockCycles                       0                       # Number of cycles IEW is blocking
-system.cpu.iew.iewDispLoadInsts                   608                       # Number of dispatched load instructions
-system.cpu.iew.iewDispNonSpecInsts                  7                       # Number of dispatched non-speculative instructions
-system.cpu.iew.iewDispSquashedInsts               179                       # Number of squashed instructions skipped by dispatch
-system.cpu.iew.iewDispStoreInsts                  357                       # Number of dispatched store instructions
-system.cpu.iew.iewDispatchedInsts                3571                       # Number of instructions dispatched to IQ
-system.cpu.iew.iewExecLoadInsts                   545                       # Number of load instructions executed
-system.cpu.iew.iewExecSquashedInsts                87                       # Number of squashed instructions skipped in execute
-system.cpu.iew.iewExecutedInsts                  2988                       # Number of executed instructions
+system.cpu.iew.iewDispLoadInsts                   635                       # Number of dispatched load instructions
+system.cpu.iew.iewDispNonSpecInsts                  6                       # Number of dispatched non-speculative instructions
+system.cpu.iew.iewDispSquashedInsts                92                       # Number of squashed instructions skipped by dispatch
+system.cpu.iew.iewDispStoreInsts                  367                       # Number of dispatched store instructions
+system.cpu.iew.iewDispatchedInsts                3711                       # Number of instructions dispatched to IQ
+system.cpu.iew.iewExecLoadInsts                   560                       # Number of load instructions executed
+system.cpu.iew.iewExecSquashedInsts               111                       # Number of squashed instructions skipped in execute
+system.cpu.iew.iewExecutedInsts                  3054                       # Number of executed instructions
 system.cpu.iew.iewIQFullEvents                      0                       # Number of times the IQ has become full, causing a stall
 system.cpu.iew.iewIdleCycles                        0                       # Number of cycles IEW is idle
 system.cpu.iew.iewLSQFullEvents                     0                       # Number of times the LSQ has become full, causing a stall
-system.cpu.iew.iewSquashCycles                    202                       # Number of cycles IEW is squashing
+system.cpu.iew.iewSquashCycles                    225                       # Number of cycles IEW is squashing
 system.cpu.iew.iewUnblockCycles                     0                       # Number of cycles IEW is unblocking
 system.cpu.iew.lsq.thread.0.blockedLoads            0                       # Number of blocked loads due to partial load-store forwarding
 system.cpu.iew.lsq.thread.0.cacheBlocked            0                       # Number of times an access to memory failed due to the cache being blocked
-system.cpu.iew.lsq.thread.0.forwLoads              22                       # Number of loads that had data forwarded from stores
+system.cpu.iew.lsq.thread.0.forwLoads              24                       # Number of loads that had data forwarded from stores
 system.cpu.iew.lsq.thread.0.ignoredResponses            0                       # Number of memory responses ignored because the instruction is squashed
 system.cpu.iew.lsq.thread.0.invAddrLoads            0                       # Number of loads ignored due to an invalid address
 system.cpu.iew.lsq.thread.0.invAddrSwpfs            0                       # Number of software prefetches ignored due to an invalid address
-system.cpu.iew.lsq.thread.0.memOrderViolation           10                       # Number of memory ordering violations
+system.cpu.iew.lsq.thread.0.memOrderViolation           12                       # Number of memory ordering violations
 system.cpu.iew.lsq.thread.0.rescheduledLoads            0                       # Number of loads that were rescheduled
-system.cpu.iew.lsq.thread.0.squashedLoads          193                       # Number of loads squashed
-system.cpu.iew.lsq.thread.0.squashedStores           63                       # Number of stores squashed
-system.cpu.iew.memOrderViolationEvents             10                       # Number of memory order violations
+system.cpu.iew.lsq.thread.0.squashedLoads          220                       # Number of loads squashed
+system.cpu.iew.lsq.thread.0.squashedStores           73                       # Number of stores squashed
+system.cpu.iew.memOrderViolationEvents             12                       # Number of memory order violations
 system.cpu.iew.predictedNotTakenIncorrect           98                       # Number of branches that were predicted not taken incorrectly
-system.cpu.iew.predictedTakenIncorrect             37                       # Number of branches that were predicted taken incorrectly
-system.cpu.ipc                               0.580355                       # IPC: Instructions Per Cycle
-system.cpu.ipc_total                         0.580355                       # IPC: Total IPC of All Threads
-system.cpu.iq.ISSUE:FU_type_0                    3075                       # Type of FU issued
+system.cpu.iew.predictedTakenIncorrect             48                       # Number of branches that were predicted taken incorrectly
+system.cpu.ipc                               0.633324                       # IPC: Instructions Per Cycle
+system.cpu.ipc_total                         0.633324                       # IPC: Total IPC of All Threads
+system.cpu.iq.ISSUE:FU_type_0                    3165                       # Type of FU issued
 system.cpu.iq.ISSUE:FU_type_0.start_dist
                       No_OpClass            0      0.00%            # Type of FU issued
-                          IntAlu         2178     70.83%            # Type of FU issued
+                          IntAlu         2243     70.87%            # Type of FU issued
                          IntMult            1      0.03%            # Type of FU issued
                           IntDiv            0      0.00%            # Type of FU issued
                         FloatAdd            0      0.00%            # Type of FU issued
@@ -278,16 +279,16 @@ system.cpu.iq.ISSUE:FU_type_0.start_dist
                        FloatMult            0      0.00%            # Type of FU issued
                         FloatDiv            0      0.00%            # Type of FU issued
                        FloatSqrt            0      0.00%            # Type of FU issued
-                         MemRead          561     18.24%            # Type of FU issued
-                        MemWrite          335     10.89%            # Type of FU issued
+                         MemRead          581     18.36%            # Type of FU issued
+                        MemWrite          340     10.74%            # Type of FU issued
                        IprAccess            0      0.00%            # Type of FU issued
                     InstPrefetch            0      0.00%            # Type of FU issued
 system.cpu.iq.ISSUE:FU_type_0.end_dist
 system.cpu.iq.ISSUE:fu_busy_cnt                    35                       # FU busy when requested
-system.cpu.iq.ISSUE:fu_busy_rate             0.011382                       # FU busy rate (busy events/executed inst)
+system.cpu.iq.ISSUE:fu_busy_rate             0.011058                       # FU busy rate (busy events/executed inst)
 system.cpu.iq.ISSUE:fu_full.start_dist
                       No_OpClass            0      0.00%            # attempts to use FU when none available
-                          IntAlu            2      5.71%            # attempts to use FU when none available
+                          IntAlu            1      2.86%            # attempts to use FU when none available
                          IntMult            0      0.00%            # attempts to use FU when none available
                           IntDiv            0      0.00%            # attempts to use FU when none available
                         FloatAdd            0      0.00%            # attempts to use FU when none available
@@ -297,41 +298,60 @@ system.cpu.iq.ISSUE:fu_full.start_dist
                         FloatDiv            0      0.00%            # attempts to use FU when none available
                        FloatSqrt            0      0.00%            # attempts to use FU when none available
                          MemRead           12     34.29%            # attempts to use FU when none available
-                        MemWrite           21     60.00%            # attempts to use FU when none available
+                        MemWrite           22     62.86%            # attempts to use FU when none available
                        IprAccess            0      0.00%            # attempts to use FU when none available
                     InstPrefetch            0      0.00%            # attempts to use FU when none available
 system.cpu.iq.ISSUE:fu_full.end_dist
 system.cpu.iq.ISSUE:issued_per_cycle.start_dist                     # Number of insts issued each cycle
-system.cpu.iq.ISSUE:issued_per_cycle.samples         4113                      
+system.cpu.iq.ISSUE:issued_per_cycle.samples         3769                      
 system.cpu.iq.ISSUE:issued_per_cycle.min_value            0                      
-                               0         2848   6924.39%           
-                               1          479   1164.60%           
-                               2          276    671.04%           
-                               3          213    517.87%           
-                               4          158    384.15%           
-                               5           86    209.09%           
-                               6           34     82.66%           
-                               7           13     31.61%           
-                               8            6     14.59%           
+                               0         2469   6550.81%           
+                               1          494   1310.69%           
+                               2          274    726.98%           
+                               3          234    620.85%           
+                               4          152    403.29%           
+                               5           87    230.83%           
+                               6           40    106.13%           
+                               7           14     37.15%           
+                               8            5     13.27%           
 system.cpu.iq.ISSUE:issued_per_cycle.max_value            8                      
 system.cpu.iq.ISSUE:issued_per_cycle.end_dist
 
-system.cpu.iq.ISSUE:rate                     0.747629                       # Inst issue rate
-system.cpu.iq.iqInstsAdded                       3330                       # Number of instructions added to the IQ (excludes non-spec)
-system.cpu.iq.iqInstsIssued                      3075                       # Number of instructions issued
-system.cpu.iq.iqNonSpecInstsAdded                   7                       # Number of non-speculative instructions added to the IQ
-system.cpu.iq.iqSquashedInstsExamined             790                       # Number of squashed instructions iterated over during squash; mainly for profiling
-system.cpu.iq.iqSquashedNonSpecRemoved              3                       # Number of squashed non-spec instructions that were removed
-system.cpu.iq.iqSquashedOperandsExamined          409                       # Number of squashed operands that are examined and possibly removed from graph
-system.cpu.l2cache.ReadReq_accesses               270                       # number of ReadReq accesses(hits+misses)
-system.cpu.l2cache.ReadReq_avg_miss_latency  4509.259259                       # average ReadReq miss latency
-system.cpu.l2cache.ReadReq_avg_mshr_miss_latency  2388.888889                       # average ReadReq mshr miss latency
-system.cpu.l2cache.ReadReq_miss_latency       1217500                       # number of ReadReq miss cycles
+system.cpu.iq.ISSUE:rate                     0.839745                       # Inst issue rate
+system.cpu.iq.iqInstsAdded                       3463                       # Number of instructions added to the IQ (excludes non-spec)
+system.cpu.iq.iqInstsIssued                      3165                       # Number of instructions issued
+system.cpu.iq.iqNonSpecInstsAdded                   6                       # Number of non-speculative instructions added to the IQ
+system.cpu.iq.iqSquashedInstsExamined             947                       # Number of squashed instructions iterated over during squash; mainly for profiling
+system.cpu.iq.iqSquashedInstsIssued                 1                       # Number of squashed instructions issued
+system.cpu.iq.iqSquashedNonSpecRemoved              2                       # Number of squashed non-spec instructions that were removed
+system.cpu.iq.iqSquashedOperandsExamined          468                       # Number of squashed operands that are examined and possibly removed from graph
+system.cpu.l2cache.ReadExReq_accesses              25                       # number of ReadExReq accesses(hits+misses)
+system.cpu.l2cache.ReadExReq_avg_miss_latency         3720                       # average ReadExReq miss latency
+system.cpu.l2cache.ReadExReq_avg_mshr_miss_latency         2720                       # average ReadExReq mshr miss latency
+system.cpu.l2cache.ReadExReq_miss_latency        93000                       # number of ReadExReq miss cycles
+system.cpu.l2cache.ReadExReq_miss_rate              1                       # miss rate for ReadExReq accesses
+system.cpu.l2cache.ReadExReq_misses                25                       # number of ReadExReq misses
+system.cpu.l2cache.ReadExReq_mshr_miss_latency        68000                       # number of ReadExReq MSHR miss cycles
+system.cpu.l2cache.ReadExReq_mshr_miss_rate            1                       # mshr miss rate for ReadExReq accesses
+system.cpu.l2cache.ReadExReq_mshr_misses           25                       # number of ReadExReq MSHR misses
+system.cpu.l2cache.ReadReq_accesses               246                       # number of ReadReq accesses(hits+misses)
+system.cpu.l2cache.ReadReq_avg_miss_latency  3357.723577                       # average ReadReq miss latency
+system.cpu.l2cache.ReadReq_avg_mshr_miss_latency  2357.723577                       # average ReadReq mshr miss latency
+system.cpu.l2cache.ReadReq_miss_latency        826000                       # number of ReadReq miss cycles
 system.cpu.l2cache.ReadReq_miss_rate                1                       # miss rate for ReadReq accesses
-system.cpu.l2cache.ReadReq_misses                 270                       # number of ReadReq misses
-system.cpu.l2cache.ReadReq_mshr_miss_latency       645000                       # number of ReadReq MSHR miss cycles
+system.cpu.l2cache.ReadReq_misses                 246                       # number of ReadReq misses
+system.cpu.l2cache.ReadReq_mshr_miss_latency       580000                       # number of ReadReq MSHR miss cycles
 system.cpu.l2cache.ReadReq_mshr_miss_rate            1                       # mshr miss rate for ReadReq accesses
-system.cpu.l2cache.ReadReq_mshr_misses            270                       # number of ReadReq MSHR misses
+system.cpu.l2cache.ReadReq_mshr_misses            246                       # number of ReadReq MSHR misses
+system.cpu.l2cache.UpgradeReq_accesses             13                       # number of UpgradeReq accesses(hits+misses)
+system.cpu.l2cache.UpgradeReq_avg_miss_latency  3230.769231                       # average UpgradeReq miss latency
+system.cpu.l2cache.UpgradeReq_avg_mshr_miss_latency  2230.769231                       # average UpgradeReq mshr miss latency
+system.cpu.l2cache.UpgradeReq_miss_latency        42000                       # number of UpgradeReq miss cycles
+system.cpu.l2cache.UpgradeReq_miss_rate             1                       # miss rate for UpgradeReq accesses
+system.cpu.l2cache.UpgradeReq_misses               13                       # number of UpgradeReq misses
+system.cpu.l2cache.UpgradeReq_mshr_miss_latency        29000                       # number of UpgradeReq MSHR miss cycles
+system.cpu.l2cache.UpgradeReq_mshr_miss_rate            1                       # mshr miss rate for UpgradeReq accesses
+system.cpu.l2cache.UpgradeReq_mshr_misses           13                       # number of UpgradeReq MSHR misses
 system.cpu.l2cache.avg_blocked_cycles_no_mshrs <err: div-0>                       # average number of cycles each access was blocked
 system.cpu.l2cache.avg_blocked_cycles_no_targets <err: div-0>                       # average number of cycles each access was blocked
 system.cpu.l2cache.avg_refs                         0                       # Average number of references to valid blocks.
@@ -340,32 +360,32 @@ system.cpu.l2cache.blocked_no_targets               0                       # nu
 system.cpu.l2cache.blocked_cycles_no_mshrs            0                       # number of cycles access was blocked
 system.cpu.l2cache.blocked_cycles_no_targets            0                       # number of cycles access was blocked
 system.cpu.l2cache.cache_copies                     0                       # number of cache copies performed
-system.cpu.l2cache.demand_accesses                270                       # number of demand (read+write) accesses
-system.cpu.l2cache.demand_avg_miss_latency  4509.259259                       # average overall miss latency
-system.cpu.l2cache.demand_avg_mshr_miss_latency  2388.888889                       # average overall mshr miss latency
+system.cpu.l2cache.demand_accesses                271                       # number of demand (read+write) accesses
+system.cpu.l2cache.demand_avg_miss_latency  3391.143911                       # average overall miss latency
+system.cpu.l2cache.demand_avg_mshr_miss_latency  2391.143911                       # average overall mshr miss latency
 system.cpu.l2cache.demand_hits                      0                       # number of demand (read+write) hits
-system.cpu.l2cache.demand_miss_latency        1217500                       # number of demand (read+write) miss cycles
+system.cpu.l2cache.demand_miss_latency         919000                       # number of demand (read+write) miss cycles
 system.cpu.l2cache.demand_miss_rate                 1                       # miss rate for demand accesses
-system.cpu.l2cache.demand_misses                  270                       # number of demand (read+write) misses
+system.cpu.l2cache.demand_misses                  271                       # number of demand (read+write) misses
 system.cpu.l2cache.demand_mshr_hits                 0                       # number of demand (read+write) MSHR hits
-system.cpu.l2cache.demand_mshr_miss_latency       645000                       # number of demand (read+write) MSHR miss cycles
+system.cpu.l2cache.demand_mshr_miss_latency       648000                       # number of demand (read+write) MSHR miss cycles
 system.cpu.l2cache.demand_mshr_miss_rate            1                       # mshr miss rate for demand accesses
-system.cpu.l2cache.demand_mshr_misses             270                       # number of demand (read+write) MSHR misses
+system.cpu.l2cache.demand_mshr_misses             271                       # number of demand (read+write) MSHR misses
 system.cpu.l2cache.fast_writes                      0                       # number of fast writes performed
 system.cpu.l2cache.mshr_cap_events                  0                       # number of times MSHR cap was activated
 system.cpu.l2cache.no_allocate_misses               0                       # Number of misses that were no-allocate
-system.cpu.l2cache.overall_accesses               270                       # number of overall (read+write) accesses
-system.cpu.l2cache.overall_avg_miss_latency  4509.259259                       # average overall miss latency
-system.cpu.l2cache.overall_avg_mshr_miss_latency  2388.888889                       # average overall mshr miss latency
+system.cpu.l2cache.overall_accesses               271                       # number of overall (read+write) accesses
+system.cpu.l2cache.overall_avg_miss_latency  3391.143911                       # average overall miss latency
+system.cpu.l2cache.overall_avg_mshr_miss_latency  2391.143911                       # average overall mshr miss latency
 system.cpu.l2cache.overall_avg_mshr_uncacheable_latency <err: div-0>                       # average overall mshr uncacheable latency
 system.cpu.l2cache.overall_hits                     0                       # number of overall hits
-system.cpu.l2cache.overall_miss_latency       1217500                       # number of overall miss cycles
+system.cpu.l2cache.overall_miss_latency        919000                       # number of overall miss cycles
 system.cpu.l2cache.overall_miss_rate                1                       # miss rate for overall accesses
-system.cpu.l2cache.overall_misses                 270                       # number of overall misses
+system.cpu.l2cache.overall_misses                 271                       # number of overall misses
 system.cpu.l2cache.overall_mshr_hits                0                       # number of overall MSHR hits
-system.cpu.l2cache.overall_mshr_miss_latency       645000                       # number of overall MSHR miss cycles
+system.cpu.l2cache.overall_mshr_miss_latency       648000                       # number of overall MSHR miss cycles
 system.cpu.l2cache.overall_mshr_miss_rate            1                       # mshr miss rate for overall accesses
-system.cpu.l2cache.overall_mshr_misses            270                       # number of overall MSHR misses
+system.cpu.l2cache.overall_mshr_misses            271                       # number of overall MSHR misses
 system.cpu.l2cache.overall_mshr_uncacheable_latency            0                       # number of overall MSHR uncacheable cycles
 system.cpu.l2cache.overall_mshr_uncacheable_misses            0                       # number of overall MSHR uncacheable misses
 system.cpu.l2cache.prefetcher.num_hwpf_already_in_cache            0                       # number of hwpf that were already in the cache
@@ -378,28 +398,28 @@ system.cpu.l2cache.prefetcher.num_hwpf_removed_MSHR_hit            0
 system.cpu.l2cache.prefetcher.num_hwpf_span_page            0                       # number of hwpf spanning a virtual page
 system.cpu.l2cache.prefetcher.num_hwpf_squashed_from_miss            0                       # number of hwpf that got squashed due to a miss aborting calculation time
 system.cpu.l2cache.replacements                     0                       # number of replacements
-system.cpu.l2cache.sampled_refs                   270                       # Sample count of references to valid blocks.
+system.cpu.l2cache.sampled_refs                   233                       # Sample count of references to valid blocks.
 system.cpu.l2cache.soft_prefetch_mshr_full            0                       # number of mshr full events for SW prefetching instrutions
-system.cpu.l2cache.tagsinuse               158.313436                       # Cycle average of tags in use
+system.cpu.l2cache.tagsinuse               129.636467                       # Cycle average of tags in use
 system.cpu.l2cache.total_refs                       0                       # Total number of references to valid blocks.
 system.cpu.l2cache.warmup_cycle                     0                       # Cycle when the warmup percentage was hit.
 system.cpu.l2cache.writebacks                       0                       # number of writebacks
-system.cpu.numCycles                             4113                       # number of cpu cycles simulated
+system.cpu.numCycles                             3769                       # number of cpu cycles simulated
 system.cpu.rename.RENAME:CommittedMaps           1768                       # Number of HB maps that are committed
-system.cpu.rename.RENAME:IdleCycles              3116                       # Number of cycles rename is idle
+system.cpu.rename.RENAME:IdleCycles              2724                       # Number of cycles rename is idle
 system.cpu.rename.RENAME:LSQFullEvents              1                       # Number of times rename has blocked due to LSQ full
-system.cpu.rename.RENAME:RenameLookups           4416                       # Number of register rename lookups that rename has made
-system.cpu.rename.RENAME:RenamedInsts            3886                       # Number of instructions processed by rename
-system.cpu.rename.RENAME:RenamedOperands         2777                       # Number of destination operands rename has renamed
-system.cpu.rename.RENAME:RunCycles                700                       # Number of cycles rename is running
-system.cpu.rename.RENAME:SquashCycles             202                       # Number of cycles rename is squashing
-system.cpu.rename.RENAME:UnblockCycles              6                       # Number of cycles rename is unblocking
-system.cpu.rename.RENAME:UndoneMaps              1009                       # Number of HB maps that are undone due to squashing
-system.cpu.rename.RENAME:serializeStallCycles           89                       # count of cycles rename stalled for serializing inst
-system.cpu.rename.RENAME:serializingInsts            9                       # count of serializing insts renamed
-system.cpu.rename.RENAME:skidInsts                 55                       # count of insts added to the skid buffer
-system.cpu.rename.RENAME:tempSerializingInsts            7                       # count of temporary serializing insts renamed
-system.cpu.timesIdled                               8                       # Number of times that the entire CPU went into an idle state and unscheduled itself
+system.cpu.rename.RENAME:RenameLookups           4613                       # Number of register rename lookups that rename has made
+system.cpu.rename.RENAME:RenamedInsts            4068                       # Number of instructions processed by rename
+system.cpu.rename.RENAME:RenamedOperands         2909                       # Number of destination operands rename has renamed
+system.cpu.rename.RENAME:RunCycles                733                       # Number of cycles rename is running
+system.cpu.rename.RENAME:SquashCycles             225                       # Number of cycles rename is squashing
+system.cpu.rename.RENAME:UnblockCycles              7                       # Number of cycles rename is unblocking
+system.cpu.rename.RENAME:UndoneMaps              1141                       # Number of HB maps that are undone due to squashing
+system.cpu.rename.RENAME:serializeStallCycles           80                       # count of cycles rename stalled for serializing inst
+system.cpu.rename.RENAME:serializingInsts            8                       # count of serializing insts renamed
+system.cpu.rename.RENAME:skidInsts                 52                       # count of insts added to the skid buffer
+system.cpu.rename.RENAME:tempSerializingInsts            6                       # count of temporary serializing insts renamed
+system.cpu.timesIdled                               2                       # Number of times that the entire CPU went into an idle state and unscheduled itself
 system.cpu.workload.PROG:num_syscalls               4                       # Number of system calls
 
 ---------- End Simulation Statistics   ----------
diff --git a/tests/quick/00.hello/ref/alpha/tru64/o3-timing/stdout b/tests/quick/00.hello/ref/alpha/tru64/o3-timing/stdout
index c276fcaea..79e638bb8 100644
--- a/tests/quick/00.hello/ref/alpha/tru64/o3-timing/stdout
+++ b/tests/quick/00.hello/ref/alpha/tru64/o3-timing/stdout
@@ -6,9 +6,9 @@ The Regents of The University of Michigan
 All Rights Reserved
 
 
-M5 compiled Jun 21 2007 21:25:27
-M5 started Fri Jun 22 00:04:44 2007
+M5 compiled Aug  3 2007 03:56:47
+M5 started Fri Aug  3 04:17:13 2007
 M5 executing on zizzer.eecs.umich.edu
 command line: build/ALPHA_SE/m5.fast -d build/ALPHA_SE/tests/fast/quick/00.hello/alpha/tru64/o3-timing tests/run.py quick/00.hello/alpha/tru64/o3-timing
 Global frequency set at 1000000000000 ticks per second
-Exiting @ tick 2055000 because target called exit()
+Exiting @ tick 1884000 because target called exit()
diff --git a/tests/quick/00.hello/ref/alpha/tru64/simple-atomic/config.ini b/tests/quick/00.hello/ref/alpha/tru64/simple-atomic/config.ini
index 61db8446a..16ea738bc 100644
--- a/tests/quick/00.hello/ref/alpha/tru64/simple-atomic/config.ini
+++ b/tests/quick/00.hello/ref/alpha/tru64/simple-atomic/config.ini
@@ -11,7 +11,7 @@ physmem=system.physmem
 
 [system.cpu]
 type=AtomicSimpleCPU
-children=workload
+children=tracer workload
 clock=500
 cpu_id=0
 defer_registration=false
@@ -25,11 +25,15 @@ phase=0
 progress_interval=0
 simulate_stalls=false
 system=system
+tracer=system.cpu.tracer
 width=1
 workload=system.cpu.workload
 dcache_port=system.membus.port[2]
 icache_port=system.membus.port[1]
 
+[system.cpu.tracer]
+type=ExeTracer
+
 [system.cpu.workload]
 type=LiveProcess
 cmd=hello
diff --git a/tests/quick/00.hello/ref/alpha/tru64/simple-atomic/m5stats.txt b/tests/quick/00.hello/ref/alpha/tru64/simple-atomic/m5stats.txt
index 29351d427..dfc8b7f6b 100644
--- a/tests/quick/00.hello/ref/alpha/tru64/simple-atomic/m5stats.txt
+++ b/tests/quick/00.hello/ref/alpha/tru64/simple-atomic/m5stats.txt
@@ -1,8 +1,9 @@
 
 ---------- Begin Simulation Statistics ----------
-host_inst_rate                                 111994                       # Simulator instruction rate (inst/s)
-host_seconds                                     0.02                       # Real time elapsed on the host
-host_tick_rate                               55017079                       # Simulator tick rate (ticks/s)
+host_inst_rate                                  34280                       # Simulator instruction rate (inst/s)
+host_mem_usage                                 147884                       # Number of bytes of host memory used
+host_seconds                                     0.08                       # Real time elapsed on the host
+host_tick_rate                               17043200                       # Simulator tick rate (ticks/s)
 sim_freq                                 1000000000000                       # Frequency of simulated ticks
 sim_insts                                        2578                       # Number of instructions simulated
 sim_seconds                                  0.000001                       # Number of seconds simulated
diff --git a/tests/quick/00.hello/ref/alpha/tru64/simple-atomic/stdout b/tests/quick/00.hello/ref/alpha/tru64/simple-atomic/stdout
index f76500526..6e78c47eb 100644
--- a/tests/quick/00.hello/ref/alpha/tru64/simple-atomic/stdout
+++ b/tests/quick/00.hello/ref/alpha/tru64/simple-atomic/stdout
@@ -6,9 +6,9 @@ The Regents of The University of Michigan
 All Rights Reserved
 
 
-M5 compiled Jun 10 2007 14:06:20
-M5 started Sun Jun 10 14:22:37 2007
-M5 executing on iceaxe
-command line: /Users/nate/build/outgoing/build/ALPHA_SE/m5.debug -d /Users/nate/build/outgoing/build/ALPHA_SE/tests/debug/quick/00.hello/alpha/tru64/simple-atomic tests/run.py quick/00.hello/alpha/tru64/simple-atomic
+M5 compiled Aug  3 2007 03:56:47
+M5 started Fri Aug  3 04:17:14 2007
+M5 executing on zizzer.eecs.umich.edu
+command line: build/ALPHA_SE/m5.fast -d build/ALPHA_SE/tests/fast/quick/00.hello/alpha/tru64/simple-atomic tests/run.py quick/00.hello/alpha/tru64/simple-atomic
 Global frequency set at 1000000000000 ticks per second
 Exiting @ tick 1288500 because target called exit()
diff --git a/tests/quick/00.hello/ref/alpha/tru64/simple-timing/config.ini b/tests/quick/00.hello/ref/alpha/tru64/simple-timing/config.ini
index 5a336ab13..a9adf07b9 100644
--- a/tests/quick/00.hello/ref/alpha/tru64/simple-timing/config.ini
+++ b/tests/quick/00.hello/ref/alpha/tru64/simple-timing/config.ini
@@ -11,7 +11,7 @@ physmem=system.physmem
 
 [system.cpu]
 type=TimingSimpleCPU
-children=dcache icache l2cache toL2Bus workload
+children=dcache icache l2cache toL2Bus tracer workload
 clock=500
 cpu_id=0
 defer_registration=false
@@ -24,17 +24,16 @@ max_loads_any_thread=0
 phase=0
 progress_interval=0
 system=system
+tracer=system.cpu.tracer
 workload=system.cpu.workload
 dcache_port=system.cpu.dcache.cpu_side
 icache_port=system.cpu.icache.cpu_side
 
 [system.cpu.dcache]
 type=BaseCache
-adaptive_compression=false
+addr_range=0:18446744073709551615
 assoc=2
 block_size=64
-compressed_bus=false
-compression_latency=0
 hash_delay=1
 latency=1000
 lifo=false
@@ -52,12 +51,10 @@ prefetch_serial_squash=false
 prefetch_use_cpu_id=true
 prefetcher_size=100
 prioritizeRequests=false
-protocol=Null
 repl=Null
 size=262144
 split=false
 split_size=0
-store_compressed=false
 subblock_size=0
 tgts_per_mshr=5
 trace_addr=0
@@ -68,11 +65,9 @@ mem_side=system.cpu.toL2Bus.port[1]
 
 [system.cpu.icache]
 type=BaseCache
-adaptive_compression=false
+addr_range=0:18446744073709551615
 assoc=2
 block_size=64
-compressed_bus=false
-compression_latency=0
 hash_delay=1
 latency=1000
 lifo=false
@@ -90,12 +85,10 @@ prefetch_serial_squash=false
 prefetch_use_cpu_id=true
 prefetcher_size=100
 prioritizeRequests=false
-protocol=Null
 repl=Null
 size=131072
 split=false
 split_size=0
-store_compressed=false
 subblock_size=0
 tgts_per_mshr=5
 trace_addr=0
@@ -106,11 +99,9 @@ mem_side=system.cpu.toL2Bus.port[0]
 
 [system.cpu.l2cache]
 type=BaseCache
-adaptive_compression=false
+addr_range=0:18446744073709551615
 assoc=2
 block_size=64
-compressed_bus=false
-compression_latency=0
 hash_delay=1
 latency=10000
 lifo=false
@@ -128,12 +119,10 @@ prefetch_serial_squash=false
 prefetch_use_cpu_id=true
 prefetcher_size=100
 prioritizeRequests=false
-protocol=Null
 repl=Null
 size=2097152
 split=false
 split_size=0
-store_compressed=false
 subblock_size=0
 tgts_per_mshr=5
 trace_addr=0
@@ -151,6 +140,9 @@ responder_set=false
 width=64
 port=system.cpu.icache.mem_side system.cpu.dcache.mem_side system.cpu.l2cache.cpu_side
 
+[system.cpu.tracer]
+type=ExeTracer
+
 [system.cpu.workload]
 type=LiveProcess
 cmd=hello
diff --git a/tests/quick/00.hello/ref/alpha/tru64/simple-timing/m5stats.txt b/tests/quick/00.hello/ref/alpha/tru64/simple-timing/m5stats.txt
index 621520fa3..56479827d 100644
--- a/tests/quick/00.hello/ref/alpha/tru64/simple-timing/m5stats.txt
+++ b/tests/quick/00.hello/ref/alpha/tru64/simple-timing/m5stats.txt
@@ -1,12 +1,13 @@
 
 ---------- Begin Simulation Statistics ----------
-host_inst_rate                                  51133                       # Simulator instruction rate (inst/s)
-host_seconds                                     0.05                       # Real time elapsed on the host
-host_tick_rate                              127514531                       # Simulator tick rate (ticks/s)
+host_inst_rate                                  43962                       # Simulator instruction rate (inst/s)
+host_mem_usage                                 153564                       # Number of bytes of host memory used
+host_seconds                                     0.06                       # Real time elapsed on the host
+host_tick_rate                              112042683                       # Simulator tick rate (ticks/s)
 sim_freq                                 1000000000000                       # Frequency of simulated ticks
 sim_insts                                        2578                       # Number of instructions simulated
-sim_seconds                                  0.000006                       # Number of seconds simulated
-sim_ticks                                     6472000                       # Number of ticks simulated
+sim_seconds                                  0.000007                       # Number of seconds simulated
+sim_ticks                                     6615000                       # Number of ticks simulated
 system.cpu.dcache.ReadReq_accesses                415                       # number of ReadReq accesses(hits+misses)
 system.cpu.dcache.ReadReq_avg_miss_latency        14000                       # average ReadReq miss latency
 system.cpu.dcache.ReadReq_avg_mshr_miss_latency        13000                       # average ReadReq mshr miss latency
@@ -20,13 +21,13 @@ system.cpu.dcache.ReadReq_mshr_misses              55                       # nu
 system.cpu.dcache.WriteReq_accesses               294                       # number of WriteReq accesses(hits+misses)
 system.cpu.dcache.WriteReq_avg_miss_latency        14000                       # average WriteReq miss latency
 system.cpu.dcache.WriteReq_avg_mshr_miss_latency        13000                       # average WriteReq mshr miss latency
-system.cpu.dcache.WriteReq_hits                   267                       # number of WriteReq hits
-system.cpu.dcache.WriteReq_miss_latency        378000                       # number of WriteReq miss cycles
-system.cpu.dcache.WriteReq_miss_rate         0.091837                       # miss rate for WriteReq accesses
-system.cpu.dcache.WriteReq_misses                  27                       # number of WriteReq misses
-system.cpu.dcache.WriteReq_mshr_miss_latency       351000                       # number of WriteReq MSHR miss cycles
-system.cpu.dcache.WriteReq_mshr_miss_rate     0.091837                       # mshr miss rate for WriteReq accesses
-system.cpu.dcache.WriteReq_mshr_misses             27                       # number of WriteReq MSHR misses
+system.cpu.dcache.WriteReq_hits                   256                       # number of WriteReq hits
+system.cpu.dcache.WriteReq_miss_latency        532000                       # number of WriteReq miss cycles
+system.cpu.dcache.WriteReq_miss_rate         0.129252                       # miss rate for WriteReq accesses
+system.cpu.dcache.WriteReq_misses                  38                       # number of WriteReq misses
+system.cpu.dcache.WriteReq_mshr_miss_latency       494000                       # number of WriteReq MSHR miss cycles
+system.cpu.dcache.WriteReq_mshr_miss_rate     0.129252                       # mshr miss rate for WriteReq accesses
+system.cpu.dcache.WriteReq_mshr_misses             38                       # number of WriteReq MSHR misses
 system.cpu.dcache.avg_blocked_cycles_no_mshrs <err: div-0>                       # average number of cycles each access was blocked
 system.cpu.dcache.avg_blocked_cycles_no_targets <err: div-0>                       # average number of cycles each access was blocked
 system.cpu.dcache.avg_refs                   7.646341                       # Average number of references to valid blocks.
@@ -38,14 +39,14 @@ system.cpu.dcache.cache_copies                      0                       # nu
 system.cpu.dcache.demand_accesses                 709                       # number of demand (read+write) accesses
 system.cpu.dcache.demand_avg_miss_latency        14000                       # average overall miss latency
 system.cpu.dcache.demand_avg_mshr_miss_latency        13000                       # average overall mshr miss latency
-system.cpu.dcache.demand_hits                     627                       # number of demand (read+write) hits
-system.cpu.dcache.demand_miss_latency         1148000                       # number of demand (read+write) miss cycles
-system.cpu.dcache.demand_miss_rate           0.115656                       # miss rate for demand accesses
-system.cpu.dcache.demand_misses                    82                       # number of demand (read+write) misses
+system.cpu.dcache.demand_hits                     616                       # number of demand (read+write) hits
+system.cpu.dcache.demand_miss_latency         1302000                       # number of demand (read+write) miss cycles
+system.cpu.dcache.demand_miss_rate           0.131171                       # miss rate for demand accesses
+system.cpu.dcache.demand_misses                    93                       # number of demand (read+write) misses
 system.cpu.dcache.demand_mshr_hits                  0                       # number of demand (read+write) MSHR hits
-system.cpu.dcache.demand_mshr_miss_latency      1066000                       # number of demand (read+write) MSHR miss cycles
-system.cpu.dcache.demand_mshr_miss_rate      0.115656                       # mshr miss rate for demand accesses
-system.cpu.dcache.demand_mshr_misses               82                       # number of demand (read+write) MSHR misses
+system.cpu.dcache.demand_mshr_miss_latency      1209000                       # number of demand (read+write) MSHR miss cycles
+system.cpu.dcache.demand_mshr_miss_rate      0.131171                       # mshr miss rate for demand accesses
+system.cpu.dcache.demand_mshr_misses               93                       # number of demand (read+write) MSHR misses
 system.cpu.dcache.fast_writes                       0                       # number of fast writes performed
 system.cpu.dcache.mshr_cap_events                   0                       # number of times MSHR cap was activated
 system.cpu.dcache.no_allocate_misses                0                       # Number of misses that were no-allocate
@@ -53,14 +54,14 @@ system.cpu.dcache.overall_accesses                709                       # nu
 system.cpu.dcache.overall_avg_miss_latency        14000                       # average overall miss latency
 system.cpu.dcache.overall_avg_mshr_miss_latency        13000                       # average overall mshr miss latency
 system.cpu.dcache.overall_avg_mshr_uncacheable_latency <err: div-0>                       # average overall mshr uncacheable latency
-system.cpu.dcache.overall_hits                    627                       # number of overall hits
-system.cpu.dcache.overall_miss_latency        1148000                       # number of overall miss cycles
-system.cpu.dcache.overall_miss_rate          0.115656                       # miss rate for overall accesses
-system.cpu.dcache.overall_misses                   82                       # number of overall misses
+system.cpu.dcache.overall_hits                    616                       # number of overall hits
+system.cpu.dcache.overall_miss_latency        1302000                       # number of overall miss cycles
+system.cpu.dcache.overall_miss_rate          0.131171                       # miss rate for overall accesses
+system.cpu.dcache.overall_misses                   93                       # number of overall misses
 system.cpu.dcache.overall_mshr_hits                 0                       # number of overall MSHR hits
-system.cpu.dcache.overall_mshr_miss_latency      1066000                       # number of overall MSHR miss cycles
-system.cpu.dcache.overall_mshr_miss_rate     0.115656                       # mshr miss rate for overall accesses
-system.cpu.dcache.overall_mshr_misses              82                       # number of overall MSHR misses
+system.cpu.dcache.overall_mshr_miss_latency      1209000                       # number of overall MSHR miss cycles
+system.cpu.dcache.overall_mshr_miss_rate     0.131171                       # mshr miss rate for overall accesses
+system.cpu.dcache.overall_mshr_misses              93                       # number of overall MSHR misses
 system.cpu.dcache.overall_mshr_uncacheable_latency            0                       # number of overall MSHR uncacheable cycles
 system.cpu.dcache.overall_mshr_uncacheable_misses            0                       # number of overall MSHR uncacheable misses
 system.cpu.dcache.prefetcher.num_hwpf_already_in_cache            0                       # number of hwpf that were already in the cache
@@ -75,7 +76,7 @@ system.cpu.dcache.prefetcher.num_hwpf_squashed_from_miss            0
 system.cpu.dcache.replacements                      0                       # number of replacements
 system.cpu.dcache.sampled_refs                     82                       # Sample count of references to valid blocks.
 system.cpu.dcache.soft_prefetch_mshr_full            0                       # number of mshr full events for SW prefetching instrutions
-system.cpu.dcache.tagsinuse                 50.002941                       # Cycle average of tags in use
+system.cpu.dcache.tagsinuse                 50.044147                       # Cycle average of tags in use
 system.cpu.dcache.total_refs                      627                       # Total number of references to valid blocks.
 system.cpu.dcache.warmup_cycle                      0                       # Cycle when the warmup percentage was hit.
 system.cpu.dcache.writebacks                        0                       # number of writebacks
@@ -137,20 +138,38 @@ system.cpu.icache.prefetcher.num_hwpf_squashed_from_miss            0
 system.cpu.icache.replacements                      0                       # number of replacements
 system.cpu.icache.sampled_refs                    163                       # Sample count of references to valid blocks.
 system.cpu.icache.soft_prefetch_mshr_full            0                       # number of mshr full events for SW prefetching instrutions
-system.cpu.icache.tagsinuse                 86.067224                       # Cycle average of tags in use
+system.cpu.icache.tagsinuse                 86.205303                       # Cycle average of tags in use
 system.cpu.icache.total_refs                     2416                       # Total number of references to valid blocks.
 system.cpu.icache.warmup_cycle                      0                       # Cycle when the warmup percentage was hit.
 system.cpu.icache.writebacks                        0                       # number of writebacks
 system.cpu.idle_fraction                            0                       # Percentage of idle cycles
-system.cpu.l2cache.ReadReq_accesses               245                       # number of ReadReq accesses(hits+misses)
-system.cpu.l2cache.ReadReq_avg_miss_latency        13000                       # average ReadReq miss latency
+system.cpu.l2cache.ReadExReq_accesses              27                       # number of ReadExReq accesses(hits+misses)
+system.cpu.l2cache.ReadExReq_avg_miss_latency        12000                       # average ReadExReq miss latency
+system.cpu.l2cache.ReadExReq_avg_mshr_miss_latency        11000                       # average ReadExReq mshr miss latency
+system.cpu.l2cache.ReadExReq_miss_latency       324000                       # number of ReadExReq miss cycles
+system.cpu.l2cache.ReadExReq_miss_rate              1                       # miss rate for ReadExReq accesses
+system.cpu.l2cache.ReadExReq_misses                27                       # number of ReadExReq misses
+system.cpu.l2cache.ReadExReq_mshr_miss_latency       297000                       # number of ReadExReq MSHR miss cycles
+system.cpu.l2cache.ReadExReq_mshr_miss_rate            1                       # mshr miss rate for ReadExReq accesses
+system.cpu.l2cache.ReadExReq_mshr_misses           27                       # number of ReadExReq MSHR misses
+system.cpu.l2cache.ReadReq_accesses               218                       # number of ReadReq accesses(hits+misses)
+system.cpu.l2cache.ReadReq_avg_miss_latency        12000                       # average ReadReq miss latency
 system.cpu.l2cache.ReadReq_avg_mshr_miss_latency        11000                       # average ReadReq mshr miss latency
-system.cpu.l2cache.ReadReq_miss_latency       3185000                       # number of ReadReq miss cycles
+system.cpu.l2cache.ReadReq_miss_latency       2616000                       # number of ReadReq miss cycles
 system.cpu.l2cache.ReadReq_miss_rate                1                       # miss rate for ReadReq accesses
-system.cpu.l2cache.ReadReq_misses                 245                       # number of ReadReq misses
-system.cpu.l2cache.ReadReq_mshr_miss_latency      2695000                       # number of ReadReq MSHR miss cycles
+system.cpu.l2cache.ReadReq_misses                 218                       # number of ReadReq misses
+system.cpu.l2cache.ReadReq_mshr_miss_latency      2398000                       # number of ReadReq MSHR miss cycles
 system.cpu.l2cache.ReadReq_mshr_miss_rate            1                       # mshr miss rate for ReadReq accesses
-system.cpu.l2cache.ReadReq_mshr_misses            245                       # number of ReadReq MSHR misses
+system.cpu.l2cache.ReadReq_mshr_misses            218                       # number of ReadReq MSHR misses
+system.cpu.l2cache.UpgradeReq_accesses             11                       # number of UpgradeReq accesses(hits+misses)
+system.cpu.l2cache.UpgradeReq_avg_miss_latency        12000                       # average UpgradeReq miss latency
+system.cpu.l2cache.UpgradeReq_avg_mshr_miss_latency        11000                       # average UpgradeReq mshr miss latency
+system.cpu.l2cache.UpgradeReq_miss_latency       132000                       # number of UpgradeReq miss cycles
+system.cpu.l2cache.UpgradeReq_miss_rate             1                       # miss rate for UpgradeReq accesses
+system.cpu.l2cache.UpgradeReq_misses               11                       # number of UpgradeReq misses
+system.cpu.l2cache.UpgradeReq_mshr_miss_latency       121000                       # number of UpgradeReq MSHR miss cycles
+system.cpu.l2cache.UpgradeReq_mshr_miss_rate            1                       # mshr miss rate for UpgradeReq accesses
+system.cpu.l2cache.UpgradeReq_mshr_misses           11                       # number of UpgradeReq MSHR misses
 system.cpu.l2cache.avg_blocked_cycles_no_mshrs <err: div-0>                       # average number of cycles each access was blocked
 system.cpu.l2cache.avg_blocked_cycles_no_targets <err: div-0>                       # average number of cycles each access was blocked
 system.cpu.l2cache.avg_refs                         0                       # Average number of references to valid blocks.
@@ -160,10 +179,10 @@ system.cpu.l2cache.blocked_cycles_no_mshrs            0                       #
 system.cpu.l2cache.blocked_cycles_no_targets            0                       # number of cycles access was blocked
 system.cpu.l2cache.cache_copies                     0                       # number of cache copies performed
 system.cpu.l2cache.demand_accesses                245                       # number of demand (read+write) accesses
-system.cpu.l2cache.demand_avg_miss_latency        13000                       # average overall miss latency
+system.cpu.l2cache.demand_avg_miss_latency        12000                       # average overall miss latency
 system.cpu.l2cache.demand_avg_mshr_miss_latency        11000                       # average overall mshr miss latency
 system.cpu.l2cache.demand_hits                      0                       # number of demand (read+write) hits
-system.cpu.l2cache.demand_miss_latency        3185000                       # number of demand (read+write) miss cycles
+system.cpu.l2cache.demand_miss_latency        2940000                       # number of demand (read+write) miss cycles
 system.cpu.l2cache.demand_miss_rate                 1                       # miss rate for demand accesses
 system.cpu.l2cache.demand_misses                  245                       # number of demand (read+write) misses
 system.cpu.l2cache.demand_mshr_hits                 0                       # number of demand (read+write) MSHR hits
@@ -174,11 +193,11 @@ system.cpu.l2cache.fast_writes                      0                       # nu
 system.cpu.l2cache.mshr_cap_events                  0                       # number of times MSHR cap was activated
 system.cpu.l2cache.no_allocate_misses               0                       # Number of misses that were no-allocate
 system.cpu.l2cache.overall_accesses               245                       # number of overall (read+write) accesses
-system.cpu.l2cache.overall_avg_miss_latency        13000                       # average overall miss latency
+system.cpu.l2cache.overall_avg_miss_latency        12000                       # average overall miss latency
 system.cpu.l2cache.overall_avg_mshr_miss_latency        11000                       # average overall mshr miss latency
 system.cpu.l2cache.overall_avg_mshr_uncacheable_latency <err: div-0>                       # average overall mshr uncacheable latency
 system.cpu.l2cache.overall_hits                     0                       # number of overall hits
-system.cpu.l2cache.overall_miss_latency       3185000                       # number of overall miss cycles
+system.cpu.l2cache.overall_miss_latency       2940000                       # number of overall miss cycles
 system.cpu.l2cache.overall_miss_rate                1                       # miss rate for overall accesses
 system.cpu.l2cache.overall_misses                 245                       # number of overall misses
 system.cpu.l2cache.overall_mshr_hits                0                       # number of overall MSHR hits
@@ -197,14 +216,14 @@ system.cpu.l2cache.prefetcher.num_hwpf_removed_MSHR_hit            0
 system.cpu.l2cache.prefetcher.num_hwpf_span_page            0                       # number of hwpf spanning a virtual page
 system.cpu.l2cache.prefetcher.num_hwpf_squashed_from_miss            0                       # number of hwpf that got squashed due to a miss aborting calculation time
 system.cpu.l2cache.replacements                     0                       # number of replacements
-system.cpu.l2cache.sampled_refs                   245                       # Sample count of references to valid blocks.
+system.cpu.l2cache.sampled_refs                   207                       # Sample count of references to valid blocks.
 system.cpu.l2cache.soft_prefetch_mshr_full            0                       # number of mshr full events for SW prefetching instrutions
-system.cpu.l2cache.tagsinuse               136.108021                       # Cycle average of tags in use
+system.cpu.l2cache.tagsinuse               109.774164                       # Cycle average of tags in use
 system.cpu.l2cache.total_refs                       0                       # Total number of references to valid blocks.
 system.cpu.l2cache.warmup_cycle                     0                       # Cycle when the warmup percentage was hit.
 system.cpu.l2cache.writebacks                       0                       # number of writebacks
 system.cpu.not_idle_fraction                        1                       # Percentage of non-idle cycles
-system.cpu.numCycles                          6472000                       # number of cpu cycles simulated
+system.cpu.numCycles                          6615000                       # number of cpu cycles simulated
 system.cpu.num_insts                             2578                       # Number of instructions executed
 system.cpu.num_refs                               710                       # Number of memory references
 system.cpu.workload.PROG:num_syscalls               4                       # Number of system calls
diff --git a/tests/quick/00.hello/ref/alpha/tru64/simple-timing/stdout b/tests/quick/00.hello/ref/alpha/tru64/simple-timing/stdout
index 1c6780cf0..47fca6faf 100644
--- a/tests/quick/00.hello/ref/alpha/tru64/simple-timing/stdout
+++ b/tests/quick/00.hello/ref/alpha/tru64/simple-timing/stdout
@@ -6,9 +6,9 @@ The Regents of The University of Michigan
 All Rights Reserved
 
 
-M5 compiled Jun 10 2007 14:06:20
-M5 started Sun Jun 10 14:22:37 2007
-M5 executing on iceaxe
-command line: /Users/nate/build/outgoing/build/ALPHA_SE/m5.debug -d /Users/nate/build/outgoing/build/ALPHA_SE/tests/debug/quick/00.hello/alpha/tru64/simple-timing tests/run.py quick/00.hello/alpha/tru64/simple-timing
+M5 compiled Aug  3 2007 03:56:47
+M5 started Fri Aug  3 04:17:14 2007
+M5 executing on zizzer.eecs.umich.edu
+command line: build/ALPHA_SE/m5.fast -d build/ALPHA_SE/tests/fast/quick/00.hello/alpha/tru64/simple-timing tests/run.py quick/00.hello/alpha/tru64/simple-timing
 Global frequency set at 1000000000000 ticks per second
-Exiting @ tick 6472000 because target called exit()
+Exiting @ tick 6615000 because target called exit()
diff --git a/tests/quick/00.hello/ref/mips/linux/simple-atomic/config.ini b/tests/quick/00.hello/ref/mips/linux/simple-atomic/config.ini
index ea3ba751b..c6807e6a7 100644
--- a/tests/quick/00.hello/ref/mips/linux/simple-atomic/config.ini
+++ b/tests/quick/00.hello/ref/mips/linux/simple-atomic/config.ini
@@ -11,7 +11,7 @@ physmem=system.physmem
 
 [system.cpu]
 type=AtomicSimpleCPU
-children=workload
+children=tracer workload
 clock=500
 cpu_id=0
 defer_registration=false
@@ -25,11 +25,15 @@ phase=0
 progress_interval=0
 simulate_stalls=false
 system=system
+tracer=system.cpu.tracer
 width=1
 workload=system.cpu.workload
 dcache_port=system.membus.port[2]
 icache_port=system.membus.port[1]
 
+[system.cpu.tracer]
+type=ExeTracer
+
 [system.cpu.workload]
 type=LiveProcess
 cmd=hello
@@ -53,7 +57,7 @@ bus_id=0
 clock=1000
 responder_set=false
 width=64
-port=system.physmem.port system.cpu.icache_port system.cpu.dcache_port
+port=system.physmem.port[0] system.cpu.icache_port system.cpu.dcache_port
 
 [system.physmem]
 type=PhysicalMemory
diff --git a/tests/quick/00.hello/ref/mips/linux/simple-atomic/m5stats.txt b/tests/quick/00.hello/ref/mips/linux/simple-atomic/m5stats.txt
index 6a0c251b5..98d540d90 100644
--- a/tests/quick/00.hello/ref/mips/linux/simple-atomic/m5stats.txt
+++ b/tests/quick/00.hello/ref/mips/linux/simple-atomic/m5stats.txt
@@ -1,9 +1,9 @@
 
 ---------- Begin Simulation Statistics ----------
-host_inst_rate                                 535701                       # Simulator instruction rate (inst/s)
-host_mem_usage                                 148368                       # Number of bytes of host memory used
-host_seconds                                     0.01                       # Real time elapsed on the host
-host_tick_rate                              257653061                       # Simulator tick rate (ticks/s)
+host_inst_rate                                  25511                       # Simulator instruction rate (inst/s)
+host_mem_usage                                 149560                       # Number of bytes of host memory used
+host_seconds                                     0.22                       # Real time elapsed on the host
+host_tick_rate                               12728361                       # Simulator tick rate (ticks/s)
 sim_freq                                 1000000000000                       # Frequency of simulated ticks
 sim_insts                                        5657                       # Number of instructions simulated
 sim_seconds                                  0.000003                       # Number of seconds simulated
diff --git a/tests/quick/00.hello/ref/mips/linux/simple-atomic/stdout b/tests/quick/00.hello/ref/mips/linux/simple-atomic/stdout
index 7fb23e5a5..3919c7c81 100644
--- a/tests/quick/00.hello/ref/mips/linux/simple-atomic/stdout
+++ b/tests/quick/00.hello/ref/mips/linux/simple-atomic/stdout
@@ -6,8 +6,8 @@ The Regents of The University of Michigan
 All Rights Reserved
 
 
-M5 compiled May 15 2007 12:54:05
-M5 started Tue May 15 12:54:07 2007
+M5 compiled Aug  3 2007 04:06:41
+M5 started Fri Aug  3 04:31:09 2007
 M5 executing on zizzer.eecs.umich.edu
 command line: build/MIPS_SE/m5.fast -d build/MIPS_SE/tests/fast/quick/00.hello/mips/linux/simple-atomic tests/run.py quick/00.hello/mips/linux/simple-atomic
 Global frequency set at 1000000000000 ticks per second
diff --git a/tests/quick/00.hello/ref/mips/linux/simple-timing/config.ini b/tests/quick/00.hello/ref/mips/linux/simple-timing/config.ini
index a5d4e6583..c52036289 100644
--- a/tests/quick/00.hello/ref/mips/linux/simple-timing/config.ini
+++ b/tests/quick/00.hello/ref/mips/linux/simple-timing/config.ini
@@ -11,7 +11,7 @@ physmem=system.physmem
 
 [system.cpu]
 type=TimingSimpleCPU
-children=dcache icache l2cache toL2Bus workload
+children=dcache icache l2cache toL2Bus tracer workload
 clock=500
 cpu_id=0
 defer_registration=false
@@ -24,17 +24,16 @@ max_loads_any_thread=0
 phase=0
 progress_interval=0
 system=system
+tracer=system.cpu.tracer
 workload=system.cpu.workload
 dcache_port=system.cpu.dcache.cpu_side
 icache_port=system.cpu.icache.cpu_side
 
 [system.cpu.dcache]
 type=BaseCache
-adaptive_compression=false
+addr_range=0:18446744073709551615
 assoc=2
 block_size=64
-compressed_bus=false
-compression_latency=0
 hash_delay=1
 latency=1000
 lifo=false
@@ -44,7 +43,7 @@ prefetch_access=false
 prefetch_cache_check_push=true
 prefetch_data_accesses_only=false
 prefetch_degree=1
-prefetch_latency=10
+prefetch_latency=10000
 prefetch_miss=false
 prefetch_past_page=false
 prefetch_policy=none
@@ -52,12 +51,10 @@ prefetch_serial_squash=false
 prefetch_use_cpu_id=true
 prefetcher_size=100
 prioritizeRequests=false
-protocol=Null
 repl=Null
 size=262144
 split=false
 split_size=0
-store_compressed=false
 subblock_size=0
 tgts_per_mshr=5
 trace_addr=0
@@ -68,11 +65,9 @@ mem_side=system.cpu.toL2Bus.port[1]
 
 [system.cpu.icache]
 type=BaseCache
-adaptive_compression=false
+addr_range=0:18446744073709551615
 assoc=2
 block_size=64
-compressed_bus=false
-compression_latency=0
 hash_delay=1
 latency=1000
 lifo=false
@@ -82,7 +77,7 @@ prefetch_access=false
 prefetch_cache_check_push=true
 prefetch_data_accesses_only=false
 prefetch_degree=1
-prefetch_latency=10
+prefetch_latency=10000
 prefetch_miss=false
 prefetch_past_page=false
 prefetch_policy=none
@@ -90,12 +85,10 @@ prefetch_serial_squash=false
 prefetch_use_cpu_id=true
 prefetcher_size=100
 prioritizeRequests=false
-protocol=Null
 repl=Null
 size=131072
 split=false
 split_size=0
-store_compressed=false
 subblock_size=0
 tgts_per_mshr=5
 trace_addr=0
@@ -106,11 +99,9 @@ mem_side=system.cpu.toL2Bus.port[0]
 
 [system.cpu.l2cache]
 type=BaseCache
-adaptive_compression=false
+addr_range=0:18446744073709551615
 assoc=2
 block_size=64
-compressed_bus=false
-compression_latency=0
 hash_delay=1
 latency=10000
 lifo=false
@@ -120,7 +111,7 @@ prefetch_access=false
 prefetch_cache_check_push=true
 prefetch_data_accesses_only=false
 prefetch_degree=1
-prefetch_latency=10
+prefetch_latency=100000
 prefetch_miss=false
 prefetch_past_page=false
 prefetch_policy=none
@@ -128,12 +119,10 @@ prefetch_serial_squash=false
 prefetch_use_cpu_id=true
 prefetcher_size=100
 prioritizeRequests=false
-protocol=Null
 repl=Null
 size=2097152
 split=false
 split_size=0
-store_compressed=false
 subblock_size=0
 tgts_per_mshr=5
 trace_addr=0
@@ -151,6 +140,9 @@ responder_set=false
 width=64
 port=system.cpu.icache.mem_side system.cpu.dcache.mem_side system.cpu.l2cache.cpu_side
 
+[system.cpu.tracer]
+type=ExeTracer
+
 [system.cpu.workload]
 type=LiveProcess
 cmd=hello
@@ -174,7 +166,7 @@ bus_id=0
 clock=1000
 responder_set=false
 width=64
-port=system.physmem.port system.cpu.l2cache.mem_side
+port=system.physmem.port[0] system.cpu.l2cache.mem_side
 
 [system.physmem]
 type=PhysicalMemory
diff --git a/tests/quick/00.hello/ref/mips/linux/simple-timing/m5stats.txt b/tests/quick/00.hello/ref/mips/linux/simple-timing/m5stats.txt
index 41bb7c8b7..985175cad 100644
--- a/tests/quick/00.hello/ref/mips/linux/simple-timing/m5stats.txt
+++ b/tests/quick/00.hello/ref/mips/linux/simple-timing/m5stats.txt
@@ -1,13 +1,13 @@
 
 ---------- Begin Simulation Statistics ----------
-host_inst_rate                                 273338                       # Simulator instruction rate (inst/s)
-host_mem_usage                                 153844                       # Number of bytes of host memory used
-host_seconds                                     0.02                       # Real time elapsed on the host
-host_tick_rate                              633390216                       # Simulator tick rate (ticks/s)
+host_inst_rate                                  45085                       # Simulator instruction rate (inst/s)
+host_mem_usage                                 155088                       # Number of bytes of host memory used
+host_seconds                                     0.13                       # Real time elapsed on the host
+host_tick_rate                              101545982                       # Simulator tick rate (ticks/s)
 sim_freq                                 1000000000000                       # Frequency of simulated ticks
 sim_insts                                        5657                       # Number of instructions simulated
-sim_seconds                                  0.000013                       # Number of seconds simulated
-sim_ticks                                    13362000                       # Number of ticks simulated
+sim_seconds                                  0.000014                       # Number of seconds simulated
+sim_ticks                                    13544000                       # Number of ticks simulated
 system.cpu.dcache.ReadReq_accesses               1130                       # number of ReadReq accesses(hits+misses)
 system.cpu.dcache.ReadReq_avg_miss_latency        14000                       # average ReadReq miss latency
 system.cpu.dcache.ReadReq_avg_mshr_miss_latency        13000                       # average ReadReq mshr miss latency
@@ -21,13 +21,13 @@ system.cpu.dcache.ReadReq_mshr_misses              82                       # nu
 system.cpu.dcache.WriteReq_accesses               924                       # number of WriteReq accesses(hits+misses)
 system.cpu.dcache.WriteReq_avg_miss_latency        14000                       # average WriteReq miss latency
 system.cpu.dcache.WriteReq_avg_mshr_miss_latency        13000                       # average WriteReq mshr miss latency
-system.cpu.dcache.WriteReq_hits                   874                       # number of WriteReq hits
-system.cpu.dcache.WriteReq_miss_latency        700000                       # number of WriteReq miss cycles
-system.cpu.dcache.WriteReq_miss_rate         0.054113                       # miss rate for WriteReq accesses
-system.cpu.dcache.WriteReq_misses                  50                       # number of WriteReq misses
-system.cpu.dcache.WriteReq_mshr_miss_latency       650000                       # number of WriteReq MSHR miss cycles
-system.cpu.dcache.WriteReq_mshr_miss_rate     0.054113                       # mshr miss rate for WriteReq accesses
-system.cpu.dcache.WriteReq_mshr_misses             50                       # number of WriteReq MSHR misses
+system.cpu.dcache.WriteReq_hits                   860                       # number of WriteReq hits
+system.cpu.dcache.WriteReq_miss_latency        896000                       # number of WriteReq miss cycles
+system.cpu.dcache.WriteReq_miss_rate         0.069264                       # miss rate for WriteReq accesses
+system.cpu.dcache.WriteReq_misses                  64                       # number of WriteReq misses
+system.cpu.dcache.WriteReq_mshr_miss_latency       832000                       # number of WriteReq MSHR miss cycles
+system.cpu.dcache.WriteReq_mshr_miss_rate     0.069264                       # mshr miss rate for WriteReq accesses
+system.cpu.dcache.WriteReq_mshr_misses             64                       # number of WriteReq MSHR misses
 system.cpu.dcache.avg_blocked_cycles_no_mshrs <err: div-0>                       # average number of cycles each access was blocked
 system.cpu.dcache.avg_blocked_cycles_no_targets <err: div-0>                       # average number of cycles each access was blocked
 system.cpu.dcache.avg_refs                  14.560606                       # Average number of references to valid blocks.
@@ -39,14 +39,14 @@ system.cpu.dcache.cache_copies                      0                       # nu
 system.cpu.dcache.demand_accesses                2054                       # number of demand (read+write) accesses
 system.cpu.dcache.demand_avg_miss_latency        14000                       # average overall miss latency
 system.cpu.dcache.demand_avg_mshr_miss_latency        13000                       # average overall mshr miss latency
-system.cpu.dcache.demand_hits                    1922                       # number of demand (read+write) hits
-system.cpu.dcache.demand_miss_latency         1848000                       # number of demand (read+write) miss cycles
-system.cpu.dcache.demand_miss_rate           0.064265                       # miss rate for demand accesses
-system.cpu.dcache.demand_misses                   132                       # number of demand (read+write) misses
+system.cpu.dcache.demand_hits                    1908                       # number of demand (read+write) hits
+system.cpu.dcache.demand_miss_latency         2044000                       # number of demand (read+write) miss cycles
+system.cpu.dcache.demand_miss_rate           0.071081                       # miss rate for demand accesses
+system.cpu.dcache.demand_misses                   146                       # number of demand (read+write) misses
 system.cpu.dcache.demand_mshr_hits                  0                       # number of demand (read+write) MSHR hits
-system.cpu.dcache.demand_mshr_miss_latency      1716000                       # number of demand (read+write) MSHR miss cycles
-system.cpu.dcache.demand_mshr_miss_rate      0.064265                       # mshr miss rate for demand accesses
-system.cpu.dcache.demand_mshr_misses              132                       # number of demand (read+write) MSHR misses
+system.cpu.dcache.demand_mshr_miss_latency      1898000                       # number of demand (read+write) MSHR miss cycles
+system.cpu.dcache.demand_mshr_miss_rate      0.071081                       # mshr miss rate for demand accesses
+system.cpu.dcache.demand_mshr_misses              146                       # number of demand (read+write) MSHR misses
 system.cpu.dcache.fast_writes                       0                       # number of fast writes performed
 system.cpu.dcache.mshr_cap_events                   0                       # number of times MSHR cap was activated
 system.cpu.dcache.no_allocate_misses                0                       # Number of misses that were no-allocate
@@ -54,14 +54,14 @@ system.cpu.dcache.overall_accesses               2054                       # nu
 system.cpu.dcache.overall_avg_miss_latency        14000                       # average overall miss latency
 system.cpu.dcache.overall_avg_mshr_miss_latency        13000                       # average overall mshr miss latency
 system.cpu.dcache.overall_avg_mshr_uncacheable_latency <err: div-0>                       # average overall mshr uncacheable latency
-system.cpu.dcache.overall_hits                   1922                       # number of overall hits
-system.cpu.dcache.overall_miss_latency        1848000                       # number of overall miss cycles
-system.cpu.dcache.overall_miss_rate          0.064265                       # miss rate for overall accesses
-system.cpu.dcache.overall_misses                  132                       # number of overall misses
+system.cpu.dcache.overall_hits                   1908                       # number of overall hits
+system.cpu.dcache.overall_miss_latency        2044000                       # number of overall miss cycles
+system.cpu.dcache.overall_miss_rate          0.071081                       # miss rate for overall accesses
+system.cpu.dcache.overall_misses                  146                       # number of overall misses
 system.cpu.dcache.overall_mshr_hits                 0                       # number of overall MSHR hits
-system.cpu.dcache.overall_mshr_miss_latency      1716000                       # number of overall MSHR miss cycles
-system.cpu.dcache.overall_mshr_miss_rate     0.064265                       # mshr miss rate for overall accesses
-system.cpu.dcache.overall_mshr_misses             132                       # number of overall MSHR misses
+system.cpu.dcache.overall_mshr_miss_latency      1898000                       # number of overall MSHR miss cycles
+system.cpu.dcache.overall_mshr_miss_rate     0.071081                       # mshr miss rate for overall accesses
+system.cpu.dcache.overall_mshr_misses             146                       # number of overall MSHR misses
 system.cpu.dcache.overall_mshr_uncacheable_latency            0                       # number of overall MSHR uncacheable cycles
 system.cpu.dcache.overall_mshr_uncacheable_misses            0                       # number of overall MSHR uncacheable misses
 system.cpu.dcache.prefetcher.num_hwpf_already_in_cache            0                       # number of hwpf that were already in the cache
@@ -76,7 +76,7 @@ system.cpu.dcache.prefetcher.num_hwpf_squashed_from_miss            0
 system.cpu.dcache.replacements                      0                       # number of replacements
 system.cpu.dcache.sampled_refs                    132                       # Sample count of references to valid blocks.
 system.cpu.dcache.soft_prefetch_mshr_full            0                       # number of mshr full events for SW prefetching instrutions
-system.cpu.dcache.tagsinuse                 85.283494                       # Cycle average of tags in use
+system.cpu.dcache.tagsinuse                 85.440937                       # Cycle average of tags in use
 system.cpu.dcache.total_refs                     1922                       # Total number of references to valid blocks.
 system.cpu.dcache.warmup_cycle                      0                       # Cycle when the warmup percentage was hit.
 system.cpu.dcache.writebacks                        0                       # number of writebacks
@@ -138,34 +138,52 @@ system.cpu.icache.prefetcher.num_hwpf_squashed_from_miss            0
 system.cpu.icache.replacements                     13                       # number of replacements
 system.cpu.icache.sampled_refs                    303                       # Sample count of references to valid blocks.
 system.cpu.icache.soft_prefetch_mshr_full            0                       # number of mshr full events for SW prefetching instrutions
-system.cpu.icache.tagsinuse                136.309471                       # Cycle average of tags in use
+system.cpu.icache.tagsinuse                136.727640                       # Cycle average of tags in use
 system.cpu.icache.total_refs                     5355                       # Total number of references to valid blocks.
 system.cpu.icache.warmup_cycle                      0                       # Cycle when the warmup percentage was hit.
 system.cpu.icache.writebacks                        0                       # number of writebacks
 system.cpu.idle_fraction                            0                       # Percentage of idle cycles
-system.cpu.l2cache.ReadReq_accesses               435                       # number of ReadReq accesses(hits+misses)
-system.cpu.l2cache.ReadReq_avg_miss_latency        13000                       # average ReadReq miss latency
+system.cpu.l2cache.ReadExReq_accesses              50                       # number of ReadExReq accesses(hits+misses)
+system.cpu.l2cache.ReadExReq_avg_miss_latency        12000                       # average ReadExReq miss latency
+system.cpu.l2cache.ReadExReq_avg_mshr_miss_latency        11000                       # average ReadExReq mshr miss latency
+system.cpu.l2cache.ReadExReq_miss_latency       600000                       # number of ReadExReq miss cycles
+system.cpu.l2cache.ReadExReq_miss_rate              1                       # miss rate for ReadExReq accesses
+system.cpu.l2cache.ReadExReq_misses                50                       # number of ReadExReq misses
+system.cpu.l2cache.ReadExReq_mshr_miss_latency       550000                       # number of ReadExReq MSHR miss cycles
+system.cpu.l2cache.ReadExReq_mshr_miss_rate            1                       # mshr miss rate for ReadExReq accesses
+system.cpu.l2cache.ReadExReq_mshr_misses           50                       # number of ReadExReq MSHR misses
+system.cpu.l2cache.ReadReq_accesses               385                       # number of ReadReq accesses(hits+misses)
+system.cpu.l2cache.ReadReq_avg_miss_latency        12000                       # average ReadReq miss latency
 system.cpu.l2cache.ReadReq_avg_mshr_miss_latency        11000                       # average ReadReq mshr miss latency
 system.cpu.l2cache.ReadReq_hits                     2                       # number of ReadReq hits
-system.cpu.l2cache.ReadReq_miss_latency       5629000                       # number of ReadReq miss cycles
-system.cpu.l2cache.ReadReq_miss_rate         0.995402                       # miss rate for ReadReq accesses
-system.cpu.l2cache.ReadReq_misses                 433                       # number of ReadReq misses
-system.cpu.l2cache.ReadReq_mshr_miss_latency      4763000                       # number of ReadReq MSHR miss cycles
-system.cpu.l2cache.ReadReq_mshr_miss_rate     0.995402                       # mshr miss rate for ReadReq accesses
-system.cpu.l2cache.ReadReq_mshr_misses            433                       # number of ReadReq MSHR misses
+system.cpu.l2cache.ReadReq_miss_latency       4596000                       # number of ReadReq miss cycles
+system.cpu.l2cache.ReadReq_miss_rate         0.994805                       # miss rate for ReadReq accesses
+system.cpu.l2cache.ReadReq_misses                 383                       # number of ReadReq misses
+system.cpu.l2cache.ReadReq_mshr_miss_latency      4213000                       # number of ReadReq MSHR miss cycles
+system.cpu.l2cache.ReadReq_mshr_miss_rate     0.994805                       # mshr miss rate for ReadReq accesses
+system.cpu.l2cache.ReadReq_mshr_misses            383                       # number of ReadReq MSHR misses
+system.cpu.l2cache.UpgradeReq_accesses             14                       # number of UpgradeReq accesses(hits+misses)
+system.cpu.l2cache.UpgradeReq_avg_miss_latency        12000                       # average UpgradeReq miss latency
+system.cpu.l2cache.UpgradeReq_avg_mshr_miss_latency        11000                       # average UpgradeReq mshr miss latency
+system.cpu.l2cache.UpgradeReq_miss_latency       168000                       # number of UpgradeReq miss cycles
+system.cpu.l2cache.UpgradeReq_miss_rate             1                       # miss rate for UpgradeReq accesses
+system.cpu.l2cache.UpgradeReq_misses               14                       # number of UpgradeReq misses
+system.cpu.l2cache.UpgradeReq_mshr_miss_latency       154000                       # number of UpgradeReq MSHR miss cycles
+system.cpu.l2cache.UpgradeReq_mshr_miss_rate            1                       # mshr miss rate for UpgradeReq accesses
+system.cpu.l2cache.UpgradeReq_mshr_misses           14                       # number of UpgradeReq MSHR misses
 system.cpu.l2cache.avg_blocked_cycles_no_mshrs <err: div-0>                       # average number of cycles each access was blocked
 system.cpu.l2cache.avg_blocked_cycles_no_targets <err: div-0>                       # average number of cycles each access was blocked
-system.cpu.l2cache.avg_refs                  0.004619                       # Average number of references to valid blocks.
+system.cpu.l2cache.avg_refs                  0.005420                       # Average number of references to valid blocks.
 system.cpu.l2cache.blocked_no_mshrs                 0                       # number of cycles access was blocked
 system.cpu.l2cache.blocked_no_targets               0                       # number of cycles access was blocked
 system.cpu.l2cache.blocked_cycles_no_mshrs            0                       # number of cycles access was blocked
 system.cpu.l2cache.blocked_cycles_no_targets            0                       # number of cycles access was blocked
 system.cpu.l2cache.cache_copies                     0                       # number of cache copies performed
 system.cpu.l2cache.demand_accesses                435                       # number of demand (read+write) accesses
-system.cpu.l2cache.demand_avg_miss_latency        13000                       # average overall miss latency
+system.cpu.l2cache.demand_avg_miss_latency        12000                       # average overall miss latency
 system.cpu.l2cache.demand_avg_mshr_miss_latency        11000                       # average overall mshr miss latency
 system.cpu.l2cache.demand_hits                      2                       # number of demand (read+write) hits
-system.cpu.l2cache.demand_miss_latency        5629000                       # number of demand (read+write) miss cycles
+system.cpu.l2cache.demand_miss_latency        5196000                       # number of demand (read+write) miss cycles
 system.cpu.l2cache.demand_miss_rate          0.995402                       # miss rate for demand accesses
 system.cpu.l2cache.demand_misses                  433                       # number of demand (read+write) misses
 system.cpu.l2cache.demand_mshr_hits                 0                       # number of demand (read+write) MSHR hits
@@ -176,11 +194,11 @@ system.cpu.l2cache.fast_writes                      0                       # nu
 system.cpu.l2cache.mshr_cap_events                  0                       # number of times MSHR cap was activated
 system.cpu.l2cache.no_allocate_misses               0                       # Number of misses that were no-allocate
 system.cpu.l2cache.overall_accesses               435                       # number of overall (read+write) accesses
-system.cpu.l2cache.overall_avg_miss_latency        13000                       # average overall miss latency
+system.cpu.l2cache.overall_avg_miss_latency        12000                       # average overall miss latency
 system.cpu.l2cache.overall_avg_mshr_miss_latency        11000                       # average overall mshr miss latency
 system.cpu.l2cache.overall_avg_mshr_uncacheable_latency <err: div-0>                       # average overall mshr uncacheable latency
 system.cpu.l2cache.overall_hits                     2                       # number of overall hits
-system.cpu.l2cache.overall_miss_latency       5629000                       # number of overall miss cycles
+system.cpu.l2cache.overall_miss_latency       5196000                       # number of overall miss cycles
 system.cpu.l2cache.overall_miss_rate         0.995402                       # miss rate for overall accesses
 system.cpu.l2cache.overall_misses                 433                       # number of overall misses
 system.cpu.l2cache.overall_mshr_hits                0                       # number of overall MSHR hits
@@ -199,14 +217,14 @@ system.cpu.l2cache.prefetcher.num_hwpf_removed_MSHR_hit            0
 system.cpu.l2cache.prefetcher.num_hwpf_span_page            0                       # number of hwpf spanning a virtual page
 system.cpu.l2cache.prefetcher.num_hwpf_squashed_from_miss            0                       # number of hwpf that got squashed due to a miss aborting calculation time
 system.cpu.l2cache.replacements                     0                       # number of replacements
-system.cpu.l2cache.sampled_refs                   433                       # Sample count of references to valid blocks.
+system.cpu.l2cache.sampled_refs                   369                       # Sample count of references to valid blocks.
 system.cpu.l2cache.soft_prefetch_mshr_full            0                       # number of mshr full events for SW prefetching instrutions
-system.cpu.l2cache.tagsinuse               222.872415                       # Cycle average of tags in use
+system.cpu.l2cache.tagsinuse               184.077317                       # Cycle average of tags in use
 system.cpu.l2cache.total_refs                       2                       # Total number of references to valid blocks.
 system.cpu.l2cache.warmup_cycle                     0                       # Cycle when the warmup percentage was hit.
 system.cpu.l2cache.writebacks                       0                       # number of writebacks
 system.cpu.not_idle_fraction                        1                       # Percentage of non-idle cycles
-system.cpu.numCycles                         13362000                       # number of cpu cycles simulated
+system.cpu.numCycles                         13544000                       # number of cpu cycles simulated
 system.cpu.num_insts                             5657                       # Number of instructions executed
 system.cpu.num_refs                              2055                       # Number of memory references
 system.cpu.workload.PROG:num_syscalls              13                       # Number of system calls
diff --git a/tests/quick/00.hello/ref/mips/linux/simple-timing/stdout b/tests/quick/00.hello/ref/mips/linux/simple-timing/stdout
index 6b688641a..c24f82c4f 100644
--- a/tests/quick/00.hello/ref/mips/linux/simple-timing/stdout
+++ b/tests/quick/00.hello/ref/mips/linux/simple-timing/stdout
@@ -6,9 +6,9 @@ The Regents of The University of Michigan
 All Rights Reserved
 
 
-M5 compiled May 15 2007 12:54:05
-M5 started Tue May 15 12:54:07 2007
+M5 compiled Aug  3 2007 04:06:41
+M5 started Fri Aug  3 04:31:10 2007
 M5 executing on zizzer.eecs.umich.edu
 command line: build/MIPS_SE/m5.fast -d build/MIPS_SE/tests/fast/quick/00.hello/mips/linux/simple-timing tests/run.py quick/00.hello/mips/linux/simple-timing
 Global frequency set at 1000000000000 ticks per second
-Exiting @ tick 13362000 because target called exit()
+Exiting @ tick 13544000 because target called exit()
diff --git a/tests/quick/00.hello/ref/sparc/linux/simple-atomic/config.ini b/tests/quick/00.hello/ref/sparc/linux/simple-atomic/config.ini
index 0e142e6ce..d7237a4af 100644
--- a/tests/quick/00.hello/ref/sparc/linux/simple-atomic/config.ini
+++ b/tests/quick/00.hello/ref/sparc/linux/simple-atomic/config.ini
@@ -11,7 +11,7 @@ physmem=system.physmem
 
 [system.cpu]
 type=AtomicSimpleCPU
-children=workload
+children=tracer workload
 clock=500
 cpu_id=0
 defer_registration=false
@@ -25,11 +25,15 @@ phase=0
 progress_interval=0
 simulate_stalls=false
 system=system
+tracer=system.cpu.tracer
 width=1
 workload=system.cpu.workload
 dcache_port=system.membus.port[2]
 icache_port=system.membus.port[1]
 
+[system.cpu.tracer]
+type=ExeTracer
+
 [system.cpu.workload]
 type=LiveProcess
 cmd=hello
@@ -53,7 +57,7 @@ bus_id=0
 clock=1000
 responder_set=false
 width=64
-port=system.physmem.port system.cpu.icache_port system.cpu.dcache_port
+port=system.physmem.port[0] system.cpu.icache_port system.cpu.dcache_port
 
 [system.physmem]
 type=PhysicalMemory
diff --git a/tests/quick/00.hello/ref/sparc/linux/simple-atomic/m5stats.txt b/tests/quick/00.hello/ref/sparc/linux/simple-atomic/m5stats.txt
index 8e0baaf8b..ab2e76d2a 100644
--- a/tests/quick/00.hello/ref/sparc/linux/simple-atomic/m5stats.txt
+++ b/tests/quick/00.hello/ref/sparc/linux/simple-atomic/m5stats.txt
@@ -1,9 +1,9 @@
 
 ---------- Begin Simulation Statistics ----------
-host_inst_rate                                 439375                       # Simulator instruction rate (inst/s)
-host_mem_usage                                 149124                       # Number of bytes of host memory used
-host_seconds                                     0.01                       # Real time elapsed on the host
-host_tick_rate                              211870315                       # Simulator tick rate (ticks/s)
+host_inst_rate                                  15625                       # Simulator instruction rate (inst/s)
+host_mem_usage                                 149968                       # Number of bytes of host memory used
+host_seconds                                     0.31                       # Real time elapsed on the host
+host_tick_rate                                7799892                       # Simulator tick rate (ticks/s)
 sim_freq                                 1000000000000                       # Frequency of simulated ticks
 sim_insts                                        4863                       # Number of instructions simulated
 sim_seconds                                  0.000002                       # Number of seconds simulated
diff --git a/tests/quick/00.hello/ref/sparc/linux/simple-atomic/stdout b/tests/quick/00.hello/ref/sparc/linux/simple-atomic/stdout
index 9e1770f92..40d1acccc 100644
--- a/tests/quick/00.hello/ref/sparc/linux/simple-atomic/stdout
+++ b/tests/quick/00.hello/ref/sparc/linux/simple-atomic/stdout
@@ -5,8 +5,8 @@ The Regents of The University of Michigan
 All Rights Reserved
 
 
-M5 compiled May 15 2007 13:02:31
-M5 started Tue May 15 17:00:05 2007
+M5 compiled Aug  3 2007 04:11:25
+M5 started Fri Aug  3 04:31:18 2007
 M5 executing on zizzer.eecs.umich.edu
 command line: build/SPARC_SE/m5.fast -d build/SPARC_SE/tests/fast/quick/00.hello/sparc/linux/simple-atomic tests/run.py quick/00.hello/sparc/linux/simple-atomic
 Global frequency set at 1000000000000 ticks per second
diff --git a/tests/quick/00.hello/ref/sparc/linux/simple-timing/config.ini b/tests/quick/00.hello/ref/sparc/linux/simple-timing/config.ini
index fdb2bc3c9..4a945c9a3 100644
--- a/tests/quick/00.hello/ref/sparc/linux/simple-timing/config.ini
+++ b/tests/quick/00.hello/ref/sparc/linux/simple-timing/config.ini
@@ -11,7 +11,7 @@ physmem=system.physmem
 
 [system.cpu]
 type=TimingSimpleCPU
-children=dcache icache l2cache toL2Bus workload
+children=dcache icache l2cache toL2Bus tracer workload
 clock=500
 cpu_id=0
 defer_registration=false
@@ -24,17 +24,16 @@ max_loads_any_thread=0
 phase=0
 progress_interval=0
 system=system
+tracer=system.cpu.tracer
 workload=system.cpu.workload
 dcache_port=system.cpu.dcache.cpu_side
 icache_port=system.cpu.icache.cpu_side
 
 [system.cpu.dcache]
 type=BaseCache
-adaptive_compression=false
+addr_range=0:18446744073709551615
 assoc=2
 block_size=64
-compressed_bus=false
-compression_latency=0
 hash_delay=1
 latency=1000
 lifo=false
@@ -44,7 +43,7 @@ prefetch_access=false
 prefetch_cache_check_push=true
 prefetch_data_accesses_only=false
 prefetch_degree=1
-prefetch_latency=10
+prefetch_latency=10000
 prefetch_miss=false
 prefetch_past_page=false
 prefetch_policy=none
@@ -52,12 +51,10 @@ prefetch_serial_squash=false
 prefetch_use_cpu_id=true
 prefetcher_size=100
 prioritizeRequests=false
-protocol=Null
 repl=Null
 size=262144
 split=false
 split_size=0
-store_compressed=false
 subblock_size=0
 tgts_per_mshr=5
 trace_addr=0
@@ -68,11 +65,9 @@ mem_side=system.cpu.toL2Bus.port[1]
 
 [system.cpu.icache]
 type=BaseCache
-adaptive_compression=false
+addr_range=0:18446744073709551615
 assoc=2
 block_size=64
-compressed_bus=false
-compression_latency=0
 hash_delay=1
 latency=1000
 lifo=false
@@ -82,7 +77,7 @@ prefetch_access=false
 prefetch_cache_check_push=true
 prefetch_data_accesses_only=false
 prefetch_degree=1
-prefetch_latency=10
+prefetch_latency=10000
 prefetch_miss=false
 prefetch_past_page=false
 prefetch_policy=none
@@ -90,12 +85,10 @@ prefetch_serial_squash=false
 prefetch_use_cpu_id=true
 prefetcher_size=100
 prioritizeRequests=false
-protocol=Null
 repl=Null
 size=131072
 split=false
 split_size=0
-store_compressed=false
 subblock_size=0
 tgts_per_mshr=5
 trace_addr=0
@@ -106,11 +99,9 @@ mem_side=system.cpu.toL2Bus.port[0]
 
 [system.cpu.l2cache]
 type=BaseCache
-adaptive_compression=false
+addr_range=0:18446744073709551615
 assoc=2
 block_size=64
-compressed_bus=false
-compression_latency=0
 hash_delay=1
 latency=10000
 lifo=false
@@ -120,7 +111,7 @@ prefetch_access=false
 prefetch_cache_check_push=true
 prefetch_data_accesses_only=false
 prefetch_degree=1
-prefetch_latency=10
+prefetch_latency=100000
 prefetch_miss=false
 prefetch_past_page=false
 prefetch_policy=none
@@ -128,12 +119,10 @@ prefetch_serial_squash=false
 prefetch_use_cpu_id=true
 prefetcher_size=100
 prioritizeRequests=false
-protocol=Null
 repl=Null
 size=2097152
 split=false
 split_size=0
-store_compressed=false
 subblock_size=0
 tgts_per_mshr=5
 trace_addr=0
@@ -151,6 +140,9 @@ responder_set=false
 width=64
 port=system.cpu.icache.mem_side system.cpu.dcache.mem_side system.cpu.l2cache.cpu_side
 
+[system.cpu.tracer]
+type=ExeTracer
+
 [system.cpu.workload]
 type=LiveProcess
 cmd=hello
@@ -174,7 +166,7 @@ bus_id=0
 clock=1000
 responder_set=false
 width=64
-port=system.physmem.port system.cpu.l2cache.mem_side
+port=system.physmem.port[0] system.cpu.l2cache.mem_side
 
 [system.physmem]
 type=PhysicalMemory
diff --git a/tests/quick/00.hello/ref/sparc/linux/simple-timing/m5stats.txt b/tests/quick/00.hello/ref/sparc/linux/simple-timing/m5stats.txt
index 839307810..7810c3335 100644
--- a/tests/quick/00.hello/ref/sparc/linux/simple-timing/m5stats.txt
+++ b/tests/quick/00.hello/ref/sparc/linux/simple-timing/m5stats.txt
@@ -1,33 +1,33 @@
 
 ---------- Begin Simulation Statistics ----------
-host_inst_rate                                 239687                       # Simulator instruction rate (inst/s)
-host_mem_usage                                 154512                       # Number of bytes of host memory used
-host_seconds                                     0.02                       # Real time elapsed on the host
-host_tick_rate                              542234464                       # Simulator tick rate (ticks/s)
+host_inst_rate                                  36222                       # Simulator instruction rate (inst/s)
+host_mem_usage                                 155556                       # Number of bytes of host memory used
+host_seconds                                     0.13                       # Real time elapsed on the host
+host_tick_rate                               84966253                       # Simulator tick rate (ticks/s)
 sim_freq                                 1000000000000                       # Frequency of simulated ticks
 sim_insts                                        4863                       # Number of instructions simulated
 sim_seconds                                  0.000011                       # Number of seconds simulated
-sim_ticks                                    11221000                       # Number of ticks simulated
+sim_ticks                                    11443000                       # Number of ticks simulated
 system.cpu.dcache.ReadReq_accesses                608                       # number of ReadReq accesses(hits+misses)
-system.cpu.dcache.ReadReq_avg_miss_latency 13796.296296                       # average ReadReq miss latency
-system.cpu.dcache.ReadReq_avg_mshr_miss_latency 12796.296296                       # average ReadReq mshr miss latency
+system.cpu.dcache.ReadReq_avg_miss_latency 13962.962963                       # average ReadReq miss latency
+system.cpu.dcache.ReadReq_avg_mshr_miss_latency 12962.962963                       # average ReadReq mshr miss latency
 system.cpu.dcache.ReadReq_hits                    554                       # number of ReadReq hits
-system.cpu.dcache.ReadReq_miss_latency         745000                       # number of ReadReq miss cycles
+system.cpu.dcache.ReadReq_miss_latency         754000                       # number of ReadReq miss cycles
 system.cpu.dcache.ReadReq_miss_rate          0.088816                       # miss rate for ReadReq accesses
 system.cpu.dcache.ReadReq_misses                   54                       # number of ReadReq misses
-system.cpu.dcache.ReadReq_mshr_miss_latency       691000                       # number of ReadReq MSHR miss cycles
+system.cpu.dcache.ReadReq_mshr_miss_latency       700000                       # number of ReadReq MSHR miss cycles
 system.cpu.dcache.ReadReq_mshr_miss_rate     0.088816                       # mshr miss rate for ReadReq accesses
 system.cpu.dcache.ReadReq_mshr_misses              54                       # number of ReadReq MSHR misses
 system.cpu.dcache.WriteReq_accesses               661                       # number of WriteReq accesses(hits+misses)
 system.cpu.dcache.WriteReq_avg_miss_latency        14000                       # average WriteReq miss latency
 system.cpu.dcache.WriteReq_avg_mshr_miss_latency        13000                       # average WriteReq mshr miss latency
-system.cpu.dcache.WriteReq_hits                   577                       # number of WriteReq hits
-system.cpu.dcache.WriteReq_miss_latency       1176000                       # number of WriteReq miss cycles
-system.cpu.dcache.WriteReq_miss_rate         0.127080                       # miss rate for WriteReq accesses
-system.cpu.dcache.WriteReq_misses                  84                       # number of WriteReq misses
-system.cpu.dcache.WriteReq_mshr_miss_latency      1092000                       # number of WriteReq MSHR miss cycles
-system.cpu.dcache.WriteReq_mshr_miss_rate     0.127080                       # mshr miss rate for WriteReq accesses
-system.cpu.dcache.WriteReq_mshr_misses             84                       # number of WriteReq MSHR misses
+system.cpu.dcache.WriteReq_hits                   562                       # number of WriteReq hits
+system.cpu.dcache.WriteReq_miss_latency       1386000                       # number of WriteReq miss cycles
+system.cpu.dcache.WriteReq_miss_rate         0.149773                       # miss rate for WriteReq accesses
+system.cpu.dcache.WriteReq_misses                  99                       # number of WriteReq misses
+system.cpu.dcache.WriteReq_mshr_miss_latency      1287000                       # number of WriteReq MSHR miss cycles
+system.cpu.dcache.WriteReq_mshr_miss_rate     0.149773                       # mshr miss rate for WriteReq accesses
+system.cpu.dcache.WriteReq_mshr_misses             99                       # number of WriteReq MSHR misses
 system.cpu.dcache.avg_blocked_cycles_no_mshrs <err: div-0>                       # average number of cycles each access was blocked
 system.cpu.dcache.avg_blocked_cycles_no_targets <err: div-0>                       # average number of cycles each access was blocked
 system.cpu.dcache.avg_refs                   8.195652                       # Average number of references to valid blocks.
@@ -37,31 +37,31 @@ system.cpu.dcache.blocked_cycles_no_mshrs            0                       # n
 system.cpu.dcache.blocked_cycles_no_targets            0                       # number of cycles access was blocked
 system.cpu.dcache.cache_copies                      0                       # number of cache copies performed
 system.cpu.dcache.demand_accesses                1269                       # number of demand (read+write) accesses
-system.cpu.dcache.demand_avg_miss_latency 13920.289855                       # average overall miss latency
-system.cpu.dcache.demand_avg_mshr_miss_latency 12920.289855                       # average overall mshr miss latency
-system.cpu.dcache.demand_hits                    1131                       # number of demand (read+write) hits
-system.cpu.dcache.demand_miss_latency         1921000                       # number of demand (read+write) miss cycles
-system.cpu.dcache.demand_miss_rate           0.108747                       # miss rate for demand accesses
-system.cpu.dcache.demand_misses                   138                       # number of demand (read+write) misses
+system.cpu.dcache.demand_avg_miss_latency 13986.928105                       # average overall miss latency
+system.cpu.dcache.demand_avg_mshr_miss_latency 12986.928105                       # average overall mshr miss latency
+system.cpu.dcache.demand_hits                    1116                       # number of demand (read+write) hits
+system.cpu.dcache.demand_miss_latency         2140000                       # number of demand (read+write) miss cycles
+system.cpu.dcache.demand_miss_rate           0.120567                       # miss rate for demand accesses
+system.cpu.dcache.demand_misses                   153                       # number of demand (read+write) misses
 system.cpu.dcache.demand_mshr_hits                  0                       # number of demand (read+write) MSHR hits
-system.cpu.dcache.demand_mshr_miss_latency      1783000                       # number of demand (read+write) MSHR miss cycles
-system.cpu.dcache.demand_mshr_miss_rate      0.108747                       # mshr miss rate for demand accesses
-system.cpu.dcache.demand_mshr_misses              138                       # number of demand (read+write) MSHR misses
+system.cpu.dcache.demand_mshr_miss_latency      1987000                       # number of demand (read+write) MSHR miss cycles
+system.cpu.dcache.demand_mshr_miss_rate      0.120567                       # mshr miss rate for demand accesses
+system.cpu.dcache.demand_mshr_misses              153                       # number of demand (read+write) MSHR misses
 system.cpu.dcache.fast_writes                       0                       # number of fast writes performed
 system.cpu.dcache.mshr_cap_events                   0                       # number of times MSHR cap was activated
 system.cpu.dcache.no_allocate_misses                0                       # Number of misses that were no-allocate
 system.cpu.dcache.overall_accesses               1269                       # number of overall (read+write) accesses
-system.cpu.dcache.overall_avg_miss_latency 13920.289855                       # average overall miss latency
-system.cpu.dcache.overall_avg_mshr_miss_latency 12920.289855                       # average overall mshr miss latency
+system.cpu.dcache.overall_avg_miss_latency 13986.928105                       # average overall miss latency
+system.cpu.dcache.overall_avg_mshr_miss_latency 12986.928105                       # average overall mshr miss latency
 system.cpu.dcache.overall_avg_mshr_uncacheable_latency <err: div-0>                       # average overall mshr uncacheable latency
-system.cpu.dcache.overall_hits                   1131                       # number of overall hits
-system.cpu.dcache.overall_miss_latency        1921000                       # number of overall miss cycles
-system.cpu.dcache.overall_miss_rate          0.108747                       # miss rate for overall accesses
-system.cpu.dcache.overall_misses                  138                       # number of overall misses
+system.cpu.dcache.overall_hits                   1116                       # number of overall hits
+system.cpu.dcache.overall_miss_latency        2140000                       # number of overall miss cycles
+system.cpu.dcache.overall_miss_rate          0.120567                       # miss rate for overall accesses
+system.cpu.dcache.overall_misses                  153                       # number of overall misses
 system.cpu.dcache.overall_mshr_hits                 0                       # number of overall MSHR hits
-system.cpu.dcache.overall_mshr_miss_latency      1783000                       # number of overall MSHR miss cycles
-system.cpu.dcache.overall_mshr_miss_rate     0.108747                       # mshr miss rate for overall accesses
-system.cpu.dcache.overall_mshr_misses             138                       # number of overall MSHR misses
+system.cpu.dcache.overall_mshr_miss_latency      1987000                       # number of overall MSHR miss cycles
+system.cpu.dcache.overall_mshr_miss_rate     0.120567                       # mshr miss rate for overall accesses
+system.cpu.dcache.overall_mshr_misses             153                       # number of overall MSHR misses
 system.cpu.dcache.overall_mshr_uncacheable_latency            0                       # number of overall MSHR uncacheable cycles
 system.cpu.dcache.overall_mshr_uncacheable_misses            0                       # number of overall MSHR uncacheable misses
 system.cpu.dcache.prefetcher.num_hwpf_already_in_cache            0                       # number of hwpf that were already in the cache
@@ -76,18 +76,18 @@ system.cpu.dcache.prefetcher.num_hwpf_squashed_from_miss            0
 system.cpu.dcache.replacements                      0                       # number of replacements
 system.cpu.dcache.sampled_refs                    138                       # Sample count of references to valid blocks.
 system.cpu.dcache.soft_prefetch_mshr_full            0                       # number of mshr full events for SW prefetching instrutions
-system.cpu.dcache.tagsinuse                 83.705022                       # Cycle average of tags in use
+system.cpu.dcache.tagsinuse                 83.865949                       # Cycle average of tags in use
 system.cpu.dcache.total_refs                     1131                       # Total number of references to valid blocks.
 system.cpu.dcache.warmup_cycle                      0                       # Cycle when the warmup percentage was hit.
 system.cpu.dcache.writebacks                        0                       # number of writebacks
 system.cpu.icache.ReadReq_accesses               4864                       # number of ReadReq accesses(hits+misses)
-system.cpu.icache.ReadReq_avg_miss_latency 13914.062500                       # average ReadReq miss latency
-system.cpu.icache.ReadReq_avg_mshr_miss_latency 12914.062500                       # average ReadReq mshr miss latency
+system.cpu.icache.ReadReq_avg_miss_latency 13984.375000                       # average ReadReq miss latency
+system.cpu.icache.ReadReq_avg_mshr_miss_latency 12984.375000                       # average ReadReq mshr miss latency
 system.cpu.icache.ReadReq_hits                   4608                       # number of ReadReq hits
-system.cpu.icache.ReadReq_miss_latency        3562000                       # number of ReadReq miss cycles
+system.cpu.icache.ReadReq_miss_latency        3580000                       # number of ReadReq miss cycles
 system.cpu.icache.ReadReq_miss_rate          0.052632                       # miss rate for ReadReq accesses
 system.cpu.icache.ReadReq_misses                  256                       # number of ReadReq misses
-system.cpu.icache.ReadReq_mshr_miss_latency      3306000                       # number of ReadReq MSHR miss cycles
+system.cpu.icache.ReadReq_mshr_miss_latency      3324000                       # number of ReadReq MSHR miss cycles
 system.cpu.icache.ReadReq_mshr_miss_rate     0.052632                       # mshr miss rate for ReadReq accesses
 system.cpu.icache.ReadReq_mshr_misses             256                       # number of ReadReq MSHR misses
 system.cpu.icache.avg_blocked_cycles_no_mshrs <err: div-0>                       # average number of cycles each access was blocked
@@ -99,29 +99,29 @@ system.cpu.icache.blocked_cycles_no_mshrs            0                       # n
 system.cpu.icache.blocked_cycles_no_targets            0                       # number of cycles access was blocked
 system.cpu.icache.cache_copies                      0                       # number of cache copies performed
 system.cpu.icache.demand_accesses                4864                       # number of demand (read+write) accesses
-system.cpu.icache.demand_avg_miss_latency 13914.062500                       # average overall miss latency
-system.cpu.icache.demand_avg_mshr_miss_latency 12914.062500                       # average overall mshr miss latency
+system.cpu.icache.demand_avg_miss_latency 13984.375000                       # average overall miss latency
+system.cpu.icache.demand_avg_mshr_miss_latency 12984.375000                       # average overall mshr miss latency
 system.cpu.icache.demand_hits                    4608                       # number of demand (read+write) hits
-system.cpu.icache.demand_miss_latency         3562000                       # number of demand (read+write) miss cycles
+system.cpu.icache.demand_miss_latency         3580000                       # number of demand (read+write) miss cycles
 system.cpu.icache.demand_miss_rate           0.052632                       # miss rate for demand accesses
 system.cpu.icache.demand_misses                   256                       # number of demand (read+write) misses
 system.cpu.icache.demand_mshr_hits                  0                       # number of demand (read+write) MSHR hits
-system.cpu.icache.demand_mshr_miss_latency      3306000                       # number of demand (read+write) MSHR miss cycles
+system.cpu.icache.demand_mshr_miss_latency      3324000                       # number of demand (read+write) MSHR miss cycles
 system.cpu.icache.demand_mshr_miss_rate      0.052632                       # mshr miss rate for demand accesses
 system.cpu.icache.demand_mshr_misses              256                       # number of demand (read+write) MSHR misses
 system.cpu.icache.fast_writes                       0                       # number of fast writes performed
 system.cpu.icache.mshr_cap_events                   0                       # number of times MSHR cap was activated
 system.cpu.icache.no_allocate_misses                0                       # Number of misses that were no-allocate
 system.cpu.icache.overall_accesses               4864                       # number of overall (read+write) accesses
-system.cpu.icache.overall_avg_miss_latency 13914.062500                       # average overall miss latency
-system.cpu.icache.overall_avg_mshr_miss_latency 12914.062500                       # average overall mshr miss latency
+system.cpu.icache.overall_avg_miss_latency 13984.375000                       # average overall miss latency
+system.cpu.icache.overall_avg_mshr_miss_latency 12984.375000                       # average overall mshr miss latency
 system.cpu.icache.overall_avg_mshr_uncacheable_latency <err: div-0>                       # average overall mshr uncacheable latency
 system.cpu.icache.overall_hits                   4608                       # number of overall hits
-system.cpu.icache.overall_miss_latency        3562000                       # number of overall miss cycles
+system.cpu.icache.overall_miss_latency        3580000                       # number of overall miss cycles
 system.cpu.icache.overall_miss_rate          0.052632                       # miss rate for overall accesses
 system.cpu.icache.overall_misses                  256                       # number of overall misses
 system.cpu.icache.overall_mshr_hits                 0                       # number of overall MSHR hits
-system.cpu.icache.overall_mshr_miss_latency      3306000                       # number of overall MSHR miss cycles
+system.cpu.icache.overall_mshr_miss_latency      3324000                       # number of overall MSHR miss cycles
 system.cpu.icache.overall_mshr_miss_rate     0.052632                       # mshr miss rate for overall accesses
 system.cpu.icache.overall_mshr_misses             256                       # number of overall MSHR misses
 system.cpu.icache.overall_mshr_uncacheable_latency            0                       # number of overall MSHR uncacheable cycles
@@ -138,53 +138,72 @@ system.cpu.icache.prefetcher.num_hwpf_squashed_from_miss            0
 system.cpu.icache.replacements                      0                       # number of replacements
 system.cpu.icache.sampled_refs                    256                       # Sample count of references to valid blocks.
 system.cpu.icache.soft_prefetch_mshr_full            0                       # number of mshr full events for SW prefetching instrutions
-system.cpu.icache.tagsinuse                114.172725                       # Cycle average of tags in use
+system.cpu.icache.tagsinuse                114.646434                       # Cycle average of tags in use
 system.cpu.icache.total_refs                     4608                       # Total number of references to valid blocks.
 system.cpu.icache.warmup_cycle                      0                       # Cycle when the warmup percentage was hit.
 system.cpu.icache.writebacks                        0                       # number of writebacks
 system.cpu.idle_fraction                            0                       # Percentage of idle cycles
-system.cpu.l2cache.ReadReq_accesses               391                       # number of ReadReq accesses(hits+misses)
-system.cpu.l2cache.ReadReq_avg_miss_latency        13000                       # average ReadReq miss latency
+system.cpu.l2cache.ReadExReq_accesses              84                       # number of ReadExReq accesses(hits+misses)
+system.cpu.l2cache.ReadExReq_avg_miss_latency        12000                       # average ReadExReq miss latency
+system.cpu.l2cache.ReadExReq_avg_mshr_miss_latency        11000                       # average ReadExReq mshr miss latency
+system.cpu.l2cache.ReadExReq_miss_latency      1008000                       # number of ReadExReq miss cycles
+system.cpu.l2cache.ReadExReq_miss_rate              1                       # miss rate for ReadExReq accesses
+system.cpu.l2cache.ReadExReq_misses                84                       # number of ReadExReq misses
+system.cpu.l2cache.ReadExReq_mshr_miss_latency       924000                       # number of ReadExReq MSHR miss cycles
+system.cpu.l2cache.ReadExReq_mshr_miss_rate            1                       # mshr miss rate for ReadExReq accesses
+system.cpu.l2cache.ReadExReq_mshr_misses           84                       # number of ReadExReq MSHR misses
+system.cpu.l2cache.ReadReq_accesses               310                       # number of ReadReq accesses(hits+misses)
+system.cpu.l2cache.ReadReq_avg_miss_latency        12000                       # average ReadReq miss latency
 system.cpu.l2cache.ReadReq_avg_mshr_miss_latency        11000                       # average ReadReq mshr miss latency
-system.cpu.l2cache.ReadReq_miss_latency       5083000                       # number of ReadReq miss cycles
-system.cpu.l2cache.ReadReq_miss_rate                1                       # miss rate for ReadReq accesses
-system.cpu.l2cache.ReadReq_misses                 391                       # number of ReadReq misses
-system.cpu.l2cache.ReadReq_mshr_miss_latency      4301000                       # number of ReadReq MSHR miss cycles
-system.cpu.l2cache.ReadReq_mshr_miss_rate            1                       # mshr miss rate for ReadReq accesses
-system.cpu.l2cache.ReadReq_mshr_misses            391                       # number of ReadReq MSHR misses
+system.cpu.l2cache.ReadReq_hits                     3                       # number of ReadReq hits
+system.cpu.l2cache.ReadReq_miss_latency       3684000                       # number of ReadReq miss cycles
+system.cpu.l2cache.ReadReq_miss_rate         0.990323                       # miss rate for ReadReq accesses
+system.cpu.l2cache.ReadReq_misses                 307                       # number of ReadReq misses
+system.cpu.l2cache.ReadReq_mshr_miss_latency      3377000                       # number of ReadReq MSHR miss cycles
+system.cpu.l2cache.ReadReq_mshr_miss_rate     0.990323                       # mshr miss rate for ReadReq accesses
+system.cpu.l2cache.ReadReq_mshr_misses            307                       # number of ReadReq MSHR misses
+system.cpu.l2cache.UpgradeReq_accesses             15                       # number of UpgradeReq accesses(hits+misses)
+system.cpu.l2cache.UpgradeReq_avg_miss_latency        12000                       # average UpgradeReq miss latency
+system.cpu.l2cache.UpgradeReq_avg_mshr_miss_latency        11000                       # average UpgradeReq mshr miss latency
+system.cpu.l2cache.UpgradeReq_miss_latency       180000                       # number of UpgradeReq miss cycles
+system.cpu.l2cache.UpgradeReq_miss_rate             1                       # miss rate for UpgradeReq accesses
+system.cpu.l2cache.UpgradeReq_misses               15                       # number of UpgradeReq misses
+system.cpu.l2cache.UpgradeReq_mshr_miss_latency       165000                       # number of UpgradeReq MSHR miss cycles
+system.cpu.l2cache.UpgradeReq_mshr_miss_rate            1                       # mshr miss rate for UpgradeReq accesses
+system.cpu.l2cache.UpgradeReq_mshr_misses           15                       # number of UpgradeReq MSHR misses
 system.cpu.l2cache.avg_blocked_cycles_no_mshrs <err: div-0>                       # average number of cycles each access was blocked
 system.cpu.l2cache.avg_blocked_cycles_no_targets <err: div-0>                       # average number of cycles each access was blocked
-system.cpu.l2cache.avg_refs                         0                       # Average number of references to valid blocks.
+system.cpu.l2cache.avg_refs                  0.010274                       # Average number of references to valid blocks.
 system.cpu.l2cache.blocked_no_mshrs                 0                       # number of cycles access was blocked
 system.cpu.l2cache.blocked_no_targets               0                       # number of cycles access was blocked
 system.cpu.l2cache.blocked_cycles_no_mshrs            0                       # number of cycles access was blocked
 system.cpu.l2cache.blocked_cycles_no_targets            0                       # number of cycles access was blocked
 system.cpu.l2cache.cache_copies                     0                       # number of cache copies performed
-system.cpu.l2cache.demand_accesses                391                       # number of demand (read+write) accesses
-system.cpu.l2cache.demand_avg_miss_latency        13000                       # average overall miss latency
+system.cpu.l2cache.demand_accesses                394                       # number of demand (read+write) accesses
+system.cpu.l2cache.demand_avg_miss_latency        12000                       # average overall miss latency
 system.cpu.l2cache.demand_avg_mshr_miss_latency        11000                       # average overall mshr miss latency
-system.cpu.l2cache.demand_hits                      0                       # number of demand (read+write) hits
-system.cpu.l2cache.demand_miss_latency        5083000                       # number of demand (read+write) miss cycles
-system.cpu.l2cache.demand_miss_rate                 1                       # miss rate for demand accesses
+system.cpu.l2cache.demand_hits                      3                       # number of demand (read+write) hits
+system.cpu.l2cache.demand_miss_latency        4692000                       # number of demand (read+write) miss cycles
+system.cpu.l2cache.demand_miss_rate          0.992386                       # miss rate for demand accesses
 system.cpu.l2cache.demand_misses                  391                       # number of demand (read+write) misses
 system.cpu.l2cache.demand_mshr_hits                 0                       # number of demand (read+write) MSHR hits
 system.cpu.l2cache.demand_mshr_miss_latency      4301000                       # number of demand (read+write) MSHR miss cycles
-system.cpu.l2cache.demand_mshr_miss_rate            1                       # mshr miss rate for demand accesses
+system.cpu.l2cache.demand_mshr_miss_rate     0.992386                       # mshr miss rate for demand accesses
 system.cpu.l2cache.demand_mshr_misses             391                       # number of demand (read+write) MSHR misses
 system.cpu.l2cache.fast_writes                      0                       # number of fast writes performed
 system.cpu.l2cache.mshr_cap_events                  0                       # number of times MSHR cap was activated
 system.cpu.l2cache.no_allocate_misses               0                       # Number of misses that were no-allocate
-system.cpu.l2cache.overall_accesses               391                       # number of overall (read+write) accesses
-system.cpu.l2cache.overall_avg_miss_latency        13000                       # average overall miss latency
+system.cpu.l2cache.overall_accesses               394                       # number of overall (read+write) accesses
+system.cpu.l2cache.overall_avg_miss_latency        12000                       # average overall miss latency
 system.cpu.l2cache.overall_avg_mshr_miss_latency        11000                       # average overall mshr miss latency
 system.cpu.l2cache.overall_avg_mshr_uncacheable_latency <err: div-0>                       # average overall mshr uncacheable latency
-system.cpu.l2cache.overall_hits                     0                       # number of overall hits
-system.cpu.l2cache.overall_miss_latency       5083000                       # number of overall miss cycles
-system.cpu.l2cache.overall_miss_rate                1                       # miss rate for overall accesses
+system.cpu.l2cache.overall_hits                     3                       # number of overall hits
+system.cpu.l2cache.overall_miss_latency       4692000                       # number of overall miss cycles
+system.cpu.l2cache.overall_miss_rate         0.992386                       # miss rate for overall accesses
 system.cpu.l2cache.overall_misses                 391                       # number of overall misses
 system.cpu.l2cache.overall_mshr_hits                0                       # number of overall MSHR hits
 system.cpu.l2cache.overall_mshr_miss_latency      4301000                       # number of overall MSHR miss cycles
-system.cpu.l2cache.overall_mshr_miss_rate            1                       # mshr miss rate for overall accesses
+system.cpu.l2cache.overall_mshr_miss_rate     0.992386                       # mshr miss rate for overall accesses
 system.cpu.l2cache.overall_mshr_misses            391                       # number of overall MSHR misses
 system.cpu.l2cache.overall_mshr_uncacheable_latency            0                       # number of overall MSHR uncacheable cycles
 system.cpu.l2cache.overall_mshr_uncacheable_misses            0                       # number of overall MSHR uncacheable misses
@@ -198,14 +217,14 @@ system.cpu.l2cache.prefetcher.num_hwpf_removed_MSHR_hit            0
 system.cpu.l2cache.prefetcher.num_hwpf_span_page            0                       # number of hwpf spanning a virtual page
 system.cpu.l2cache.prefetcher.num_hwpf_squashed_from_miss            0                       # number of hwpf that got squashed due to a miss aborting calculation time
 system.cpu.l2cache.replacements                     0                       # number of replacements
-system.cpu.l2cache.sampled_refs                   391                       # Sample count of references to valid blocks.
+system.cpu.l2cache.sampled_refs                   292                       # Sample count of references to valid blocks.
 system.cpu.l2cache.soft_prefetch_mshr_full            0                       # number of mshr full events for SW prefetching instrutions
-system.cpu.l2cache.tagsinuse               196.304892                       # Cycle average of tags in use
-system.cpu.l2cache.total_refs                       0                       # Total number of references to valid blocks.
+system.cpu.l2cache.tagsinuse               133.135118                       # Cycle average of tags in use
+system.cpu.l2cache.total_refs                       3                       # Total number of references to valid blocks.
 system.cpu.l2cache.warmup_cycle                     0                       # Cycle when the warmup percentage was hit.
 system.cpu.l2cache.writebacks                       0                       # number of writebacks
 system.cpu.not_idle_fraction                        1                       # Percentage of non-idle cycles
-system.cpu.numCycles                         11221000                       # number of cpu cycles simulated
+system.cpu.numCycles                         11443000                       # number of cpu cycles simulated
 system.cpu.num_insts                             4863                       # Number of instructions executed
 system.cpu.num_refs                              1269                       # Number of memory references
 system.cpu.workload.PROG:num_syscalls              11                       # Number of system calls
diff --git a/tests/quick/00.hello/ref/sparc/linux/simple-timing/stdout b/tests/quick/00.hello/ref/sparc/linux/simple-timing/stdout
index 65bf4abca..1b34d79bb 100644
--- a/tests/quick/00.hello/ref/sparc/linux/simple-timing/stdout
+++ b/tests/quick/00.hello/ref/sparc/linux/simple-timing/stdout
@@ -5,9 +5,9 @@ The Regents of The University of Michigan
 All Rights Reserved
 
 
-M5 compiled May 15 2007 13:02:31
-M5 started Tue May 15 17:00:05 2007
+M5 compiled Aug  3 2007 04:11:25
+M5 started Fri Aug  3 04:31:19 2007
 M5 executing on zizzer.eecs.umich.edu
 command line: build/SPARC_SE/m5.fast -d build/SPARC_SE/tests/fast/quick/00.hello/sparc/linux/simple-timing tests/run.py quick/00.hello/sparc/linux/simple-timing
 Global frequency set at 1000000000000 ticks per second
-Exiting @ tick 11221000 because target called exit()
+Exiting @ tick 11443000 because target called exit()
diff --git a/tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/config.ini b/tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/config.ini
index f03824f95..fea709a4d 100644
--- a/tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/config.ini
+++ b/tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/config.ini
@@ -11,7 +11,7 @@ physmem=system.physmem
 
 [system.cpu]
 type=DerivO3CPU
-children=dcache fuPool icache l2cache toL2Bus workload0 workload1
+children=dcache fuPool icache l2cache toL2Bus tracer workload0 workload1
 BTBEntries=4096
 BTBTagSize=16
 LFSTSize=1024
@@ -86,6 +86,7 @@ smtROBPolicy=Partitioned
 smtROBThreshold=100
 squashWidth=8
 system=system
+tracer=system.cpu.tracer
 trapLatency=13
 wbDepth=1
 wbWidth=8
@@ -95,12 +96,9 @@ icache_port=system.cpu.icache.cpu_side
 
 [system.cpu.dcache]
 type=BaseCache
-adaptive_compression=false
 addr_range=0:18446744073709551615
 assoc=2
 block_size=64
-compressed_bus=false
-compression_latency=0
 hash_delay=1
 latency=1000
 lifo=false
@@ -118,12 +116,10 @@ prefetch_serial_squash=false
 prefetch_use_cpu_id=true
 prefetcher_size=100
 prioritizeRequests=false
-protocol=Null
 repl=Null
 size=262144
 split=false
 split_size=0
-store_compressed=false
 subblock_size=0
 tgts_per_mshr=20
 trace_addr=0
@@ -139,11 +135,11 @@ FUList=system.cpu.fuPool.FUList0 system.cpu.fuPool.FUList1 system.cpu.fuPool.FUL
 
 [system.cpu.fuPool.FUList0]
 type=FUDesc
-children=opList0
+children=opList
 count=6
-opList=system.cpu.fuPool.FUList0.opList0
+opList=system.cpu.fuPool.FUList0.opList
 
-[system.cpu.fuPool.FUList0.opList0]
+[system.cpu.fuPool.FUList0.opList]
 type=OpDesc
 issueLat=1
 opClass=IntAlu
@@ -217,11 +213,11 @@ opLat=24
 
 [system.cpu.fuPool.FUList4]
 type=FUDesc
-children=opList0
+children=opList
 count=0
-opList=system.cpu.fuPool.FUList4.opList0
+opList=system.cpu.fuPool.FUList4.opList
 
-[system.cpu.fuPool.FUList4.opList0]
+[system.cpu.fuPool.FUList4.opList]
 type=OpDesc
 issueLat=1
 opClass=MemRead
@@ -229,11 +225,11 @@ opLat=1
 
 [system.cpu.fuPool.FUList5]
 type=FUDesc
-children=opList0
+children=opList
 count=0
-opList=system.cpu.fuPool.FUList5.opList0
+opList=system.cpu.fuPool.FUList5.opList
 
-[system.cpu.fuPool.FUList5.opList0]
+[system.cpu.fuPool.FUList5.opList]
 type=OpDesc
 issueLat=1
 opClass=MemWrite
@@ -259,11 +255,11 @@ opLat=1
 
 [system.cpu.fuPool.FUList7]
 type=FUDesc
-children=opList0
+children=opList
 count=1
-opList=system.cpu.fuPool.FUList7.opList0
+opList=system.cpu.fuPool.FUList7.opList
 
-[system.cpu.fuPool.FUList7.opList0]
+[system.cpu.fuPool.FUList7.opList]
 type=OpDesc
 issueLat=3
 opClass=IprAccess
@@ -271,12 +267,9 @@ opLat=3
 
 [system.cpu.icache]
 type=BaseCache
-adaptive_compression=false
 addr_range=0:18446744073709551615
 assoc=2
 block_size=64
-compressed_bus=false
-compression_latency=0
 hash_delay=1
 latency=1000
 lifo=false
@@ -294,12 +287,10 @@ prefetch_serial_squash=false
 prefetch_use_cpu_id=true
 prefetcher_size=100
 prioritizeRequests=false
-protocol=Null
 repl=Null
 size=131072
 split=false
 split_size=0
-store_compressed=false
 subblock_size=0
 tgts_per_mshr=20
 trace_addr=0
@@ -310,12 +301,9 @@ mem_side=system.cpu.toL2Bus.port[0]
 
 [system.cpu.l2cache]
 type=BaseCache
-adaptive_compression=false
 addr_range=0:18446744073709551615
 assoc=2
 block_size=64
-compressed_bus=false
-compression_latency=0
 hash_delay=1
 latency=1000
 lifo=false
@@ -333,12 +321,10 @@ prefetch_serial_squash=false
 prefetch_use_cpu_id=true
 prefetcher_size=100
 prioritizeRequests=false
-protocol=Null
 repl=Null
 size=2097152
 split=false
 split_size=0
-store_compressed=false
 subblock_size=0
 tgts_per_mshr=5
 trace_addr=0
@@ -356,6 +342,9 @@ responder_set=false
 width=64
 port=system.cpu.icache.mem_side system.cpu.dcache.mem_side system.cpu.l2cache.cpu_side
 
+[system.cpu.tracer]
+type=ExeTracer
+
 [system.cpu.workload0]
 type=LiveProcess
 cmd=hello
diff --git a/tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/m5stats.txt b/tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/m5stats.txt
index 39a686d6b..259a48483 100644
--- a/tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/m5stats.txt
+++ b/tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/m5stats.txt
@@ -2,47 +2,47 @@
 ---------- Begin Simulation Statistics ----------
 global.BPredUnit.BTBCorrect                         0                       # Number of correct BTB predictions (this stat may not work properly.
 global.BPredUnit.BTBHits                          696                       # Number of BTB hits
-global.BPredUnit.BTBLookups                      3414                       # Number of BTB lookups
-global.BPredUnit.RASInCorrect                     125                       # Number of incorrect RAS predictions.
-global.BPredUnit.condIncorrect                   1124                       # Number of conditional branches incorrect
-global.BPredUnit.condPredicted                   2315                       # Number of conditional branches predicted
-global.BPredUnit.lookups                         3940                       # Number of BP lookups
-global.BPredUnit.usedRAS                          525                       # Number of times the RAS was used to get a target.
-host_inst_rate                                  52706                       # Simulator instruction rate (inst/s)
-host_mem_usage                                 154396                       # Number of bytes of host memory used
-host_seconds                                     0.21                       # Real time elapsed on the host
-host_tick_rate                               25698682                       # Simulator tick rate (ticks/s)
-memdepunit.memDep.conflictingLoads                 16                       # Number of conflicting loads.
-memdepunit.memDep.conflictingLoads                 16                       # Number of conflicting loads.
-memdepunit.memDep.conflictingStores                53                       # Number of conflicting stores.
-memdepunit.memDep.conflictingStores                59                       # Number of conflicting stores.
-memdepunit.memDep.insertedLoads                  1934                       # Number of loads inserted to the mem dependence unit.
-memdepunit.memDep.insertedLoads                  1903                       # Number of loads inserted to the mem dependence unit.
-memdepunit.memDep.insertedStores                 1082                       # Number of stores inserted to the mem dependence unit.
-memdepunit.memDep.insertedStores                 1090                       # Number of stores inserted to the mem dependence unit.
+global.BPredUnit.BTBLookups                      3444                       # Number of BTB lookups
+global.BPredUnit.RASInCorrect                     120                       # Number of incorrect RAS predictions.
+global.BPredUnit.condIncorrect                   1098                       # Number of conditional branches incorrect
+global.BPredUnit.condPredicted                   2319                       # Number of conditional branches predicted
+global.BPredUnit.lookups                         3987                       # Number of BP lookups
+global.BPredUnit.usedRAS                          539                       # Number of times the RAS was used to get a target.
+host_inst_rate                                  43485                       # Simulator instruction rate (inst/s)
+host_mem_usage                                 155152                       # Number of bytes of host memory used
+host_seconds                                     0.26                       # Real time elapsed on the host
+host_tick_rate                               19795862                       # Simulator tick rate (ticks/s)
+memdepunit.memDep.conflictingLoads                 10                       # Number of conflicting loads.
+memdepunit.memDep.conflictingLoads                 10                       # Number of conflicting loads.
+memdepunit.memDep.conflictingStores                48                       # Number of conflicting stores.
+memdepunit.memDep.conflictingStores                52                       # Number of conflicting stores.
+memdepunit.memDep.insertedLoads                  1908                       # Number of loads inserted to the mem dependence unit.
+memdepunit.memDep.insertedLoads                  1873                       # Number of loads inserted to the mem dependence unit.
+memdepunit.memDep.insertedStores                 1098                       # Number of stores inserted to the mem dependence unit.
+memdepunit.memDep.insertedStores                 1086                       # Number of stores inserted to the mem dependence unit.
 sim_freq                                 1000000000000                       # Frequency of simulated ticks
 sim_insts                                       11247                       # Number of instructions simulated
 sim_seconds                                  0.000005                       # Number of seconds simulated
-sim_ticks                                     5491500                       # Number of ticks simulated
+sim_ticks                                     5126000                       # Number of ticks simulated
 system.cpu.commit.COM:branches                   1724                       # Number of branches committed
 system.cpu.commit.COM:branches_0                  862                       # Number of branches committed
 system.cpu.commit.COM:branches_1                  862                       # Number of branches committed
-system.cpu.commit.COM:bw_lim_events               168                       # number cycles where commit BW limit reached
+system.cpu.commit.COM:bw_lim_events               164                       # number cycles where commit BW limit reached
 system.cpu.commit.COM:bw_limited                    0                       # number of insts not committed due to BW limits
 system.cpu.commit.COM:bw_limited_0                  0                       # number of insts not committed due to BW limits
 system.cpu.commit.COM:bw_limited_1                  0                       # number of insts not committed due to BW limits
 system.cpu.commit.COM:committed_per_cycle.start_dist                     # Number of insts commited each cycle
-system.cpu.commit.COM:committed_per_cycle.samples        10926                      
+system.cpu.commit.COM:committed_per_cycle.samples        10208                      
 system.cpu.commit.COM:committed_per_cycle.min_value            0                      
-                               0         6353   5814.57%           
-                               1         2078   1901.89%           
-                               2          996    911.59%           
-                               3          472    432.00%           
-                               4          296    270.91%           
-                               5          241    220.57%           
-                               6          192    175.73%           
-                               7          130    118.98%           
-                               8          168    153.76%           
+                               0         5629   5514.30%           
+                               1         2071   2028.80%           
+                               2          984    963.95%           
+                               3          471    461.40%           
+                               4          357    349.73%           
+                               5          228    223.35%           
+                               6          179    175.35%           
+                               7          125    122.45%           
+                               8          164    160.66%           
 system.cpu.commit.COM:committed_per_cycle.max_value            8                      
 system.cpu.commit.COM:committed_per_cycle.end_dist
 
@@ -61,133 +61,133 @@ system.cpu.commit.COM:refs_1                     1791                       # Nu
 system.cpu.commit.COM:swp_count                     0                       # Number of s/w prefetches committed
 system.cpu.commit.COM:swp_count_0                   0                       # Number of s/w prefetches committed
 system.cpu.commit.COM:swp_count_1                   0                       # Number of s/w prefetches committed
-system.cpu.commit.branchMispredicts               885                       # The number of times a branch was mispredicted
+system.cpu.commit.branchMispredicts               852                       # The number of times a branch was mispredicted
 system.cpu.commit.commitCommittedInsts          11281                       # The number of committed instructions
 system.cpu.commit.commitNonSpecStalls              34                       # The number of times commit has been forced to stall to communicate backwards
-system.cpu.commit.commitSquashedInsts            7777                       # The number of squashed insts skipped by commit
+system.cpu.commit.commitSquashedInsts            7556                       # The number of squashed insts skipped by commit
 system.cpu.committedInsts_0                      5623                       # Number of Instructions Simulated
 system.cpu.committedInsts_1                      5624                       # Number of Instructions Simulated
 system.cpu.committedInsts_total                 11247                       # Number of Instructions Simulated
-system.cpu.cpi_0                             1.952872                       # CPI: Cycles Per Instruction
-system.cpu.cpi_1                             1.952525                       # CPI: Cycles Per Instruction
-system.cpu.cpi_total                         0.976349                       # CPI: Total CPI of All Threads
-system.cpu.dcache.ReadReq_accesses               2981                       # number of ReadReq accesses(hits+misses)
-system.cpu.dcache.ReadReq_accesses_0             2981                       # number of ReadReq accesses(hits+misses)
-system.cpu.dcache.ReadReq_avg_miss_latency_0  7040.892193                       # average ReadReq miss latency
-system.cpu.dcache.ReadReq_avg_mshr_miss_latency_0  6979.591837                       # average ReadReq mshr miss latency
-system.cpu.dcache.ReadReq_hits                   2712                       # number of ReadReq hits
-system.cpu.dcache.ReadReq_hits_0                 2712                       # number of ReadReq hits
-system.cpu.dcache.ReadReq_miss_latency        1894000                       # number of ReadReq miss cycles
-system.cpu.dcache.ReadReq_miss_latency_0      1894000                       # number of ReadReq miss cycles
-system.cpu.dcache.ReadReq_miss_rate_0        0.090238                       # miss rate for ReadReq accesses
-system.cpu.dcache.ReadReq_misses                  269                       # number of ReadReq misses
-system.cpu.dcache.ReadReq_misses_0                269                       # number of ReadReq misses
-system.cpu.dcache.ReadReq_mshr_hits                73                       # number of ReadReq MSHR hits
-system.cpu.dcache.ReadReq_mshr_hits_0              73                       # number of ReadReq MSHR hits
-system.cpu.dcache.ReadReq_mshr_miss_latency      1368000                       # number of ReadReq MSHR miss cycles
-system.cpu.dcache.ReadReq_mshr_miss_latency_0      1368000                       # number of ReadReq MSHR miss cycles
-system.cpu.dcache.ReadReq_mshr_miss_rate_0     0.065750                       # mshr miss rate for ReadReq accesses
-system.cpu.dcache.ReadReq_mshr_misses             196                       # number of ReadReq MSHR misses
-system.cpu.dcache.ReadReq_mshr_misses_0           196                       # number of ReadReq MSHR misses
-system.cpu.dcache.WriteReq_accesses              1624                       # number of WriteReq accesses(hits+misses)
-system.cpu.dcache.WriteReq_accesses_0            1624                       # number of WriteReq accesses(hits+misses)
-system.cpu.dcache.WriteReq_avg_miss_latency_0  5306.613226                       # average WriteReq miss latency
-system.cpu.dcache.WriteReq_avg_mshr_miss_latency_0  5852.739726                       # average WriteReq mshr miss latency
-system.cpu.dcache.WriteReq_hits                  1125                       # number of WriteReq hits
-system.cpu.dcache.WriteReq_hits_0                1125                       # number of WriteReq hits
-system.cpu.dcache.WriteReq_miss_latency       2648000                       # number of WriteReq miss cycles
-system.cpu.dcache.WriteReq_miss_latency_0      2648000                       # number of WriteReq miss cycles
-system.cpu.dcache.WriteReq_miss_rate_0       0.307266                       # miss rate for WriteReq accesses
-system.cpu.dcache.WriteReq_misses                 499                       # number of WriteReq misses
-system.cpu.dcache.WriteReq_misses_0               499                       # number of WriteReq misses
-system.cpu.dcache.WriteReq_mshr_hits              353                       # number of WriteReq MSHR hits
-system.cpu.dcache.WriteReq_mshr_hits_0            353                       # number of WriteReq MSHR hits
-system.cpu.dcache.WriteReq_mshr_miss_latency       854500                       # number of WriteReq MSHR miss cycles
-system.cpu.dcache.WriteReq_mshr_miss_latency_0       854500                       # number of WriteReq MSHR miss cycles
-system.cpu.dcache.WriteReq_mshr_miss_rate_0     0.089901                       # mshr miss rate for WriteReq accesses
-system.cpu.dcache.WriteReq_mshr_misses            146                       # number of WriteReq MSHR misses
-system.cpu.dcache.WriteReq_mshr_misses_0          146                       # number of WriteReq MSHR misses
+system.cpu.cpi_0                             1.822870                       # CPI: Cycles Per Instruction
+system.cpu.cpi_1                             1.822546                       # CPI: Cycles Per Instruction
+system.cpu.cpi_total                         0.911354                       # CPI: Total CPI of All Threads
+system.cpu.dcache.ReadReq_accesses               2898                       # number of ReadReq accesses(hits+misses)
+system.cpu.dcache.ReadReq_accesses_0             2898                       # number of ReadReq accesses(hits+misses)
+system.cpu.dcache.ReadReq_avg_miss_latency_0 10388.059701                       # average ReadReq miss latency
+system.cpu.dcache.ReadReq_avg_mshr_miss_latency_0  7328.358209                       # average ReadReq mshr miss latency
+system.cpu.dcache.ReadReq_hits                   2697                       # number of ReadReq hits
+system.cpu.dcache.ReadReq_hits_0                 2697                       # number of ReadReq hits
+system.cpu.dcache.ReadReq_miss_latency        2088000                       # number of ReadReq miss cycles
+system.cpu.dcache.ReadReq_miss_latency_0      2088000                       # number of ReadReq miss cycles
+system.cpu.dcache.ReadReq_miss_rate_0        0.069358                       # miss rate for ReadReq accesses
+system.cpu.dcache.ReadReq_misses                  201                       # number of ReadReq misses
+system.cpu.dcache.ReadReq_misses_0                201                       # number of ReadReq misses
+system.cpu.dcache.ReadReq_mshr_hits                74                       # number of ReadReq MSHR hits
+system.cpu.dcache.ReadReq_mshr_hits_0              74                       # number of ReadReq MSHR hits
+system.cpu.dcache.ReadReq_mshr_miss_latency      1473000                       # number of ReadReq MSHR miss cycles
+system.cpu.dcache.ReadReq_mshr_miss_latency_0      1473000                       # number of ReadReq MSHR miss cycles
+system.cpu.dcache.ReadReq_mshr_miss_rate_0     0.069358                       # mshr miss rate for ReadReq accesses
+system.cpu.dcache.ReadReq_mshr_misses             201                       # number of ReadReq MSHR misses
+system.cpu.dcache.ReadReq_mshr_misses_0           201                       # number of ReadReq MSHR misses
+system.cpu.dcache.WriteReq_accesses              1265                       # number of WriteReq accesses(hits+misses)
+system.cpu.dcache.WriteReq_accesses_0            1265                       # number of WriteReq accesses(hits+misses)
+system.cpu.dcache.WriteReq_avg_miss_latency_0 16353.448276                       # average WriteReq miss latency
+system.cpu.dcache.WriteReq_avg_mshr_miss_latency_0  5663.793103                       # average WriteReq mshr miss latency
+system.cpu.dcache.WriteReq_hits                  1091                       # number of WriteReq hits
+system.cpu.dcache.WriteReq_hits_0                1091                       # number of WriteReq hits
+system.cpu.dcache.WriteReq_miss_latency       2845500                       # number of WriteReq miss cycles
+system.cpu.dcache.WriteReq_miss_latency_0      2845500                       # number of WriteReq miss cycles
+system.cpu.dcache.WriteReq_miss_rate_0       0.137549                       # miss rate for WriteReq accesses
+system.cpu.dcache.WriteReq_misses                 174                       # number of WriteReq misses
+system.cpu.dcache.WriteReq_misses_0               174                       # number of WriteReq misses
+system.cpu.dcache.WriteReq_mshr_hits              359                       # number of WriteReq MSHR hits
+system.cpu.dcache.WriteReq_mshr_hits_0            359                       # number of WriteReq MSHR hits
+system.cpu.dcache.WriteReq_mshr_miss_latency       985500                       # number of WriteReq MSHR miss cycles
+system.cpu.dcache.WriteReq_mshr_miss_latency_0       985500                       # number of WriteReq MSHR miss cycles
+system.cpu.dcache.WriteReq_mshr_miss_rate_0     0.137549                       # mshr miss rate for WriteReq accesses
+system.cpu.dcache.WriteReq_mshr_misses            174                       # number of WriteReq MSHR misses
+system.cpu.dcache.WriteReq_mshr_misses_0          174                       # number of WriteReq MSHR misses
 system.cpu.dcache.avg_blocked_cycles_no_mshrs <err: div-0>                       # average number of cycles each access was blocked
 system.cpu.dcache.avg_blocked_cycles_no_targets <err: div-0>                       # average number of cycles each access was blocked
-system.cpu.dcache.avg_refs                  11.219298                       # Average number of references to valid blocks.
+system.cpu.dcache.avg_refs                  10.997118                       # Average number of references to valid blocks.
 system.cpu.dcache.blocked_no_mshrs                  0                       # number of cycles access was blocked
 system.cpu.dcache.blocked_no_targets                0                       # number of cycles access was blocked
 system.cpu.dcache.blocked_cycles_no_mshrs            0                       # number of cycles access was blocked
 system.cpu.dcache.blocked_cycles_no_targets            0                       # number of cycles access was blocked
 system.cpu.dcache.cache_copies                      0                       # number of cache copies performed
-system.cpu.dcache.demand_accesses                4605                       # number of demand (read+write) accesses
-system.cpu.dcache.demand_accesses_0              4605                       # number of demand (read+write) accesses
+system.cpu.dcache.demand_accesses                4163                       # number of demand (read+write) accesses
+system.cpu.dcache.demand_accesses_0              4163                       # number of demand (read+write) accesses
 system.cpu.dcache.demand_accesses_1                 0                       # number of demand (read+write) accesses
 system.cpu.dcache.demand_avg_miss_latency <err: div-0>                       # average overall miss latency
-system.cpu.dcache.demand_avg_miss_latency_0  5914.062500                       # average overall miss latency
+system.cpu.dcache.demand_avg_miss_latency_0        13156                       # average overall miss latency
 system.cpu.dcache.demand_avg_miss_latency_1 <err: div-0>                       # average overall miss latency
 system.cpu.dcache.demand_avg_mshr_miss_latency <err: div-0>                       # average overall mshr miss latency
-system.cpu.dcache.demand_avg_mshr_miss_latency_0  6498.538012                       # average overall mshr miss latency
+system.cpu.dcache.demand_avg_mshr_miss_latency_0         6556                       # average overall mshr miss latency
 system.cpu.dcache.demand_avg_mshr_miss_latency_1 <err: div-0>                       # average overall mshr miss latency
-system.cpu.dcache.demand_hits                    3837                       # number of demand (read+write) hits
-system.cpu.dcache.demand_hits_0                  3837                       # number of demand (read+write) hits
+system.cpu.dcache.demand_hits                    3788                       # number of demand (read+write) hits
+system.cpu.dcache.demand_hits_0                  3788                       # number of demand (read+write) hits
 system.cpu.dcache.demand_hits_1                     0                       # number of demand (read+write) hits
-system.cpu.dcache.demand_miss_latency         4542000                       # number of demand (read+write) miss cycles
-system.cpu.dcache.demand_miss_latency_0       4542000                       # number of demand (read+write) miss cycles
+system.cpu.dcache.demand_miss_latency         4933500                       # number of demand (read+write) miss cycles
+system.cpu.dcache.demand_miss_latency_0       4933500                       # number of demand (read+write) miss cycles
 system.cpu.dcache.demand_miss_latency_1             0                       # number of demand (read+write) miss cycles
 system.cpu.dcache.demand_miss_rate       <err: div-0>                       # miss rate for demand accesses
-system.cpu.dcache.demand_miss_rate_0         0.166775                       # miss rate for demand accesses
+system.cpu.dcache.demand_miss_rate_0         0.090079                       # miss rate for demand accesses
 system.cpu.dcache.demand_miss_rate_1     <err: div-0>                       # miss rate for demand accesses
-system.cpu.dcache.demand_misses                   768                       # number of demand (read+write) misses
-system.cpu.dcache.demand_misses_0                 768                       # number of demand (read+write) misses
+system.cpu.dcache.demand_misses                   375                       # number of demand (read+write) misses
+system.cpu.dcache.demand_misses_0                 375                       # number of demand (read+write) misses
 system.cpu.dcache.demand_misses_1                   0                       # number of demand (read+write) misses
-system.cpu.dcache.demand_mshr_hits                426                       # number of demand (read+write) MSHR hits
-system.cpu.dcache.demand_mshr_hits_0              426                       # number of demand (read+write) MSHR hits
+system.cpu.dcache.demand_mshr_hits                433                       # number of demand (read+write) MSHR hits
+system.cpu.dcache.demand_mshr_hits_0              433                       # number of demand (read+write) MSHR hits
 system.cpu.dcache.demand_mshr_hits_1                0                       # number of demand (read+write) MSHR hits
-system.cpu.dcache.demand_mshr_miss_latency      2222500                       # number of demand (read+write) MSHR miss cycles
-system.cpu.dcache.demand_mshr_miss_latency_0      2222500                       # number of demand (read+write) MSHR miss cycles
+system.cpu.dcache.demand_mshr_miss_latency      2458500                       # number of demand (read+write) MSHR miss cycles
+system.cpu.dcache.demand_mshr_miss_latency_0      2458500                       # number of demand (read+write) MSHR miss cycles
 system.cpu.dcache.demand_mshr_miss_latency_1            0                       # number of demand (read+write) MSHR miss cycles
 system.cpu.dcache.demand_mshr_miss_rate  <err: div-0>                       # mshr miss rate for demand accesses
-system.cpu.dcache.demand_mshr_miss_rate_0     0.074267                       # mshr miss rate for demand accesses
+system.cpu.dcache.demand_mshr_miss_rate_0     0.090079                       # mshr miss rate for demand accesses
 system.cpu.dcache.demand_mshr_miss_rate_1 <err: div-0>                       # mshr miss rate for demand accesses
-system.cpu.dcache.demand_mshr_misses              342                       # number of demand (read+write) MSHR misses
-system.cpu.dcache.demand_mshr_misses_0            342                       # number of demand (read+write) MSHR misses
+system.cpu.dcache.demand_mshr_misses              375                       # number of demand (read+write) MSHR misses
+system.cpu.dcache.demand_mshr_misses_0            375                       # number of demand (read+write) MSHR misses
 system.cpu.dcache.demand_mshr_misses_1              0                       # number of demand (read+write) MSHR misses
 system.cpu.dcache.fast_writes                       0                       # number of fast writes performed
 system.cpu.dcache.mshr_cap_events                   0                       # number of times MSHR cap was activated
 system.cpu.dcache.mshr_cap_events_0                 0                       # number of times MSHR cap was activated
 system.cpu.dcache.mshr_cap_events_1                 0                       # number of times MSHR cap was activated
 system.cpu.dcache.no_allocate_misses                0                       # Number of misses that were no-allocate
-system.cpu.dcache.overall_accesses               4605                       # number of overall (read+write) accesses
-system.cpu.dcache.overall_accesses_0             4605                       # number of overall (read+write) accesses
+system.cpu.dcache.overall_accesses               4163                       # number of overall (read+write) accesses
+system.cpu.dcache.overall_accesses_0             4163                       # number of overall (read+write) accesses
 system.cpu.dcache.overall_accesses_1                0                       # number of overall (read+write) accesses
 system.cpu.dcache.overall_avg_miss_latency <err: div-0>                       # average overall miss latency
-system.cpu.dcache.overall_avg_miss_latency_0  5914.062500                       # average overall miss latency
+system.cpu.dcache.overall_avg_miss_latency_0        13156                       # average overall miss latency
 system.cpu.dcache.overall_avg_miss_latency_1 <err: div-0>                       # average overall miss latency
 system.cpu.dcache.overall_avg_mshr_miss_latency <err: div-0>                       # average overall mshr miss latency
-system.cpu.dcache.overall_avg_mshr_miss_latency_0  6498.538012                       # average overall mshr miss latency
+system.cpu.dcache.overall_avg_mshr_miss_latency_0         6556                       # average overall mshr miss latency
 system.cpu.dcache.overall_avg_mshr_miss_latency_1 <err: div-0>                       # average overall mshr miss latency
 system.cpu.dcache.overall_avg_mshr_uncacheable_latency <err: div-0>                       # average overall mshr uncacheable latency
 system.cpu.dcache.overall_avg_mshr_uncacheable_latency_0 <err: div-0>                       # average overall mshr uncacheable latency
 system.cpu.dcache.overall_avg_mshr_uncacheable_latency_1 <err: div-0>                       # average overall mshr uncacheable latency
-system.cpu.dcache.overall_hits                   3837                       # number of overall hits
-system.cpu.dcache.overall_hits_0                 3837                       # number of overall hits
+system.cpu.dcache.overall_hits                   3788                       # number of overall hits
+system.cpu.dcache.overall_hits_0                 3788                       # number of overall hits
 system.cpu.dcache.overall_hits_1                    0                       # number of overall hits
-system.cpu.dcache.overall_miss_latency        4542000                       # number of overall miss cycles
-system.cpu.dcache.overall_miss_latency_0      4542000                       # number of overall miss cycles
+system.cpu.dcache.overall_miss_latency        4933500                       # number of overall miss cycles
+system.cpu.dcache.overall_miss_latency_0      4933500                       # number of overall miss cycles
 system.cpu.dcache.overall_miss_latency_1            0                       # number of overall miss cycles
 system.cpu.dcache.overall_miss_rate      <err: div-0>                       # miss rate for overall accesses
-system.cpu.dcache.overall_miss_rate_0        0.166775                       # miss rate for overall accesses
+system.cpu.dcache.overall_miss_rate_0        0.090079                       # miss rate for overall accesses
 system.cpu.dcache.overall_miss_rate_1    <err: div-0>                       # miss rate for overall accesses
-system.cpu.dcache.overall_misses                  768                       # number of overall misses
-system.cpu.dcache.overall_misses_0                768                       # number of overall misses
+system.cpu.dcache.overall_misses                  375                       # number of overall misses
+system.cpu.dcache.overall_misses_0                375                       # number of overall misses
 system.cpu.dcache.overall_misses_1                  0                       # number of overall misses
-system.cpu.dcache.overall_mshr_hits               426                       # number of overall MSHR hits
-system.cpu.dcache.overall_mshr_hits_0             426                       # number of overall MSHR hits
+system.cpu.dcache.overall_mshr_hits               433                       # number of overall MSHR hits
+system.cpu.dcache.overall_mshr_hits_0             433                       # number of overall MSHR hits
 system.cpu.dcache.overall_mshr_hits_1               0                       # number of overall MSHR hits
-system.cpu.dcache.overall_mshr_miss_latency      2222500                       # number of overall MSHR miss cycles
-system.cpu.dcache.overall_mshr_miss_latency_0      2222500                       # number of overall MSHR miss cycles
+system.cpu.dcache.overall_mshr_miss_latency      2458500                       # number of overall MSHR miss cycles
+system.cpu.dcache.overall_mshr_miss_latency_0      2458500                       # number of overall MSHR miss cycles
 system.cpu.dcache.overall_mshr_miss_latency_1            0                       # number of overall MSHR miss cycles
 system.cpu.dcache.overall_mshr_miss_rate <err: div-0>                       # mshr miss rate for overall accesses
-system.cpu.dcache.overall_mshr_miss_rate_0     0.074267                       # mshr miss rate for overall accesses
+system.cpu.dcache.overall_mshr_miss_rate_0     0.090079                       # mshr miss rate for overall accesses
 system.cpu.dcache.overall_mshr_miss_rate_1 <err: div-0>                       # mshr miss rate for overall accesses
-system.cpu.dcache.overall_mshr_misses             342                       # number of overall MSHR misses
-system.cpu.dcache.overall_mshr_misses_0           342                       # number of overall MSHR misses
+system.cpu.dcache.overall_mshr_misses             375                       # number of overall MSHR misses
+system.cpu.dcache.overall_mshr_misses_0           375                       # number of overall MSHR misses
 system.cpu.dcache.overall_mshr_misses_1             0                       # number of overall MSHR misses
 system.cpu.dcache.overall_mshr_uncacheable_latency            0                       # number of overall MSHR uncacheable cycles
 system.cpu.dcache.overall_mshr_uncacheable_latency_0            0                       # number of overall MSHR uncacheable cycles
@@ -207,149 +207,149 @@ system.cpu.dcache.prefetcher.num_hwpf_squashed_from_miss            0
 system.cpu.dcache.replacements                      0                       # number of replacements
 system.cpu.dcache.replacements_0                    0                       # number of replacements
 system.cpu.dcache.replacements_1                    0                       # number of replacements
-system.cpu.dcache.sampled_refs                    342                       # Sample count of references to valid blocks.
+system.cpu.dcache.sampled_refs                    347                       # Sample count of references to valid blocks.
 system.cpu.dcache.soft_prefetch_mshr_full            0                       # number of mshr full events for SW prefetching instrutions
 system.cpu.dcache.soft_prefetch_mshr_full_0            0                       # number of mshr full events for SW prefetching instrutions
 system.cpu.dcache.soft_prefetch_mshr_full_1            0                       # number of mshr full events for SW prefetching instrutions
-system.cpu.dcache.tagsinuse                221.287284                       # Cycle average of tags in use
-system.cpu.dcache.total_refs                     3837                       # Total number of references to valid blocks.
+system.cpu.dcache.tagsinuse                222.253048                       # Cycle average of tags in use
+system.cpu.dcache.total_refs                     3816                       # Total number of references to valid blocks.
 system.cpu.dcache.warmup_cycle                      0                       # Cycle when the warmup percentage was hit.
 system.cpu.dcache.writebacks                        0                       # number of writebacks
 system.cpu.dcache.writebacks_0                      0                       # number of writebacks
 system.cpu.dcache.writebacks_1                      0                       # number of writebacks
-system.cpu.decode.DECODE:BlockedCycles           1876                       # Number of cycles decode is blocked
-system.cpu.decode.DECODE:BranchMispred            246                       # Number of times decode detected a branch misprediction
-system.cpu.decode.DECODE:BranchResolved           345                       # Number of times decode resolved a branch
-system.cpu.decode.DECODE:DecodedInsts           21769                       # Number of instructions handled by decode
-system.cpu.decode.DECODE:IdleCycles             14522                       # Number of cycles decode is idle
-system.cpu.decode.DECODE:RunCycles               3673                       # Number of cycles decode is running
-system.cpu.decode.DECODE:SquashCycles            1511                       # Number of cycles decode is squashing
-system.cpu.decode.DECODE:SquashedInsts            346                       # Number of squashed instructions handled by decode
-system.cpu.decode.DECODE:UnblockCycles            145                       # Number of cycles decode is unblocking
-system.cpu.fetch.Branches                        3940                       # Number of branches that fetch encountered
-system.cpu.fetch.CacheLines                      3009                       # Number of cache lines fetched
-system.cpu.fetch.Cycles                          6972                       # Number of cycles fetch has run and was not squashing or blocked
-system.cpu.fetch.IcacheSquashes                   537                       # Number of outstanding Icache misses that were squashed
-system.cpu.fetch.Insts                          23897                       # Number of instructions fetch has processed
-system.cpu.fetch.SquashCycles                    1189                       # Number of cycles fetch has spent squashing
-system.cpu.fetch.branchRate                  0.358802                       # Number of branch fetches per cycle
-system.cpu.fetch.icacheStallCycles               3009                       # Number of cycles fetch is stalled on an Icache miss
-system.cpu.fetch.predictedBranches               1221                       # Number of branches that fetch has predicted taken
-system.cpu.fetch.rate                        2.176213                       # Number of inst fetches per cycle
+system.cpu.decode.DECODE:BlockedCycles           1825                       # Number of cycles decode is blocked
+system.cpu.decode.DECODE:BranchMispred            258                       # Number of times decode detected a branch misprediction
+system.cpu.decode.DECODE:BranchResolved           356                       # Number of times decode resolved a branch
+system.cpu.decode.DECODE:DecodedInsts           21887                       # Number of instructions handled by decode
+system.cpu.decode.DECODE:IdleCycles             13153                       # Number of cycles decode is idle
+system.cpu.decode.DECODE:RunCycles               3652                       # Number of cycles decode is running
+system.cpu.decode.DECODE:SquashCycles            1444                       # Number of cycles decode is squashing
+system.cpu.decode.DECODE:SquashedInsts            304                       # Number of squashed instructions handled by decode
+system.cpu.decode.DECODE:UnblockCycles            187                       # Number of cycles decode is unblocking
+system.cpu.fetch.Branches                        3987                       # Number of branches that fetch encountered
+system.cpu.fetch.CacheLines                      2956                       # Number of cache lines fetched
+system.cpu.fetch.Cycles                          6947                       # Number of cycles fetch has run and was not squashing or blocked
+system.cpu.fetch.IcacheSquashes                   423                       # Number of outstanding Icache misses that were squashed
+system.cpu.fetch.Insts                          24040                       # Number of instructions fetch has processed
+system.cpu.fetch.SquashCycles                    1159                       # Number of cycles fetch has spent squashing
+system.cpu.fetch.branchRate                  0.388976                       # Number of branch fetches per cycle
+system.cpu.fetch.icacheStallCycles               2956                       # Number of cycles fetch is stalled on an Icache miss
+system.cpu.fetch.predictedBranches               1235                       # Number of branches that fetch has predicted taken
+system.cpu.fetch.rate                        2.345366                       # Number of inst fetches per cycle
 system.cpu.fetch.rateDist.start_dist                           # Number of instructions fetched each cycle (Total)
-system.cpu.fetch.rateDist.samples               10981                      
+system.cpu.fetch.rateDist.samples               10250                      
 system.cpu.fetch.rateDist.min_value                 0                      
-                               0         7019   6391.95%           
-                               1          293    266.82%           
-                               2          225    204.90%           
-                               3          260    236.77%           
-                               4          345    314.18%           
-                               5          288    262.27%           
-                               6          304    276.84%           
-                               7          246    224.02%           
-                               8         2001   1822.24%           
+                               0         6260   6107.32%           
+                               1          296    288.78%           
+                               2          229    223.41%           
+                               3          268    261.46%           
+                               4          338    329.76%           
+                               5          294    286.83%           
+                               6          303    295.61%           
+                               7          254    247.80%           
+                               8         2008   1959.02%           
 system.cpu.fetch.rateDist.max_value                 8                      
 system.cpu.fetch.rateDist.end_dist
 
-system.cpu.icache.ReadReq_accesses               3009                       # number of ReadReq accesses(hits+misses)
-system.cpu.icache.ReadReq_accesses_0             3009                       # number of ReadReq accesses(hits+misses)
-system.cpu.icache.ReadReq_avg_miss_latency_0  5911.144578                       # average ReadReq miss latency
-system.cpu.icache.ReadReq_avg_mshr_miss_latency_0  5119.774920                       # average ReadReq mshr miss latency
-system.cpu.icache.ReadReq_hits                   2345                       # number of ReadReq hits
-system.cpu.icache.ReadReq_hits_0                 2345                       # number of ReadReq hits
-system.cpu.icache.ReadReq_miss_latency        3925000                       # number of ReadReq miss cycles
-system.cpu.icache.ReadReq_miss_latency_0      3925000                       # number of ReadReq miss cycles
-system.cpu.icache.ReadReq_miss_rate_0        0.220671                       # miss rate for ReadReq accesses
-system.cpu.icache.ReadReq_misses                  664                       # number of ReadReq misses
-system.cpu.icache.ReadReq_misses_0                664                       # number of ReadReq misses
-system.cpu.icache.ReadReq_mshr_hits                42                       # number of ReadReq MSHR hits
-system.cpu.icache.ReadReq_mshr_hits_0              42                       # number of ReadReq MSHR hits
-system.cpu.icache.ReadReq_mshr_miss_latency      3184500                       # number of ReadReq MSHR miss cycles
-system.cpu.icache.ReadReq_mshr_miss_latency_0      3184500                       # number of ReadReq MSHR miss cycles
-system.cpu.icache.ReadReq_mshr_miss_rate_0     0.206713                       # mshr miss rate for ReadReq accesses
-system.cpu.icache.ReadReq_mshr_misses             622                       # number of ReadReq MSHR misses
-system.cpu.icache.ReadReq_mshr_misses_0           622                       # number of ReadReq MSHR misses
+system.cpu.icache.ReadReq_accesses               2906                       # number of ReadReq accesses(hits+misses)
+system.cpu.icache.ReadReq_accesses_0             2906                       # number of ReadReq accesses(hits+misses)
+system.cpu.icache.ReadReq_avg_miss_latency_0  6488.691438                       # average ReadReq miss latency
+system.cpu.icache.ReadReq_avg_mshr_miss_latency_0  5218.093700                       # average ReadReq mshr miss latency
+system.cpu.icache.ReadReq_hits                   2287                       # number of ReadReq hits
+system.cpu.icache.ReadReq_hits_0                 2287                       # number of ReadReq hits
+system.cpu.icache.ReadReq_miss_latency        4016500                       # number of ReadReq miss cycles
+system.cpu.icache.ReadReq_miss_latency_0      4016500                       # number of ReadReq miss cycles
+system.cpu.icache.ReadReq_miss_rate_0        0.213008                       # miss rate for ReadReq accesses
+system.cpu.icache.ReadReq_misses                  619                       # number of ReadReq misses
+system.cpu.icache.ReadReq_misses_0                619                       # number of ReadReq misses
+system.cpu.icache.ReadReq_mshr_hits                50                       # number of ReadReq MSHR hits
+system.cpu.icache.ReadReq_mshr_hits_0              50                       # number of ReadReq MSHR hits
+system.cpu.icache.ReadReq_mshr_miss_latency      3230000                       # number of ReadReq MSHR miss cycles
+system.cpu.icache.ReadReq_mshr_miss_latency_0      3230000                       # number of ReadReq MSHR miss cycles
+system.cpu.icache.ReadReq_mshr_miss_rate_0     0.213008                       # mshr miss rate for ReadReq accesses
+system.cpu.icache.ReadReq_mshr_misses             619                       # number of ReadReq MSHR misses
+system.cpu.icache.ReadReq_mshr_misses_0           619                       # number of ReadReq MSHR misses
 system.cpu.icache.avg_blocked_cycles_no_mshrs <err: div-0>                       # average number of cycles each access was blocked
 system.cpu.icache.avg_blocked_cycles_no_targets <err: div-0>                       # average number of cycles each access was blocked
-system.cpu.icache.avg_refs                   3.770096                       # Average number of references to valid blocks.
+system.cpu.icache.avg_refs                   3.694669                       # Average number of references to valid blocks.
 system.cpu.icache.blocked_no_mshrs                  0                       # number of cycles access was blocked
 system.cpu.icache.blocked_no_targets                0                       # number of cycles access was blocked
 system.cpu.icache.blocked_cycles_no_mshrs            0                       # number of cycles access was blocked
 system.cpu.icache.blocked_cycles_no_targets            0                       # number of cycles access was blocked
 system.cpu.icache.cache_copies                      0                       # number of cache copies performed
-system.cpu.icache.demand_accesses                3009                       # number of demand (read+write) accesses
-system.cpu.icache.demand_accesses_0              3009                       # number of demand (read+write) accesses
+system.cpu.icache.demand_accesses                2906                       # number of demand (read+write) accesses
+system.cpu.icache.demand_accesses_0              2906                       # number of demand (read+write) accesses
 system.cpu.icache.demand_accesses_1                 0                       # number of demand (read+write) accesses
 system.cpu.icache.demand_avg_miss_latency <err: div-0>                       # average overall miss latency
-system.cpu.icache.demand_avg_miss_latency_0  5911.144578                       # average overall miss latency
+system.cpu.icache.demand_avg_miss_latency_0  6488.691438                       # average overall miss latency
 system.cpu.icache.demand_avg_miss_latency_1 <err: div-0>                       # average overall miss latency
 system.cpu.icache.demand_avg_mshr_miss_latency <err: div-0>                       # average overall mshr miss latency
-system.cpu.icache.demand_avg_mshr_miss_latency_0  5119.774920                       # average overall mshr miss latency
+system.cpu.icache.demand_avg_mshr_miss_latency_0  5218.093700                       # average overall mshr miss latency
 system.cpu.icache.demand_avg_mshr_miss_latency_1 <err: div-0>                       # average overall mshr miss latency
-system.cpu.icache.demand_hits                    2345                       # number of demand (read+write) hits
-system.cpu.icache.demand_hits_0                  2345                       # number of demand (read+write) hits
+system.cpu.icache.demand_hits                    2287                       # number of demand (read+write) hits
+system.cpu.icache.demand_hits_0                  2287                       # number of demand (read+write) hits
 system.cpu.icache.demand_hits_1                     0                       # number of demand (read+write) hits
-system.cpu.icache.demand_miss_latency         3925000                       # number of demand (read+write) miss cycles
-system.cpu.icache.demand_miss_latency_0       3925000                       # number of demand (read+write) miss cycles
+system.cpu.icache.demand_miss_latency         4016500                       # number of demand (read+write) miss cycles
+system.cpu.icache.demand_miss_latency_0       4016500                       # number of demand (read+write) miss cycles
 system.cpu.icache.demand_miss_latency_1             0                       # number of demand (read+write) miss cycles
 system.cpu.icache.demand_miss_rate       <err: div-0>                       # miss rate for demand accesses
-system.cpu.icache.demand_miss_rate_0         0.220671                       # miss rate for demand accesses
+system.cpu.icache.demand_miss_rate_0         0.213008                       # miss rate for demand accesses
 system.cpu.icache.demand_miss_rate_1     <err: div-0>                       # miss rate for demand accesses
-system.cpu.icache.demand_misses                   664                       # number of demand (read+write) misses
-system.cpu.icache.demand_misses_0                 664                       # number of demand (read+write) misses
+system.cpu.icache.demand_misses                   619                       # number of demand (read+write) misses
+system.cpu.icache.demand_misses_0                 619                       # number of demand (read+write) misses
 system.cpu.icache.demand_misses_1                   0                       # number of demand (read+write) misses
-system.cpu.icache.demand_mshr_hits                 42                       # number of demand (read+write) MSHR hits
-system.cpu.icache.demand_mshr_hits_0               42                       # number of demand (read+write) MSHR hits
+system.cpu.icache.demand_mshr_hits                 50                       # number of demand (read+write) MSHR hits
+system.cpu.icache.demand_mshr_hits_0               50                       # number of demand (read+write) MSHR hits
 system.cpu.icache.demand_mshr_hits_1                0                       # number of demand (read+write) MSHR hits
-system.cpu.icache.demand_mshr_miss_latency      3184500                       # number of demand (read+write) MSHR miss cycles
-system.cpu.icache.demand_mshr_miss_latency_0      3184500                       # number of demand (read+write) MSHR miss cycles
+system.cpu.icache.demand_mshr_miss_latency      3230000                       # number of demand (read+write) MSHR miss cycles
+system.cpu.icache.demand_mshr_miss_latency_0      3230000                       # number of demand (read+write) MSHR miss cycles
 system.cpu.icache.demand_mshr_miss_latency_1            0                       # number of demand (read+write) MSHR miss cycles
 system.cpu.icache.demand_mshr_miss_rate  <err: div-0>                       # mshr miss rate for demand accesses
-system.cpu.icache.demand_mshr_miss_rate_0     0.206713                       # mshr miss rate for demand accesses
+system.cpu.icache.demand_mshr_miss_rate_0     0.213008                       # mshr miss rate for demand accesses
 system.cpu.icache.demand_mshr_miss_rate_1 <err: div-0>                       # mshr miss rate for demand accesses
-system.cpu.icache.demand_mshr_misses              622                       # number of demand (read+write) MSHR misses
-system.cpu.icache.demand_mshr_misses_0            622                       # number of demand (read+write) MSHR misses
+system.cpu.icache.demand_mshr_misses              619                       # number of demand (read+write) MSHR misses
+system.cpu.icache.demand_mshr_misses_0            619                       # number of demand (read+write) MSHR misses
 system.cpu.icache.demand_mshr_misses_1              0                       # number of demand (read+write) MSHR misses
 system.cpu.icache.fast_writes                       0                       # number of fast writes performed
 system.cpu.icache.mshr_cap_events                   0                       # number of times MSHR cap was activated
 system.cpu.icache.mshr_cap_events_0                 0                       # number of times MSHR cap was activated
 system.cpu.icache.mshr_cap_events_1                 0                       # number of times MSHR cap was activated
 system.cpu.icache.no_allocate_misses                0                       # Number of misses that were no-allocate
-system.cpu.icache.overall_accesses               3009                       # number of overall (read+write) accesses
-system.cpu.icache.overall_accesses_0             3009                       # number of overall (read+write) accesses
+system.cpu.icache.overall_accesses               2906                       # number of overall (read+write) accesses
+system.cpu.icache.overall_accesses_0             2906                       # number of overall (read+write) accesses
 system.cpu.icache.overall_accesses_1                0                       # number of overall (read+write) accesses
 system.cpu.icache.overall_avg_miss_latency <err: div-0>                       # average overall miss latency
-system.cpu.icache.overall_avg_miss_latency_0  5911.144578                       # average overall miss latency
+system.cpu.icache.overall_avg_miss_latency_0  6488.691438                       # average overall miss latency
 system.cpu.icache.overall_avg_miss_latency_1 <err: div-0>                       # average overall miss latency
 system.cpu.icache.overall_avg_mshr_miss_latency <err: div-0>                       # average overall mshr miss latency
-system.cpu.icache.overall_avg_mshr_miss_latency_0  5119.774920                       # average overall mshr miss latency
+system.cpu.icache.overall_avg_mshr_miss_latency_0  5218.093700                       # average overall mshr miss latency
 system.cpu.icache.overall_avg_mshr_miss_latency_1 <err: div-0>                       # average overall mshr miss latency
 system.cpu.icache.overall_avg_mshr_uncacheable_latency <err: div-0>                       # average overall mshr uncacheable latency
 system.cpu.icache.overall_avg_mshr_uncacheable_latency_0 <err: div-0>                       # average overall mshr uncacheable latency
 system.cpu.icache.overall_avg_mshr_uncacheable_latency_1 <err: div-0>                       # average overall mshr uncacheable latency
-system.cpu.icache.overall_hits                   2345                       # number of overall hits
-system.cpu.icache.overall_hits_0                 2345                       # number of overall hits
+system.cpu.icache.overall_hits                   2287                       # number of overall hits
+system.cpu.icache.overall_hits_0                 2287                       # number of overall hits
 system.cpu.icache.overall_hits_1                    0                       # number of overall hits
-system.cpu.icache.overall_miss_latency        3925000                       # number of overall miss cycles
-system.cpu.icache.overall_miss_latency_0      3925000                       # number of overall miss cycles
+system.cpu.icache.overall_miss_latency        4016500                       # number of overall miss cycles
+system.cpu.icache.overall_miss_latency_0      4016500                       # number of overall miss cycles
 system.cpu.icache.overall_miss_latency_1            0                       # number of overall miss cycles
 system.cpu.icache.overall_miss_rate      <err: div-0>                       # miss rate for overall accesses
-system.cpu.icache.overall_miss_rate_0        0.220671                       # miss rate for overall accesses
+system.cpu.icache.overall_miss_rate_0        0.213008                       # miss rate for overall accesses
 system.cpu.icache.overall_miss_rate_1    <err: div-0>                       # miss rate for overall accesses
-system.cpu.icache.overall_misses                  664                       # number of overall misses
-system.cpu.icache.overall_misses_0                664                       # number of overall misses
+system.cpu.icache.overall_misses                  619                       # number of overall misses
+system.cpu.icache.overall_misses_0                619                       # number of overall misses
 system.cpu.icache.overall_misses_1                  0                       # number of overall misses
-system.cpu.icache.overall_mshr_hits                42                       # number of overall MSHR hits
-system.cpu.icache.overall_mshr_hits_0              42                       # number of overall MSHR hits
+system.cpu.icache.overall_mshr_hits                50                       # number of overall MSHR hits
+system.cpu.icache.overall_mshr_hits_0              50                       # number of overall MSHR hits
 system.cpu.icache.overall_mshr_hits_1               0                       # number of overall MSHR hits
-system.cpu.icache.overall_mshr_miss_latency      3184500                       # number of overall MSHR miss cycles
-system.cpu.icache.overall_mshr_miss_latency_0      3184500                       # number of overall MSHR miss cycles
+system.cpu.icache.overall_mshr_miss_latency      3230000                       # number of overall MSHR miss cycles
+system.cpu.icache.overall_mshr_miss_latency_0      3230000                       # number of overall MSHR miss cycles
 system.cpu.icache.overall_mshr_miss_latency_1            0                       # number of overall MSHR miss cycles
 system.cpu.icache.overall_mshr_miss_rate <err: div-0>                       # mshr miss rate for overall accesses
-system.cpu.icache.overall_mshr_miss_rate_0     0.206713                       # mshr miss rate for overall accesses
+system.cpu.icache.overall_mshr_miss_rate_0     0.213008                       # mshr miss rate for overall accesses
 system.cpu.icache.overall_mshr_miss_rate_1 <err: div-0>                       # mshr miss rate for overall accesses
-system.cpu.icache.overall_mshr_misses             622                       # number of overall MSHR misses
-system.cpu.icache.overall_mshr_misses_0           622                       # number of overall MSHR misses
+system.cpu.icache.overall_mshr_misses             619                       # number of overall MSHR misses
+system.cpu.icache.overall_mshr_misses_0           619                       # number of overall MSHR misses
 system.cpu.icache.overall_mshr_misses_1             0                       # number of overall MSHR misses
 system.cpu.icache.overall_mshr_uncacheable_latency            0                       # number of overall MSHR uncacheable cycles
 system.cpu.icache.overall_mshr_uncacheable_latency_0            0                       # number of overall MSHR uncacheable cycles
@@ -369,104 +369,104 @@ system.cpu.icache.prefetcher.num_hwpf_squashed_from_miss            0
 system.cpu.icache.replacements                      9                       # number of replacements
 system.cpu.icache.replacements_0                    9                       # number of replacements
 system.cpu.icache.replacements_1                    0                       # number of replacements
-system.cpu.icache.sampled_refs                    622                       # Sample count of references to valid blocks.
+system.cpu.icache.sampled_refs                    619                       # Sample count of references to valid blocks.
 system.cpu.icache.soft_prefetch_mshr_full            0                       # number of mshr full events for SW prefetching instrutions
 system.cpu.icache.soft_prefetch_mshr_full_0            0                       # number of mshr full events for SW prefetching instrutions
 system.cpu.icache.soft_prefetch_mshr_full_1            0                       # number of mshr full events for SW prefetching instrutions
-system.cpu.icache.tagsinuse                323.196356                       # Cycle average of tags in use
-system.cpu.icache.total_refs                     2345                       # Total number of references to valid blocks.
+system.cpu.icache.tagsinuse                320.555850                       # Cycle average of tags in use
+system.cpu.icache.total_refs                     2287                       # Total number of references to valid blocks.
 system.cpu.icache.warmup_cycle                      0                       # Cycle when the warmup percentage was hit.
 system.cpu.icache.writebacks                        0                       # number of writebacks
 system.cpu.icache.writebacks_0                      0                       # number of writebacks
 system.cpu.icache.writebacks_1                      0                       # number of writebacks
 system.cpu.idleCycles                            2997                       # Total number of cycles that the CPU has spent unscheduled due to idling
-system.cpu.iew.EXEC:branches                     2377                       # Number of branches executed
-system.cpu.iew.EXEC:branches_0                   1192                       # Number of branches executed
-system.cpu.iew.EXEC:branches_1                   1185                       # Number of branches executed
-system.cpu.iew.EXEC:nop                            72                       # number of nop insts executed
+system.cpu.iew.EXEC:branches                     2346                       # Number of branches executed
+system.cpu.iew.EXEC:branches_0                   1165                       # Number of branches executed
+system.cpu.iew.EXEC:branches_1                   1181                       # Number of branches executed
+system.cpu.iew.EXEC:nop                            71                       # number of nop insts executed
 system.cpu.iew.EXEC:nop_0                          37                       # number of nop insts executed
-system.cpu.iew.EXEC:nop_1                          35                       # number of nop insts executed
-system.cpu.iew.EXEC:rate                     1.419725                       # Inst execution rate
-system.cpu.iew.EXEC:refs                         5002                       # number of memory reference insts executed
-system.cpu.iew.EXEC:refs_0                       2507                       # number of memory reference insts executed
-system.cpu.iew.EXEC:refs_1                       2495                       # number of memory reference insts executed
-system.cpu.iew.EXEC:stores                       1874                       # Number of stores executed
-system.cpu.iew.EXEC:stores_0                      933                       # Number of stores executed
-system.cpu.iew.EXEC:stores_1                      941                       # Number of stores executed
+system.cpu.iew.EXEC:nop_1                          34                       # number of nop insts executed
+system.cpu.iew.EXEC:rate                     1.504390                       # Inst execution rate
+system.cpu.iew.EXEC:refs                         4985                       # number of memory reference insts executed
+system.cpu.iew.EXEC:refs_0                       2501                       # number of memory reference insts executed
+system.cpu.iew.EXEC:refs_1                       2484                       # number of memory reference insts executed
+system.cpu.iew.EXEC:stores                       1875                       # Number of stores executed
+system.cpu.iew.EXEC:stores_0                      943                       # Number of stores executed
+system.cpu.iew.EXEC:stores_1                      932                       # Number of stores executed
 system.cpu.iew.EXEC:swp                             0                       # number of swp insts executed
 system.cpu.iew.EXEC:swp_0                           0                       # number of swp insts executed
 system.cpu.iew.EXEC:swp_1                           0                       # number of swp insts executed
-system.cpu.iew.WB:consumers                     10260                       # num instructions consuming a value
-system.cpu.iew.WB:consumers_0                    5135                       # num instructions consuming a value
-system.cpu.iew.WB:consumers_1                    5125                       # num instructions consuming a value
-system.cpu.iew.WB:count                         14994                       # cumulative count of insts written-back
-system.cpu.iew.WB:count_0                        7526                       # cumulative count of insts written-back
-system.cpu.iew.WB:count_1                        7468                       # cumulative count of insts written-back
-system.cpu.iew.WB:fanout                     1.530607                       # average fanout of values written-back
-system.cpu.iew.WB:fanout_0                   0.763778                       # average fanout of values written-back
-system.cpu.iew.WB:fanout_1                   0.766829                       # average fanout of values written-back
+system.cpu.iew.WB:consumers                     10076                       # num instructions consuming a value
+system.cpu.iew.WB:consumers_0                    5067                       # num instructions consuming a value
+system.cpu.iew.WB:consumers_1                    5009                       # num instructions consuming a value
+system.cpu.iew.WB:count                         14858                       # cumulative count of insts written-back
+system.cpu.iew.WB:count_0                        7442                       # cumulative count of insts written-back
+system.cpu.iew.WB:count_1                        7416                       # cumulative count of insts written-back
+system.cpu.iew.WB:fanout                     1.533770                       # average fanout of values written-back
+system.cpu.iew.WB:fanout_0                   0.764555                       # average fanout of values written-back
+system.cpu.iew.WB:fanout_1                   0.769215                       # average fanout of values written-back
 system.cpu.iew.WB:penalized                         0                       # number of instrctions required to write to 'other' IQ
 system.cpu.iew.WB:penalized_0                       0                       # number of instrctions required to write to 'other' IQ
 system.cpu.iew.WB:penalized_1                       0                       # number of instrctions required to write to 'other' IQ
 system.cpu.iew.WB:penalized_rate                    0                       # fraction of instructions written-back that wrote to 'other' IQ
 system.cpu.iew.WB:penalized_rate_0                  0                       # fraction of instructions written-back that wrote to 'other' IQ
 system.cpu.iew.WB:penalized_rate_1                  0                       # fraction of instructions written-back that wrote to 'other' IQ
-system.cpu.iew.WB:producers                      7852                       # num instructions producing a value
-system.cpu.iew.WB:producers_0                    3922                       # num instructions producing a value
-system.cpu.iew.WB:producers_1                    3930                       # num instructions producing a value
-system.cpu.iew.WB:rate                       1.365449                       # insts written-back per cycle
-system.cpu.iew.WB:rate_0                     0.685366                       # insts written-back per cycle
-system.cpu.iew.WB:rate_1                     0.680084                       # insts written-back per cycle
-system.cpu.iew.WB:sent                          15132                       # cumulative count of insts sent to commit
-system.cpu.iew.WB:sent_0                         7582                       # cumulative count of insts sent to commit
-system.cpu.iew.WB:sent_1                         7550                       # cumulative count of insts sent to commit
-system.cpu.iew.branchMispredicts                  958                       # Number of branch mispredicts detected at execute
-system.cpu.iew.iewBlockCycles                       6                       # Number of cycles IEW is blocking
-system.cpu.iew.iewDispLoadInsts                  3837                       # Number of dispatched load instructions
+system.cpu.iew.WB:producers                      7727                       # num instructions producing a value
+system.cpu.iew.WB:producers_0                    3874                       # num instructions producing a value
+system.cpu.iew.WB:producers_1                    3853                       # num instructions producing a value
+system.cpu.iew.WB:rate                       1.449561                       # insts written-back per cycle
+system.cpu.iew.WB:rate_0                     0.726049                       # insts written-back per cycle
+system.cpu.iew.WB:rate_1                     0.723512                       # insts written-back per cycle
+system.cpu.iew.WB:sent                          14990                       # cumulative count of insts sent to commit
+system.cpu.iew.WB:sent_0                         7512                       # cumulative count of insts sent to commit
+system.cpu.iew.WB:sent_1                         7478                       # cumulative count of insts sent to commit
+system.cpu.iew.branchMispredicts                  976                       # Number of branch mispredicts detected at execute
+system.cpu.iew.iewBlockCycles                       8                       # Number of cycles IEW is blocking
+system.cpu.iew.iewDispLoadInsts                  3781                       # Number of dispatched load instructions
 system.cpu.iew.iewDispNonSpecInsts                 42                       # Number of dispatched non-speculative instructions
-system.cpu.iew.iewDispSquashedInsts               445                       # Number of squashed instructions skipped by dispatch
-system.cpu.iew.iewDispStoreInsts                 2172                       # Number of dispatched store instructions
-system.cpu.iew.iewDispatchedInsts               19086                       # Number of instructions dispatched to IQ
-system.cpu.iew.iewExecLoadInsts                  3128                       # Number of load instructions executed
-system.cpu.iew.iewExecLoadInsts_0                1574                       # Number of load instructions executed
-system.cpu.iew.iewExecLoadInsts_1                1554                       # Number of load instructions executed
-system.cpu.iew.iewExecSquashedInsts               852                       # Number of squashed instructions skipped in execute
-system.cpu.iew.iewExecutedInsts                 15590                       # Number of executed instructions
-system.cpu.iew.iewIQFullEvents                      0                       # Number of times the IQ has become full, causing a stall
+system.cpu.iew.iewDispSquashedInsts               481                       # Number of squashed instructions skipped by dispatch
+system.cpu.iew.iewDispStoreInsts                 2184                       # Number of dispatched store instructions
+system.cpu.iew.iewDispatchedInsts               18854                       # Number of instructions dispatched to IQ
+system.cpu.iew.iewExecLoadInsts                  3110                       # Number of load instructions executed
+system.cpu.iew.iewExecLoadInsts_0                1558                       # Number of load instructions executed
+system.cpu.iew.iewExecLoadInsts_1                1552                       # Number of load instructions executed
+system.cpu.iew.iewExecSquashedInsts               865                       # Number of squashed instructions skipped in execute
+system.cpu.iew.iewExecutedInsts                 15420                       # Number of executed instructions
+system.cpu.iew.iewIQFullEvents                      2                       # Number of times the IQ has become full, causing a stall
 system.cpu.iew.iewIdleCycles                        0                       # Number of cycles IEW is idle
 system.cpu.iew.iewLSQFullEvents                     0                       # Number of times the LSQ has become full, causing a stall
-system.cpu.iew.iewSquashCycles                   1511                       # Number of cycles IEW is squashing
-system.cpu.iew.iewUnblockCycles                     0                       # Number of cycles IEW is unblocking
+system.cpu.iew.iewSquashCycles                   1444                       # Number of cycles IEW is squashing
+system.cpu.iew.iewUnblockCycles                     2                       # Number of cycles IEW is unblocking
 system.cpu.iew.lsq.thread.0.blockedLoads            0                       # Number of blocked loads due to partial load-store forwarding
 system.cpu.iew.lsq.thread.0.cacheBlocked            0                       # Number of times an access to memory failed due to the cache being blocked
-system.cpu.iew.lsq.thread.0.forwLoads              43                       # Number of loads that had data forwarded from stores
-system.cpu.iew.lsq.thread.0.ignoredResponses            4                       # Number of memory responses ignored because the instruction is squashed
+system.cpu.iew.lsq.thread.0.forwLoads              37                       # Number of loads that had data forwarded from stores
+system.cpu.iew.lsq.thread.0.ignoredResponses            0                       # Number of memory responses ignored because the instruction is squashed
 system.cpu.iew.lsq.thread.0.invAddrLoads            0                       # Number of loads ignored due to an invalid address
 system.cpu.iew.lsq.thread.0.invAddrSwpfs            0                       # Number of software prefetches ignored due to an invalid address
-system.cpu.iew.lsq.thread.0.memOrderViolation           64                       # Number of memory ordering violations
+system.cpu.iew.lsq.thread.0.memOrderViolation           65                       # Number of memory ordering violations
 system.cpu.iew.lsq.thread.0.rescheduledLoads            1                       # Number of loads that were rescheduled
-system.cpu.iew.lsq.thread.0.squashedLoads          955                       # Number of loads squashed
-system.cpu.iew.lsq.thread.0.squashedStores          270                       # Number of stores squashed
+system.cpu.iew.lsq.thread.0.squashedLoads          929                       # Number of loads squashed
+system.cpu.iew.lsq.thread.0.squashedStores          286                       # Number of stores squashed
 system.cpu.iew.lsq.thread.1.blockedLoads            0                       # Number of blocked loads due to partial load-store forwarding
 system.cpu.iew.lsq.thread.1.cacheBlocked            0                       # Number of times an access to memory failed due to the cache being blocked
-system.cpu.iew.lsq.thread.1.forwLoads              42                       # Number of loads that had data forwarded from stores
-system.cpu.iew.lsq.thread.1.ignoredResponses            2                       # Number of memory responses ignored because the instruction is squashed
+system.cpu.iew.lsq.thread.1.forwLoads              44                       # Number of loads that had data forwarded from stores
+system.cpu.iew.lsq.thread.1.ignoredResponses            3                       # Number of memory responses ignored because the instruction is squashed
 system.cpu.iew.lsq.thread.1.invAddrLoads            0                       # Number of loads ignored due to an invalid address
 system.cpu.iew.lsq.thread.1.invAddrSwpfs            0                       # Number of software prefetches ignored due to an invalid address
-system.cpu.iew.lsq.thread.1.memOrderViolation           58                       # Number of memory ordering violations
+system.cpu.iew.lsq.thread.1.memOrderViolation           65                       # Number of memory ordering violations
 system.cpu.iew.lsq.thread.1.rescheduledLoads            1                       # Number of loads that were rescheduled
-system.cpu.iew.lsq.thread.1.squashedLoads          924                       # Number of loads squashed
-system.cpu.iew.lsq.thread.1.squashedStores          278                       # Number of stores squashed
-system.cpu.iew.memOrderViolationEvents            122                       # Number of memory order violations
-system.cpu.iew.predictedNotTakenIncorrect          767                       # Number of branches that were predicted not taken incorrectly
-system.cpu.iew.predictedTakenIncorrect            191                       # Number of branches that were predicted taken incorrectly
-system.cpu.ipc_0                             0.512066                       # IPC: Instructions Per Cycle
-system.cpu.ipc_1                             0.512157                       # IPC: Instructions Per Cycle
-system.cpu.ipc_total                         1.024224                       # IPC: Total IPC of All Threads
-system.cpu.iq.ISSUE:FU_type_0                    8235                       # Type of FU issued
+system.cpu.iew.lsq.thread.1.squashedLoads          894                       # Number of loads squashed
+system.cpu.iew.lsq.thread.1.squashedStores          274                       # Number of stores squashed
+system.cpu.iew.memOrderViolationEvents            130                       # Number of memory order violations
+system.cpu.iew.predictedNotTakenIncorrect          783                       # Number of branches that were predicted not taken incorrectly
+system.cpu.iew.predictedTakenIncorrect            193                       # Number of branches that were predicted taken incorrectly
+system.cpu.ipc_0                             0.548585                       # IPC: Instructions Per Cycle
+system.cpu.ipc_1                             0.548683                       # IPC: Instructions Per Cycle
+system.cpu.ipc_total                         1.097268                       # IPC: Total IPC of All Threads
+system.cpu.iq.ISSUE:FU_type_0                    8178                       # Type of FU issued
 system.cpu.iq.ISSUE:FU_type_0.start_dist
                       No_OpClass            2      0.02%            # Type of FU issued
-                          IntAlu         5567     67.60%            # Type of FU issued
+                          IntAlu         5509     67.36%            # Type of FU issued
                          IntMult            1      0.01%            # Type of FU issued
                           IntDiv            0      0.00%            # Type of FU issued
                         FloatAdd            2      0.02%            # Type of FU issued
@@ -475,15 +475,15 @@ system.cpu.iq.ISSUE:FU_type_0.start_dist
                        FloatMult            0      0.00%            # Type of FU issued
                         FloatDiv            0      0.00%            # Type of FU issued
                        FloatSqrt            0      0.00%            # Type of FU issued
-                         MemRead         1702     20.67%            # Type of FU issued
-                        MemWrite          961     11.67%            # Type of FU issued
+                         MemRead         1692     20.69%            # Type of FU issued
+                        MemWrite          972     11.89%            # Type of FU issued
                        IprAccess            0      0.00%            # Type of FU issued
                     InstPrefetch            0      0.00%            # Type of FU issued
 system.cpu.iq.ISSUE:FU_type_0.end_dist
-system.cpu.iq.ISSUE:FU_type_1                    8207                       # Type of FU issued
+system.cpu.iq.ISSUE:FU_type_1                    8107                       # Type of FU issued
 system.cpu.iq.ISSUE:FU_type_1.start_dist
                       No_OpClass            2      0.02%            # Type of FU issued
-                          IntAlu         5547     67.59%            # Type of FU issued
+                          IntAlu         5452     67.25%            # Type of FU issued
                          IntMult            1      0.01%            # Type of FU issued
                           IntDiv            0      0.00%            # Type of FU issued
                         FloatAdd            2      0.02%            # Type of FU issued
@@ -492,15 +492,15 @@ system.cpu.iq.ISSUE:FU_type_1.start_dist
                        FloatMult            0      0.00%            # Type of FU issued
                         FloatDiv            0      0.00%            # Type of FU issued
                        FloatSqrt            0      0.00%            # Type of FU issued
-                         MemRead         1690     20.59%            # Type of FU issued
-                        MemWrite          965     11.76%            # Type of FU issued
+                         MemRead         1681     20.74%            # Type of FU issued
+                        MemWrite          969     11.95%            # Type of FU issued
                        IprAccess            0      0.00%            # Type of FU issued
                     InstPrefetch            0      0.00%            # Type of FU issued
 system.cpu.iq.ISSUE:FU_type_1.end_dist
-system.cpu.iq.ISSUE:FU_type                     16442                       # Type of FU issued
+system.cpu.iq.ISSUE:FU_type                     16285                       # Type of FU issued
 system.cpu.iq.ISSUE:FU_type.start_dist
                       No_OpClass            4      0.02%            # Type of FU issued
-                          IntAlu        11114     67.60%            # Type of FU issued
+                          IntAlu        10961     67.31%            # Type of FU issued
                          IntMult            2      0.01%            # Type of FU issued
                           IntDiv            0      0.00%            # Type of FU issued
                         FloatAdd            4      0.02%            # Type of FU issued
@@ -509,20 +509,20 @@ system.cpu.iq.ISSUE:FU_type.start_dist
                        FloatMult            0      0.00%            # Type of FU issued
                         FloatDiv            0      0.00%            # Type of FU issued
                        FloatSqrt            0      0.00%            # Type of FU issued
-                         MemRead         3392     20.63%            # Type of FU issued
-                        MemWrite         1926     11.71%            # Type of FU issued
+                         MemRead         3373     20.71%            # Type of FU issued
+                        MemWrite         1941     11.92%            # Type of FU issued
                        IprAccess            0      0.00%            # Type of FU issued
                     InstPrefetch            0      0.00%            # Type of FU issued
 system.cpu.iq.ISSUE:FU_type.end_dist
-system.cpu.iq.ISSUE:fu_busy_cnt                   189                       # FU busy when requested
-system.cpu.iq.ISSUE:fu_busy_cnt_0                  98                       # FU busy when requested
-system.cpu.iq.ISSUE:fu_busy_cnt_1                  91                       # FU busy when requested
-system.cpu.iq.ISSUE:fu_busy_rate             0.011495                       # FU busy rate (busy events/executed inst)
-system.cpu.iq.ISSUE:fu_busy_rate_0           0.005960                       # FU busy rate (busy events/executed inst)
-system.cpu.iq.ISSUE:fu_busy_rate_1           0.005535                       # FU busy rate (busy events/executed inst)
+system.cpu.iq.ISSUE:fu_busy_cnt                   180                       # FU busy when requested
+system.cpu.iq.ISSUE:fu_busy_cnt_0                  92                       # FU busy when requested
+system.cpu.iq.ISSUE:fu_busy_cnt_1                  88                       # FU busy when requested
+system.cpu.iq.ISSUE:fu_busy_rate             0.011053                       # FU busy rate (busy events/executed inst)
+system.cpu.iq.ISSUE:fu_busy_rate_0           0.005649                       # FU busy rate (busy events/executed inst)
+system.cpu.iq.ISSUE:fu_busy_rate_1           0.005404                       # FU busy rate (busy events/executed inst)
 system.cpu.iq.ISSUE:fu_full.start_dist
                       No_OpClass            0      0.00%            # attempts to use FU when none available
-                          IntAlu           14      7.41%            # attempts to use FU when none available
+                          IntAlu            7      3.89%            # attempts to use FU when none available
                          IntMult            0      0.00%            # attempts to use FU when none available
                           IntDiv            0      0.00%            # attempts to use FU when none available
                         FloatAdd            0      0.00%            # attempts to use FU when none available
@@ -531,75 +531,103 @@ system.cpu.iq.ISSUE:fu_full.start_dist
                        FloatMult            0      0.00%            # attempts to use FU when none available
                         FloatDiv            0      0.00%            # attempts to use FU when none available
                        FloatSqrt            0      0.00%            # attempts to use FU when none available
-                         MemRead          107     56.61%            # attempts to use FU when none available
-                        MemWrite           68     35.98%            # attempts to use FU when none available
+                         MemRead          112     62.22%            # attempts to use FU when none available
+                        MemWrite           61     33.89%            # attempts to use FU when none available
                        IprAccess            0      0.00%            # attempts to use FU when none available
                     InstPrefetch            0      0.00%            # attempts to use FU when none available
 system.cpu.iq.ISSUE:fu_full.end_dist
 system.cpu.iq.ISSUE:issued_per_cycle.start_dist                     # Number of insts issued each cycle
-system.cpu.iq.ISSUE:issued_per_cycle.samples        10981                      
+system.cpu.iq.ISSUE:issued_per_cycle.samples        10250                      
 system.cpu.iq.ISSUE:issued_per_cycle.min_value            0                      
-                               0         4775   4348.42%           
-                               1         1817   1654.68%           
-                               2         1638   1491.67%           
-                               3         1107   1008.10%           
-                               4          745    678.44%           
-                               5          490    446.23%           
-                               6          287    261.36%           
-                               7          100     91.07%           
-                               8           22     20.03%           
+                               0         4091   3991.22%           
+                               1         1777   1733.66%           
+                               2         1632   1592.20%           
+                               3         1101   1074.15%           
+                               4          778    759.02%           
+                               5          523    510.24%           
+                               6          249    242.93%           
+                               7           72     70.24%           
+                               8           27     26.34%           
 system.cpu.iq.ISSUE:issued_per_cycle.max_value            8                      
 system.cpu.iq.ISSUE:issued_per_cycle.end_dist
 
-system.cpu.iq.ISSUE:rate                     1.497314                       # Inst issue rate
-system.cpu.iq.iqInstsAdded                      18972                       # Number of instructions added to the IQ (excludes non-spec)
-system.cpu.iq.iqInstsIssued                     16442                       # Number of instructions issued
+system.cpu.iq.ISSUE:rate                     1.588780                       # Inst issue rate
+system.cpu.iq.iqInstsAdded                      18741                       # Number of instructions added to the IQ (excludes non-spec)
+system.cpu.iq.iqInstsIssued                     16285                       # Number of instructions issued
 system.cpu.iq.iqNonSpecInstsAdded                  42                       # Number of non-speculative instructions added to the IQ
-system.cpu.iq.iqSquashedInstsExamined            6918                       # Number of squashed instructions iterated over during squash; mainly for profiling
-system.cpu.iq.iqSquashedInstsIssued                63                       # Number of squashed instructions issued
+system.cpu.iq.iqSquashedInstsExamined            6728                       # Number of squashed instructions iterated over during squash; mainly for profiling
+system.cpu.iq.iqSquashedInstsIssued                34                       # Number of squashed instructions issued
 system.cpu.iq.iqSquashedNonSpecRemoved              8                       # Number of squashed non-spec instructions that were removed
-system.cpu.iq.iqSquashedOperandsExamined         4274                       # Number of squashed operands that are examined and possibly removed from graph
-system.cpu.l2cache.ReadReq_accesses               962                       # number of ReadReq accesses(hits+misses)
-system.cpu.l2cache.ReadReq_accesses_0             962                       # number of ReadReq accesses(hits+misses)
-system.cpu.l2cache.ReadReq_avg_miss_latency_0  5208.636837                       # average ReadReq miss latency
-system.cpu.l2cache.ReadReq_avg_mshr_miss_latency_0  2724.765869                       # average ReadReq mshr miss latency
-system.cpu.l2cache.ReadReq_hits                     1                       # number of ReadReq hits
-system.cpu.l2cache.ReadReq_hits_0                   1                       # number of ReadReq hits
-system.cpu.l2cache.ReadReq_miss_latency       5005500                       # number of ReadReq miss cycles
-system.cpu.l2cache.ReadReq_miss_latency_0      5005500                       # number of ReadReq miss cycles
-system.cpu.l2cache.ReadReq_miss_rate_0       0.998960                       # miss rate for ReadReq accesses
-system.cpu.l2cache.ReadReq_misses                 961                       # number of ReadReq misses
-system.cpu.l2cache.ReadReq_misses_0               961                       # number of ReadReq misses
-system.cpu.l2cache.ReadReq_mshr_miss_latency      2618500                       # number of ReadReq MSHR miss cycles
-system.cpu.l2cache.ReadReq_mshr_miss_latency_0      2618500                       # number of ReadReq MSHR miss cycles
-system.cpu.l2cache.ReadReq_mshr_miss_rate_0     0.998960                       # mshr miss rate for ReadReq accesses
-system.cpu.l2cache.ReadReq_mshr_misses            961                       # number of ReadReq MSHR misses
-system.cpu.l2cache.ReadReq_mshr_misses_0          961                       # number of ReadReq MSHR misses
+system.cpu.iq.iqSquashedOperandsExamined         4160                       # Number of squashed operands that are examined and possibly removed from graph
+system.cpu.l2cache.ReadExReq_accesses             146                       # number of ReadExReq accesses(hits+misses)
+system.cpu.l2cache.ReadExReq_accesses_0           146                       # number of ReadExReq accesses(hits+misses)
+system.cpu.l2cache.ReadExReq_avg_miss_latency_0  3893.835616                       # average ReadExReq miss latency
+system.cpu.l2cache.ReadExReq_avg_mshr_miss_latency_0  2893.835616                       # average ReadExReq mshr miss latency
+system.cpu.l2cache.ReadExReq_miss_latency       568500                       # number of ReadExReq miss cycles
+system.cpu.l2cache.ReadExReq_miss_latency_0       568500                       # number of ReadExReq miss cycles
+system.cpu.l2cache.ReadExReq_miss_rate_0            1                       # miss rate for ReadExReq accesses
+system.cpu.l2cache.ReadExReq_misses               146                       # number of ReadExReq misses
+system.cpu.l2cache.ReadExReq_misses_0             146                       # number of ReadExReq misses
+system.cpu.l2cache.ReadExReq_mshr_miss_latency       422500                       # number of ReadExReq MSHR miss cycles
+system.cpu.l2cache.ReadExReq_mshr_miss_latency_0       422500                       # number of ReadExReq MSHR miss cycles
+system.cpu.l2cache.ReadExReq_mshr_miss_rate_0            1                       # mshr miss rate for ReadExReq accesses
+system.cpu.l2cache.ReadExReq_mshr_misses          146                       # number of ReadExReq MSHR misses
+system.cpu.l2cache.ReadExReq_mshr_misses_0          146                       # number of ReadExReq MSHR misses
+system.cpu.l2cache.ReadReq_accesses               820                       # number of ReadReq accesses(hits+misses)
+system.cpu.l2cache.ReadReq_accesses_0             820                       # number of ReadReq accesses(hits+misses)
+system.cpu.l2cache.ReadReq_avg_miss_latency_0  3880.368098                       # average ReadReq miss latency
+system.cpu.l2cache.ReadReq_avg_mshr_miss_latency_0  2880.368098                       # average ReadReq mshr miss latency
+system.cpu.l2cache.ReadReq_hits                     5                       # number of ReadReq hits
+system.cpu.l2cache.ReadReq_hits_0                   5                       # number of ReadReq hits
+system.cpu.l2cache.ReadReq_miss_latency       3162500                       # number of ReadReq miss cycles
+system.cpu.l2cache.ReadReq_miss_latency_0      3162500                       # number of ReadReq miss cycles
+system.cpu.l2cache.ReadReq_miss_rate_0       0.993902                       # miss rate for ReadReq accesses
+system.cpu.l2cache.ReadReq_misses                 815                       # number of ReadReq misses
+system.cpu.l2cache.ReadReq_misses_0               815                       # number of ReadReq misses
+system.cpu.l2cache.ReadReq_mshr_miss_latency      2347500                       # number of ReadReq MSHR miss cycles
+system.cpu.l2cache.ReadReq_mshr_miss_latency_0      2347500                       # number of ReadReq MSHR miss cycles
+system.cpu.l2cache.ReadReq_mshr_miss_rate_0     0.993902                       # mshr miss rate for ReadReq accesses
+system.cpu.l2cache.ReadReq_mshr_misses            815                       # number of ReadReq MSHR misses
+system.cpu.l2cache.ReadReq_mshr_misses_0          815                       # number of ReadReq MSHR misses
+system.cpu.l2cache.UpgradeReq_accesses             28                       # number of UpgradeReq accesses(hits+misses)
+system.cpu.l2cache.UpgradeReq_accesses_0           28                       # number of UpgradeReq accesses(hits+misses)
+system.cpu.l2cache.UpgradeReq_avg_miss_latency_0  3392.857143                       # average UpgradeReq miss latency
+system.cpu.l2cache.UpgradeReq_avg_mshr_miss_latency_0  2392.857143                       # average UpgradeReq mshr miss latency
+system.cpu.l2cache.UpgradeReq_miss_latency        95000                       # number of UpgradeReq miss cycles
+system.cpu.l2cache.UpgradeReq_miss_latency_0        95000                       # number of UpgradeReq miss cycles
+system.cpu.l2cache.UpgradeReq_miss_rate_0            1                       # miss rate for UpgradeReq accesses
+system.cpu.l2cache.UpgradeReq_misses               28                       # number of UpgradeReq misses
+system.cpu.l2cache.UpgradeReq_misses_0             28                       # number of UpgradeReq misses
+system.cpu.l2cache.UpgradeReq_mshr_miss_latency        67000                       # number of UpgradeReq MSHR miss cycles
+system.cpu.l2cache.UpgradeReq_mshr_miss_latency_0        67000                       # number of UpgradeReq MSHR miss cycles
+system.cpu.l2cache.UpgradeReq_mshr_miss_rate_0            1                       # mshr miss rate for UpgradeReq accesses
+system.cpu.l2cache.UpgradeReq_mshr_misses           28                       # number of UpgradeReq MSHR misses
+system.cpu.l2cache.UpgradeReq_mshr_misses_0           28                       # number of UpgradeReq MSHR misses
 system.cpu.l2cache.avg_blocked_cycles_no_mshrs <err: div-0>                       # average number of cycles each access was blocked
 system.cpu.l2cache.avg_blocked_cycles_no_targets <err: div-0>                       # average number of cycles each access was blocked
-system.cpu.l2cache.avg_refs                  0.001041                       # Average number of references to valid blocks.
+system.cpu.l2cache.avg_refs                  0.006353                       # Average number of references to valid blocks.
 system.cpu.l2cache.blocked_no_mshrs                 0                       # number of cycles access was blocked
 system.cpu.l2cache.blocked_no_targets               0                       # number of cycles access was blocked
 system.cpu.l2cache.blocked_cycles_no_mshrs            0                       # number of cycles access was blocked
 system.cpu.l2cache.blocked_cycles_no_targets            0                       # number of cycles access was blocked
 system.cpu.l2cache.cache_copies                     0                       # number of cache copies performed
-system.cpu.l2cache.demand_accesses                962                       # number of demand (read+write) accesses
-system.cpu.l2cache.demand_accesses_0              962                       # number of demand (read+write) accesses
+system.cpu.l2cache.demand_accesses                966                       # number of demand (read+write) accesses
+system.cpu.l2cache.demand_accesses_0              966                       # number of demand (read+write) accesses
 system.cpu.l2cache.demand_accesses_1                0                       # number of demand (read+write) accesses
 system.cpu.l2cache.demand_avg_miss_latency <err: div-0>                       # average overall miss latency
-system.cpu.l2cache.demand_avg_miss_latency_0  5208.636837                       # average overall miss latency
+system.cpu.l2cache.demand_avg_miss_latency_0  3882.414152                       # average overall miss latency
 system.cpu.l2cache.demand_avg_miss_latency_1 <err: div-0>                       # average overall miss latency
 system.cpu.l2cache.demand_avg_mshr_miss_latency <err: div-0>                       # average overall mshr miss latency
-system.cpu.l2cache.demand_avg_mshr_miss_latency_0  2724.765869                       # average overall mshr miss latency
+system.cpu.l2cache.demand_avg_mshr_miss_latency_0  2882.414152                       # average overall mshr miss latency
 system.cpu.l2cache.demand_avg_mshr_miss_latency_1 <err: div-0>                       # average overall mshr miss latency
-system.cpu.l2cache.demand_hits                      1                       # number of demand (read+write) hits
-system.cpu.l2cache.demand_hits_0                    1                       # number of demand (read+write) hits
+system.cpu.l2cache.demand_hits                      5                       # number of demand (read+write) hits
+system.cpu.l2cache.demand_hits_0                    5                       # number of demand (read+write) hits
 system.cpu.l2cache.demand_hits_1                    0                       # number of demand (read+write) hits
-system.cpu.l2cache.demand_miss_latency        5005500                       # number of demand (read+write) miss cycles
-system.cpu.l2cache.demand_miss_latency_0      5005500                       # number of demand (read+write) miss cycles
+system.cpu.l2cache.demand_miss_latency        3731000                       # number of demand (read+write) miss cycles
+system.cpu.l2cache.demand_miss_latency_0      3731000                       # number of demand (read+write) miss cycles
 system.cpu.l2cache.demand_miss_latency_1            0                       # number of demand (read+write) miss cycles
 system.cpu.l2cache.demand_miss_rate      <err: div-0>                       # miss rate for demand accesses
-system.cpu.l2cache.demand_miss_rate_0        0.998960                       # miss rate for demand accesses
+system.cpu.l2cache.demand_miss_rate_0        0.994824                       # miss rate for demand accesses
 system.cpu.l2cache.demand_miss_rate_1    <err: div-0>                       # miss rate for demand accesses
 system.cpu.l2cache.demand_misses                  961                       # number of demand (read+write) misses
 system.cpu.l2cache.demand_misses_0                961                       # number of demand (read+write) misses
@@ -607,11 +635,11 @@ system.cpu.l2cache.demand_misses_1                  0                       # nu
 system.cpu.l2cache.demand_mshr_hits                 0                       # number of demand (read+write) MSHR hits
 system.cpu.l2cache.demand_mshr_hits_0               0                       # number of demand (read+write) MSHR hits
 system.cpu.l2cache.demand_mshr_hits_1               0                       # number of demand (read+write) MSHR hits
-system.cpu.l2cache.demand_mshr_miss_latency      2618500                       # number of demand (read+write) MSHR miss cycles
-system.cpu.l2cache.demand_mshr_miss_latency_0      2618500                       # number of demand (read+write) MSHR miss cycles
+system.cpu.l2cache.demand_mshr_miss_latency      2770000                       # number of demand (read+write) MSHR miss cycles
+system.cpu.l2cache.demand_mshr_miss_latency_0      2770000                       # number of demand (read+write) MSHR miss cycles
 system.cpu.l2cache.demand_mshr_miss_latency_1            0                       # number of demand (read+write) MSHR miss cycles
 system.cpu.l2cache.demand_mshr_miss_rate <err: div-0>                       # mshr miss rate for demand accesses
-system.cpu.l2cache.demand_mshr_miss_rate_0     0.998960                       # mshr miss rate for demand accesses
+system.cpu.l2cache.demand_mshr_miss_rate_0     0.994824                       # mshr miss rate for demand accesses
 system.cpu.l2cache.demand_mshr_miss_rate_1 <err: div-0>                       # mshr miss rate for demand accesses
 system.cpu.l2cache.demand_mshr_misses             961                       # number of demand (read+write) MSHR misses
 system.cpu.l2cache.demand_mshr_misses_0           961                       # number of demand (read+write) MSHR misses
@@ -621,26 +649,26 @@ system.cpu.l2cache.mshr_cap_events                  0                       # nu
 system.cpu.l2cache.mshr_cap_events_0                0                       # number of times MSHR cap was activated
 system.cpu.l2cache.mshr_cap_events_1                0                       # number of times MSHR cap was activated
 system.cpu.l2cache.no_allocate_misses               0                       # Number of misses that were no-allocate
-system.cpu.l2cache.overall_accesses               962                       # number of overall (read+write) accesses
-system.cpu.l2cache.overall_accesses_0             962                       # number of overall (read+write) accesses
+system.cpu.l2cache.overall_accesses               966                       # number of overall (read+write) accesses
+system.cpu.l2cache.overall_accesses_0             966                       # number of overall (read+write) accesses
 system.cpu.l2cache.overall_accesses_1               0                       # number of overall (read+write) accesses
 system.cpu.l2cache.overall_avg_miss_latency <err: div-0>                       # average overall miss latency
-system.cpu.l2cache.overall_avg_miss_latency_0  5208.636837                       # average overall miss latency
+system.cpu.l2cache.overall_avg_miss_latency_0  3882.414152                       # average overall miss latency
 system.cpu.l2cache.overall_avg_miss_latency_1 <err: div-0>                       # average overall miss latency
 system.cpu.l2cache.overall_avg_mshr_miss_latency <err: div-0>                       # average overall mshr miss latency
-system.cpu.l2cache.overall_avg_mshr_miss_latency_0  2724.765869                       # average overall mshr miss latency
+system.cpu.l2cache.overall_avg_mshr_miss_latency_0  2882.414152                       # average overall mshr miss latency
 system.cpu.l2cache.overall_avg_mshr_miss_latency_1 <err: div-0>                       # average overall mshr miss latency
 system.cpu.l2cache.overall_avg_mshr_uncacheable_latency <err: div-0>                       # average overall mshr uncacheable latency
 system.cpu.l2cache.overall_avg_mshr_uncacheable_latency_0 <err: div-0>                       # average overall mshr uncacheable latency
 system.cpu.l2cache.overall_avg_mshr_uncacheable_latency_1 <err: div-0>                       # average overall mshr uncacheable latency
-system.cpu.l2cache.overall_hits                     1                       # number of overall hits
-system.cpu.l2cache.overall_hits_0                   1                       # number of overall hits
+system.cpu.l2cache.overall_hits                     5                       # number of overall hits
+system.cpu.l2cache.overall_hits_0                   5                       # number of overall hits
 system.cpu.l2cache.overall_hits_1                   0                       # number of overall hits
-system.cpu.l2cache.overall_miss_latency       5005500                       # number of overall miss cycles
-system.cpu.l2cache.overall_miss_latency_0      5005500                       # number of overall miss cycles
+system.cpu.l2cache.overall_miss_latency       3731000                       # number of overall miss cycles
+system.cpu.l2cache.overall_miss_latency_0      3731000                       # number of overall miss cycles
 system.cpu.l2cache.overall_miss_latency_1            0                       # number of overall miss cycles
 system.cpu.l2cache.overall_miss_rate     <err: div-0>                       # miss rate for overall accesses
-system.cpu.l2cache.overall_miss_rate_0       0.998960                       # miss rate for overall accesses
+system.cpu.l2cache.overall_miss_rate_0       0.994824                       # miss rate for overall accesses
 system.cpu.l2cache.overall_miss_rate_1   <err: div-0>                       # miss rate for overall accesses
 system.cpu.l2cache.overall_misses                 961                       # number of overall misses
 system.cpu.l2cache.overall_misses_0               961                       # number of overall misses
@@ -648,11 +676,11 @@ system.cpu.l2cache.overall_misses_1                 0                       # nu
 system.cpu.l2cache.overall_mshr_hits                0                       # number of overall MSHR hits
 system.cpu.l2cache.overall_mshr_hits_0              0                       # number of overall MSHR hits
 system.cpu.l2cache.overall_mshr_hits_1              0                       # number of overall MSHR hits
-system.cpu.l2cache.overall_mshr_miss_latency      2618500                       # number of overall MSHR miss cycles
-system.cpu.l2cache.overall_mshr_miss_latency_0      2618500                       # number of overall MSHR miss cycles
+system.cpu.l2cache.overall_mshr_miss_latency      2770000                       # number of overall MSHR miss cycles
+system.cpu.l2cache.overall_mshr_miss_latency_0      2770000                       # number of overall MSHR miss cycles
 system.cpu.l2cache.overall_mshr_miss_latency_1            0                       # number of overall MSHR miss cycles
 system.cpu.l2cache.overall_mshr_miss_rate <err: div-0>                       # mshr miss rate for overall accesses
-system.cpu.l2cache.overall_mshr_miss_rate_0     0.998960                       # mshr miss rate for overall accesses
+system.cpu.l2cache.overall_mshr_miss_rate_0     0.994824                       # mshr miss rate for overall accesses
 system.cpu.l2cache.overall_mshr_miss_rate_1 <err: div-0>                       # mshr miss rate for overall accesses
 system.cpu.l2cache.overall_mshr_misses            961                       # number of overall MSHR misses
 system.cpu.l2cache.overall_mshr_misses_0          961                       # number of overall MSHR misses
@@ -675,31 +703,31 @@ system.cpu.l2cache.prefetcher.num_hwpf_squashed_from_miss            0
 system.cpu.l2cache.replacements                     0                       # number of replacements
 system.cpu.l2cache.replacements_0                   0                       # number of replacements
 system.cpu.l2cache.replacements_1                   0                       # number of replacements
-system.cpu.l2cache.sampled_refs                   961                       # Sample count of references to valid blocks.
+system.cpu.l2cache.sampled_refs                   787                       # Sample count of references to valid blocks.
 system.cpu.l2cache.soft_prefetch_mshr_full            0                       # number of mshr full events for SW prefetching instrutions
 system.cpu.l2cache.soft_prefetch_mshr_full_0            0                       # number of mshr full events for SW prefetching instrutions
 system.cpu.l2cache.soft_prefetch_mshr_full_1            0                       # number of mshr full events for SW prefetching instrutions
-system.cpu.l2cache.tagsinuse               545.318204                       # Cycle average of tags in use
-system.cpu.l2cache.total_refs                       1                       # Total number of references to valid blocks.
+system.cpu.l2cache.tagsinuse               430.884580                       # Cycle average of tags in use
+system.cpu.l2cache.total_refs                       5                       # Total number of references to valid blocks.
 system.cpu.l2cache.warmup_cycle                     0                       # Cycle when the warmup percentage was hit.
 system.cpu.l2cache.writebacks                       0                       # number of writebacks
 system.cpu.l2cache.writebacks_0                     0                       # number of writebacks
 system.cpu.l2cache.writebacks_1                     0                       # number of writebacks
-system.cpu.numCycles                            10981                       # number of cpu cycles simulated
-system.cpu.rename.RENAME:BlockCycles              612                       # Number of cycles rename is blocking
+system.cpu.numCycles                            10250                       # number of cpu cycles simulated
+system.cpu.rename.RENAME:BlockCycles              565                       # Number of cycles rename is blocking
 system.cpu.rename.RENAME:CommittedMaps           8102                       # Number of HB maps that are committed
-system.cpu.rename.RENAME:IdleCycles             14828                       # Number of cycles rename is idle
-system.cpu.rename.RENAME:LSQFullEvents            692                       # Number of times rename has blocked due to LSQ full
-system.cpu.rename.RENAME:RenameLookups          26356                       # Number of register rename lookups that rename has made
-system.cpu.rename.RENAME:RenamedInsts           20731                       # Number of instructions processed by rename
-system.cpu.rename.RENAME:RenamedOperands        15606                       # Number of destination operands rename has renamed
-system.cpu.rename.RENAME:RunCycles               3494                       # Number of cycles rename is running
-system.cpu.rename.RENAME:SquashCycles            1511                       # Number of cycles rename is squashing
-system.cpu.rename.RENAME:UnblockCycles            761                       # Number of cycles rename is unblocking
-system.cpu.rename.RENAME:UndoneMaps              7504                       # Number of HB maps that are undone due to squashing
-system.cpu.rename.RENAME:serializeStallCycles          521                       # count of cycles rename stalled for serializing inst
+system.cpu.rename.RENAME:IdleCycles             13468                       # Number of cycles rename is idle
+system.cpu.rename.RENAME:LSQFullEvents            717                       # Number of times rename has blocked due to LSQ full
+system.cpu.rename.RENAME:RenameLookups          26379                       # Number of register rename lookups that rename has made
+system.cpu.rename.RENAME:RenamedInsts           20752                       # Number of instructions processed by rename
+system.cpu.rename.RENAME:RenamedOperands        15596                       # Number of destination operands rename has renamed
+system.cpu.rename.RENAME:RunCycles               3515                       # Number of cycles rename is running
+system.cpu.rename.RENAME:SquashCycles            1444                       # Number of cycles rename is squashing
+system.cpu.rename.RENAME:UnblockCycles            772                       # Number of cycles rename is unblocking
+system.cpu.rename.RENAME:UndoneMaps              7494                       # Number of HB maps that are undone due to squashing
+system.cpu.rename.RENAME:serializeStallCycles          497                       # count of cycles rename stalled for serializing inst
 system.cpu.rename.RENAME:serializingInsts           48                       # count of serializing insts renamed
-system.cpu.rename.RENAME:skidInsts               2159                       # count of insts added to the skid buffer
+system.cpu.rename.RENAME:skidInsts               2091                       # count of insts added to the skid buffer
 system.cpu.rename.RENAME:tempSerializingInsts           37                       # count of temporary serializing insts renamed
 system.cpu.timesIdled                               3                       # Number of times that the entire CPU went into an idle state and unscheduled itself
 system.cpu.workload0.PROG:num_syscalls             17                       # Number of system calls
diff --git a/tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/stdout b/tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/stdout
index 76288ac1d..2e4042a43 100644
--- a/tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/stdout
+++ b/tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/stdout
@@ -7,9 +7,9 @@ The Regents of The University of Michigan
 All Rights Reserved
 
 
-M5 compiled Jun 21 2007 21:25:27
-M5 started Fri Jun 22 00:04:51 2007
+M5 compiled Aug  3 2007 03:56:47
+M5 started Fri Aug  3 04:17:15 2007
 M5 executing on zizzer.eecs.umich.edu
 command line: build/ALPHA_SE/m5.fast -d build/ALPHA_SE/tests/fast/quick/01.hello-2T-smt/alpha/linux/o3-timing tests/run.py quick/01.hello-2T-smt/alpha/linux/o3-timing
 Global frequency set at 1000000000000 ticks per second
-Exiting @ tick 5491500 because target called exit()
+Exiting @ tick 5126000 because target called exit()
diff --git a/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-atomic-dual/config.ini b/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-atomic-dual/config.ini
index 2d3b1a754..d3a9862e8 100644
--- a/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-atomic-dual/config.ini
+++ b/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-atomic-dual/config.ini
@@ -35,7 +35,7 @@ side_b=system.membus.port[0]
 
 [system.cpu0]
 type=AtomicSimpleCPU
-children=dcache dtb icache itb
+children=dcache dtb icache itb tracer
 clock=500
 cpu_id=0
 defer_registration=false
@@ -55,18 +55,16 @@ profile=0
 progress_interval=0
 simulate_stalls=false
 system=system
+tracer=system.cpu0.tracer
 width=1
 dcache_port=system.cpu0.dcache.cpu_side
 icache_port=system.cpu0.icache.cpu_side
 
 [system.cpu0.dcache]
 type=BaseCache
-children=protocol
-adaptive_compression=false
+addr_range=0:18446744073709551615
 assoc=4
 block_size=64
-compressed_bus=false
-compression_latency=0
 hash_delay=1
 latency=1000
 lifo=false
@@ -84,12 +82,10 @@ prefetch_serial_squash=false
 prefetch_use_cpu_id=true
 prefetcher_size=100
 prioritizeRequests=false
-protocol=system.cpu0.dcache.protocol
 repl=Null
 size=32768
 split=false
 split_size=0
-store_compressed=false
 subblock_size=0
 tgts_per_mshr=8
 trace_addr=0
@@ -98,23 +94,15 @@ write_buffers=8
 cpu_side=system.cpu0.dcache_port
 mem_side=system.toL2Bus.port[2]
 
-[system.cpu0.dcache.protocol]
-type=CoherenceProtocol
-do_upgrades=true
-protocol=moesi
-
 [system.cpu0.dtb]
 type=AlphaDTB
 size=64
 
 [system.cpu0.icache]
 type=BaseCache
-children=protocol
-adaptive_compression=false
+addr_range=0:18446744073709551615
 assoc=1
 block_size=64
-compressed_bus=false
-compression_latency=0
 hash_delay=1
 latency=1000
 lifo=false
@@ -132,12 +120,10 @@ prefetch_serial_squash=false
 prefetch_use_cpu_id=true
 prefetcher_size=100
 prioritizeRequests=false
-protocol=system.cpu0.icache.protocol
 repl=Null
 size=32768
 split=false
 split_size=0
-store_compressed=false
 subblock_size=0
 tgts_per_mshr=8
 trace_addr=0
@@ -146,18 +132,16 @@ write_buffers=8
 cpu_side=system.cpu0.icache_port
 mem_side=system.toL2Bus.port[1]
 
-[system.cpu0.icache.protocol]
-type=CoherenceProtocol
-do_upgrades=true
-protocol=moesi
-
 [system.cpu0.itb]
 type=AlphaITB
 size=48
 
+[system.cpu0.tracer]
+type=ExeTracer
+
 [system.cpu1]
 type=AtomicSimpleCPU
-children=dcache dtb icache itb
+children=dcache dtb icache itb tracer
 clock=500
 cpu_id=1
 defer_registration=false
@@ -177,18 +161,16 @@ profile=0
 progress_interval=0
 simulate_stalls=false
 system=system
+tracer=system.cpu1.tracer
 width=1
 dcache_port=system.cpu1.dcache.cpu_side
 icache_port=system.cpu1.icache.cpu_side
 
 [system.cpu1.dcache]
 type=BaseCache
-children=protocol
-adaptive_compression=false
+addr_range=0:18446744073709551615
 assoc=4
 block_size=64
-compressed_bus=false
-compression_latency=0
 hash_delay=1
 latency=1000
 lifo=false
@@ -206,12 +188,10 @@ prefetch_serial_squash=false
 prefetch_use_cpu_id=true
 prefetcher_size=100
 prioritizeRequests=false
-protocol=system.cpu1.dcache.protocol
 repl=Null
 size=32768
 split=false
 split_size=0
-store_compressed=false
 subblock_size=0
 tgts_per_mshr=8
 trace_addr=0
@@ -220,23 +200,15 @@ write_buffers=8
 cpu_side=system.cpu1.dcache_port
 mem_side=system.toL2Bus.port[4]
 
-[system.cpu1.dcache.protocol]
-type=CoherenceProtocol
-do_upgrades=true
-protocol=moesi
-
 [system.cpu1.dtb]
 type=AlphaDTB
 size=64
 
 [system.cpu1.icache]
 type=BaseCache
-children=protocol
-adaptive_compression=false
+addr_range=0:18446744073709551615
 assoc=1
 block_size=64
-compressed_bus=false
-compression_latency=0
 hash_delay=1
 latency=1000
 lifo=false
@@ -254,12 +226,10 @@ prefetch_serial_squash=false
 prefetch_use_cpu_id=true
 prefetcher_size=100
 prioritizeRequests=false
-protocol=system.cpu1.icache.protocol
 repl=Null
 size=32768
 split=false
 split_size=0
-store_compressed=false
 subblock_size=0
 tgts_per_mshr=8
 trace_addr=0
@@ -268,15 +238,13 @@ write_buffers=8
 cpu_side=system.cpu1.icache_port
 mem_side=system.toL2Bus.port[3]
 
-[system.cpu1.icache.protocol]
-type=CoherenceProtocol
-do_upgrades=true
-protocol=moesi
-
 [system.cpu1.itb]
 type=AlphaITB
 size=48
 
+[system.cpu1.tracer]
+type=ExeTracer
+
 [system.disk0]
 type=IdeDisk
 children=image
@@ -331,11 +299,9 @@ port=system.bridge.side_a system.tsunami.cchip.pio system.tsunami.pchip.pio syst
 
 [system.l2c]
 type=BaseCache
-adaptive_compression=false
+addr_range=0:18446744073709551615
 assoc=8
 block_size=64
-compressed_bus=false
-compression_latency=0
 hash_delay=1
 latency=10000
 lifo=false
@@ -353,12 +319,10 @@ prefetch_serial_squash=false
 prefetch_use_cpu_id=true
 prefetcher_size=100
 prioritizeRequests=false
-protocol=Null
 repl=Null
 size=4194304
 split=false
 split_size=0
-store_compressed=false
 subblock_size=0
 tgts_per_mshr=16
 trace_addr=0
@@ -922,7 +886,7 @@ pio_addr=8804615847936
 pio_latency=1000
 platform=system.tsunami
 system=system
-time=2009 1 1 0 0 0 3 1
+time=Thu Jan  1 00:00:00 2009
 tsunami=system.tsunami
 year_is_bcd=false
 pio=system.iobus.port[23]
diff --git a/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-atomic-dual/m5stats.txt b/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-atomic-dual/m5stats.txt
index 3458060ce..df780ee45 100644
--- a/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-atomic-dual/m5stats.txt
+++ b/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-atomic-dual/m5stats.txt
@@ -1,20 +1,29 @@
 
 ---------- Begin Simulation Statistics ----------
-host_inst_rate                                1418499                       # Simulator instruction rate (inst/s)
-host_seconds                                    44.50                       # Real time elapsed on the host
-host_tick_rate                            42028043491                       # Simulator tick rate (ticks/s)
+host_inst_rate                                1258571                       # Simulator instruction rate (inst/s)
+host_mem_usage                                 256444                       # Number of bytes of host memory used
+host_seconds                                    50.16                       # Real time elapsed on the host
+host_tick_rate                            37289409683                       # Simulator tick rate (ticks/s)
 sim_freq                                 1000000000000                       # Frequency of simulated ticks
 sim_insts                                    63125943                       # Number of instructions simulated
 sim_seconds                                  1.870335                       # Number of seconds simulated
 sim_ticks                                1870335101500                       # Number of ticks simulated
-system.cpu0.dcache.ReadReq_accesses           9163941                       # number of ReadReq accesses(hits+misses)
-system.cpu0.dcache.ReadReq_hits               7464198                       # number of ReadReq hits
-system.cpu0.dcache.ReadReq_miss_rate         0.185482                       # miss rate for ReadReq accesses
-system.cpu0.dcache.ReadReq_misses             1699743                       # number of ReadReq misses
-system.cpu0.dcache.WriteReq_accesses          5933396                       # number of WriteReq accesses(hits+misses)
-system.cpu0.dcache.WriteReq_hits              5646722                       # number of WriteReq hits
-system.cpu0.dcache.WriteReq_miss_rate        0.048315                       # miss rate for WriteReq accesses
-system.cpu0.dcache.WriteReq_misses             286674                       # number of WriteReq misses
+system.cpu0.dcache.LoadLockedReq_accesses       188283                       # number of LoadLockedReq accesses(hits+misses)
+system.cpu0.dcache.LoadLockedReq_hits          172122                       # number of LoadLockedReq hits
+system.cpu0.dcache.LoadLockedReq_miss_rate     0.085834                       # miss rate for LoadLockedReq accesses
+system.cpu0.dcache.LoadLockedReq_misses         16161                       # number of LoadLockedReq misses
+system.cpu0.dcache.ReadReq_accesses           8975658                       # number of ReadReq accesses(hits+misses)
+system.cpu0.dcache.ReadReq_hits               7292076                       # number of ReadReq hits
+system.cpu0.dcache.ReadReq_miss_rate         0.187572                       # miss rate for ReadReq accesses
+system.cpu0.dcache.ReadReq_misses             1683582                       # number of ReadReq misses
+system.cpu0.dcache.StoreCondReq_accesses       187323                       # number of StoreCondReq accesses(hits+misses)
+system.cpu0.dcache.StoreCondReq_hits           159819                       # number of StoreCondReq hits
+system.cpu0.dcache.StoreCondReq_miss_rate     0.146827                       # miss rate for StoreCondReq accesses
+system.cpu0.dcache.StoreCondReq_misses          27504                       # number of StoreCondReq misses
+system.cpu0.dcache.WriteReq_accesses          5746073                       # number of WriteReq accesses(hits+misses)
+system.cpu0.dcache.WriteReq_hits              5372266                       # number of WriteReq hits
+system.cpu0.dcache.WriteReq_miss_rate        0.065054                       # miss rate for WriteReq accesses
+system.cpu0.dcache.WriteReq_misses             373807                       # number of WriteReq misses
 system.cpu0.dcache.avg_blocked_cycles_no_mshrs <err: div-0>                       # average number of cycles each access was blocked
 system.cpu0.dcache.avg_blocked_cycles_no_targets <err: div-0>                       # average number of cycles each access was blocked
 system.cpu0.dcache.avg_refs                  6.625567                       # Average number of references to valid blocks.
@@ -23,13 +32,13 @@ system.cpu0.dcache.blocked_no_targets               0                       # nu
 system.cpu0.dcache.blocked_cycles_no_mshrs            0                       # number of cycles access was blocked
 system.cpu0.dcache.blocked_cycles_no_targets            0                       # number of cycles access was blocked
 system.cpu0.dcache.cache_copies                     0                       # number of cache copies performed
-system.cpu0.dcache.demand_accesses           15097337                       # number of demand (read+write) accesses
+system.cpu0.dcache.demand_accesses           14721731                       # number of demand (read+write) accesses
 system.cpu0.dcache.demand_avg_miss_latency            0                       # average overall miss latency
 system.cpu0.dcache.demand_avg_mshr_miss_latency <err: div-0>                       # average overall mshr miss latency
-system.cpu0.dcache.demand_hits               13110920                       # number of demand (read+write) hits
+system.cpu0.dcache.demand_hits               12664342                       # number of demand (read+write) hits
 system.cpu0.dcache.demand_miss_latency              0                       # number of demand (read+write) miss cycles
-system.cpu0.dcache.demand_miss_rate          0.131574                       # miss rate for demand accesses
-system.cpu0.dcache.demand_misses              1986417                       # number of demand (read+write) misses
+system.cpu0.dcache.demand_miss_rate          0.139752                       # miss rate for demand accesses
+system.cpu0.dcache.demand_misses              2057389                       # number of demand (read+write) misses
 system.cpu0.dcache.demand_mshr_hits                 0                       # number of demand (read+write) MSHR hits
 system.cpu0.dcache.demand_mshr_miss_latency            0                       # number of demand (read+write) MSHR miss cycles
 system.cpu0.dcache.demand_mshr_miss_rate            0                       # mshr miss rate for demand accesses
@@ -37,14 +46,14 @@ system.cpu0.dcache.demand_mshr_misses               0                       # nu
 system.cpu0.dcache.fast_writes                      0                       # number of fast writes performed
 system.cpu0.dcache.mshr_cap_events                  0                       # number of times MSHR cap was activated
 system.cpu0.dcache.no_allocate_misses               0                       # Number of misses that were no-allocate
-system.cpu0.dcache.overall_accesses          15097337                       # number of overall (read+write) accesses
+system.cpu0.dcache.overall_accesses          14721731                       # number of overall (read+write) accesses
 system.cpu0.dcache.overall_avg_miss_latency            0                       # average overall miss latency
 system.cpu0.dcache.overall_avg_mshr_miss_latency <err: div-0>                       # average overall mshr miss latency
 system.cpu0.dcache.overall_avg_mshr_uncacheable_latency <err: div-0>                       # average overall mshr uncacheable latency
-system.cpu0.dcache.overall_hits              13110920                       # number of overall hits
+system.cpu0.dcache.overall_hits              12664342                       # number of overall hits
 system.cpu0.dcache.overall_miss_latency             0                       # number of overall miss cycles
-system.cpu0.dcache.overall_miss_rate         0.131574                       # miss rate for overall accesses
-system.cpu0.dcache.overall_misses             1986417                       # number of overall misses
+system.cpu0.dcache.overall_miss_rate         0.139752                       # miss rate for overall accesses
+system.cpu0.dcache.overall_misses             2057389                       # number of overall misses
 system.cpu0.dcache.overall_mshr_hits                0                       # number of overall MSHR hits
 system.cpu0.dcache.overall_mshr_miss_latency            0                       # number of overall MSHR miss cycles
 system.cpu0.dcache.overall_mshr_miss_rate            0                       # mshr miss rate for overall accesses
@@ -60,39 +69,13 @@ system.cpu0.dcache.prefetcher.num_hwpf_issued            0
 system.cpu0.dcache.prefetcher.num_hwpf_removed_MSHR_hit            0                       # number of hwpf removed because MSHR allocated
 system.cpu0.dcache.prefetcher.num_hwpf_span_page            0                       # number of hwpf spanning a virtual page
 system.cpu0.dcache.prefetcher.num_hwpf_squashed_from_miss            0                       # number of hwpf that got squashed due to a miss aborting calculation time
-system.cpu0.dcache.protocol.hwpf_invalid            0                       # hard prefetch misses to invalid blocks
-system.cpu0.dcache.protocol.read_invalid      1699743                       # read misses to invalid blocks
-system.cpu0.dcache.protocol.snoop_inv_exclusive            0                       # Invalidate snoops on exclusive blocks
-system.cpu0.dcache.protocol.snoop_inv_invalid            0                       # Invalidate snoops on invalid blocks
-system.cpu0.dcache.protocol.snoop_inv_modified            2                       # Invalidate snoops on modified blocks
-system.cpu0.dcache.protocol.snoop_inv_owned            0                       # Invalidate snoops on owned blocks
-system.cpu0.dcache.protocol.snoop_inv_shared            0                       # Invalidate snoops on shared blocks
-system.cpu0.dcache.protocol.snoop_read_exclusive          689                       # read snoops on exclusive blocks
-system.cpu0.dcache.protocol.snoop_read_modified         4128                       # read snoops on modified blocks
-system.cpu0.dcache.protocol.snoop_read_owned          121                       # read snoops on owned blocks
-system.cpu0.dcache.protocol.snoop_read_shared         2691                       # read snoops on shared blocks
-system.cpu0.dcache.protocol.snoop_readex_exclusive          241                       # readEx snoops on exclusive blocks
-system.cpu0.dcache.protocol.snoop_readex_modified          227                       # readEx snoops on modified blocks
-system.cpu0.dcache.protocol.snoop_readex_owned           21                       # readEx snoops on owned blocks
-system.cpu0.dcache.protocol.snoop_readex_shared           14                       # readEx snoops on shared blocks
-system.cpu0.dcache.protocol.snoop_upgrade_owned         1359                       # upgrade snoops on owned blocks
-system.cpu0.dcache.protocol.snoop_upgrade_shared          725                       # upgradee snoops on shared blocks
-system.cpu0.dcache.protocol.snoop_writeinv_exclusive            0                       # WriteInvalidate snoops on exclusive blocks
-system.cpu0.dcache.protocol.snoop_writeinv_invalid            0                       # WriteInvalidate snoops on invalid blocks
-system.cpu0.dcache.protocol.snoop_writeinv_modified            0                       # WriteInvalidate snoops on modified blocks
-system.cpu0.dcache.protocol.snoop_writeinv_owned            0                       # WriteInvalidate snoops on owned blocks
-system.cpu0.dcache.protocol.snoop_writeinv_shared            0                       # WriteInvalidate snoops on shared blocks
-system.cpu0.dcache.protocol.swpf_invalid            0                       # soft prefetch misses to invalid blocks
-system.cpu0.dcache.protocol.write_invalid       282338                       # write misses to invalid blocks
-system.cpu0.dcache.protocol.write_owned          2517                       # write misses to owned blocks
-system.cpu0.dcache.protocol.write_shared         1819                       # write misses to shared blocks
 system.cpu0.dcache.replacements               1978980                       # number of replacements
 system.cpu0.dcache.sampled_refs               1979492                       # Sample count of references to valid blocks.
 system.cpu0.dcache.soft_prefetch_mshr_full            0                       # number of mshr full events for SW prefetching instrutions
 system.cpu0.dcache.tagsinuse               504.827576                       # Cycle average of tags in use
 system.cpu0.dcache.total_refs                13115256                       # Total number of references to valid blocks.
 system.cpu0.dcache.warmup_cycle              10840000                       # Cycle when the warmup percentage was hit.
-system.cpu0.dcache.writebacks                       0                       # number of writebacks
+system.cpu0.dcache.writebacks                  396796                       # number of writebacks
 system.cpu0.dtb.accesses                       698037                       # DTB accesses
 system.cpu0.dtb.acv                               251                       # DTB access violations
 system.cpu0.dtb.hits                         15082969                       # DTB hits
@@ -154,32 +137,6 @@ system.cpu0.icache.prefetcher.num_hwpf_issued            0
 system.cpu0.icache.prefetcher.num_hwpf_removed_MSHR_hit            0                       # number of hwpf removed because MSHR allocated
 system.cpu0.icache.prefetcher.num_hwpf_span_page            0                       # number of hwpf spanning a virtual page
 system.cpu0.icache.prefetcher.num_hwpf_squashed_from_miss            0                       # number of hwpf that got squashed due to a miss aborting calculation time
-system.cpu0.icache.protocol.hwpf_invalid            0                       # hard prefetch misses to invalid blocks
-system.cpu0.icache.protocol.read_invalid       884872                       # read misses to invalid blocks
-system.cpu0.icache.protocol.snoop_inv_exclusive            0                       # Invalidate snoops on exclusive blocks
-system.cpu0.icache.protocol.snoop_inv_invalid            0                       # Invalidate snoops on invalid blocks
-system.cpu0.icache.protocol.snoop_inv_modified            0                       # Invalidate snoops on modified blocks
-system.cpu0.icache.protocol.snoop_inv_owned            0                       # Invalidate snoops on owned blocks
-system.cpu0.icache.protocol.snoop_inv_shared            0                       # Invalidate snoops on shared blocks
-system.cpu0.icache.protocol.snoop_read_exclusive        25832                       # read snoops on exclusive blocks
-system.cpu0.icache.protocol.snoop_read_modified            0                       # read snoops on modified blocks
-system.cpu0.icache.protocol.snoop_read_owned            0                       # read snoops on owned blocks
-system.cpu0.icache.protocol.snoop_read_shared        13268                       # read snoops on shared blocks
-system.cpu0.icache.protocol.snoop_readex_exclusive           78                       # readEx snoops on exclusive blocks
-system.cpu0.icache.protocol.snoop_readex_modified            0                       # readEx snoops on modified blocks
-system.cpu0.icache.protocol.snoop_readex_owned            0                       # readEx snoops on owned blocks
-system.cpu0.icache.protocol.snoop_readex_shared            0                       # readEx snoops on shared blocks
-system.cpu0.icache.protocol.snoop_upgrade_owned            0                       # upgrade snoops on owned blocks
-system.cpu0.icache.protocol.snoop_upgrade_shared            6                       # upgradee snoops on shared blocks
-system.cpu0.icache.protocol.snoop_writeinv_exclusive            0                       # WriteInvalidate snoops on exclusive blocks
-system.cpu0.icache.protocol.snoop_writeinv_invalid            0                       # WriteInvalidate snoops on invalid blocks
-system.cpu0.icache.protocol.snoop_writeinv_modified            0                       # WriteInvalidate snoops on modified blocks
-system.cpu0.icache.protocol.snoop_writeinv_owned            0                       # WriteInvalidate snoops on owned blocks
-system.cpu0.icache.protocol.snoop_writeinv_shared            0                       # WriteInvalidate snoops on shared blocks
-system.cpu0.icache.protocol.swpf_invalid            0                       # soft prefetch misses to invalid blocks
-system.cpu0.icache.protocol.write_invalid            0                       # write misses to invalid blocks
-system.cpu0.icache.protocol.write_owned             0                       # write misses to owned blocks
-system.cpu0.icache.protocol.write_shared            0                       # write misses to shared blocks
 system.cpu0.icache.replacements                884276                       # number of replacements
 system.cpu0.icache.sampled_refs                884788                       # Sample count of references to valid blocks.
 system.cpu0.icache.soft_prefetch_mshr_full            0                       # number of mshr full events for SW prefetching instrutions
@@ -285,14 +242,22 @@ system.cpu0.not_idle_fraction                0.015290                       # Pe
 system.cpu0.numCycles                        57193784                       # number of cpu cycles simulated
 system.cpu0.num_insts                        57190172                       # Number of instructions executed
 system.cpu0.num_refs                         15322419                       # Number of memory references
-system.cpu1.dcache.ReadReq_accesses           1167383                       # number of ReadReq accesses(hits+misses)
-system.cpu1.dcache.ReadReq_hits               1124444                       # number of ReadReq hits
-system.cpu1.dcache.ReadReq_miss_rate         0.036782                       # miss rate for ReadReq accesses
-system.cpu1.dcache.ReadReq_misses               42939                       # number of ReadReq misses
-system.cpu1.dcache.WriteReq_accesses           749650                       # number of WriteReq accesses(hits+misses)
-system.cpu1.dcache.WriteReq_hits               723062                       # number of WriteReq hits
-system.cpu1.dcache.WriteReq_miss_rate        0.035467                       # miss rate for WriteReq accesses
-system.cpu1.dcache.WriteReq_misses              26588                       # number of WriteReq misses
+system.cpu1.dcache.LoadLockedReq_accesses        16418                       # number of LoadLockedReq accesses(hits+misses)
+system.cpu1.dcache.LoadLockedReq_hits           15129                       # number of LoadLockedReq hits
+system.cpu1.dcache.LoadLockedReq_miss_rate     0.078511                       # miss rate for LoadLockedReq accesses
+system.cpu1.dcache.LoadLockedReq_misses          1289                       # number of LoadLockedReq misses
+system.cpu1.dcache.ReadReq_accesses           1150965                       # number of ReadReq accesses(hits+misses)
+system.cpu1.dcache.ReadReq_hits               1109315                       # number of ReadReq hits
+system.cpu1.dcache.ReadReq_miss_rate         0.036187                       # miss rate for ReadReq accesses
+system.cpu1.dcache.ReadReq_misses               41650                       # number of ReadReq misses
+system.cpu1.dcache.StoreCondReq_accesses        16345                       # number of StoreCondReq accesses(hits+misses)
+system.cpu1.dcache.StoreCondReq_hits            13438                       # number of StoreCondReq hits
+system.cpu1.dcache.StoreCondReq_miss_rate     0.177853                       # miss rate for StoreCondReq accesses
+system.cpu1.dcache.StoreCondReq_misses           2907                       # number of StoreCondReq misses
+system.cpu1.dcache.WriteReq_accesses           733305                       # number of WriteReq accesses(hits+misses)
+system.cpu1.dcache.WriteReq_hits               702800                       # number of WriteReq hits
+system.cpu1.dcache.WriteReq_miss_rate        0.041599                       # miss rate for WriteReq accesses
+system.cpu1.dcache.WriteReq_misses              30505                       # number of WriteReq misses
 system.cpu1.dcache.avg_blocked_cycles_no_mshrs <err: div-0>                       # average number of cycles each access was blocked
 system.cpu1.dcache.avg_blocked_cycles_no_targets <err: div-0>                       # average number of cycles each access was blocked
 system.cpu1.dcache.avg_refs                 29.277705                       # Average number of references to valid blocks.
@@ -301,13 +266,13 @@ system.cpu1.dcache.blocked_no_targets               0                       # nu
 system.cpu1.dcache.blocked_cycles_no_mshrs            0                       # number of cycles access was blocked
 system.cpu1.dcache.blocked_cycles_no_targets            0                       # number of cycles access was blocked
 system.cpu1.dcache.cache_copies                     0                       # number of cache copies performed
-system.cpu1.dcache.demand_accesses            1917033                       # number of demand (read+write) accesses
+system.cpu1.dcache.demand_accesses            1884270                       # number of demand (read+write) accesses
 system.cpu1.dcache.demand_avg_miss_latency            0                       # average overall miss latency
 system.cpu1.dcache.demand_avg_mshr_miss_latency <err: div-0>                       # average overall mshr miss latency
-system.cpu1.dcache.demand_hits                1847506                       # number of demand (read+write) hits
+system.cpu1.dcache.demand_hits                1812115                       # number of demand (read+write) hits
 system.cpu1.dcache.demand_miss_latency              0                       # number of demand (read+write) miss cycles
-system.cpu1.dcache.demand_miss_rate          0.036268                       # miss rate for demand accesses
-system.cpu1.dcache.demand_misses                69527                       # number of demand (read+write) misses
+system.cpu1.dcache.demand_miss_rate          0.038293                       # miss rate for demand accesses
+system.cpu1.dcache.demand_misses                72155                       # number of demand (read+write) misses
 system.cpu1.dcache.demand_mshr_hits                 0                       # number of demand (read+write) MSHR hits
 system.cpu1.dcache.demand_mshr_miss_latency            0                       # number of demand (read+write) MSHR miss cycles
 system.cpu1.dcache.demand_mshr_miss_rate            0                       # mshr miss rate for demand accesses
@@ -315,14 +280,14 @@ system.cpu1.dcache.demand_mshr_misses               0                       # nu
 system.cpu1.dcache.fast_writes                      0                       # number of fast writes performed
 system.cpu1.dcache.mshr_cap_events                  0                       # number of times MSHR cap was activated
 system.cpu1.dcache.no_allocate_misses               0                       # Number of misses that were no-allocate
-system.cpu1.dcache.overall_accesses           1917033                       # number of overall (read+write) accesses
+system.cpu1.dcache.overall_accesses           1884270                       # number of overall (read+write) accesses
 system.cpu1.dcache.overall_avg_miss_latency            0                       # average overall miss latency
 system.cpu1.dcache.overall_avg_mshr_miss_latency <err: div-0>                       # average overall mshr miss latency
 system.cpu1.dcache.overall_avg_mshr_uncacheable_latency <err: div-0>                       # average overall mshr uncacheable latency
-system.cpu1.dcache.overall_hits               1847506                       # number of overall hits
+system.cpu1.dcache.overall_hits               1812115                       # number of overall hits
 system.cpu1.dcache.overall_miss_latency             0                       # number of overall miss cycles
-system.cpu1.dcache.overall_miss_rate         0.036268                       # miss rate for overall accesses
-system.cpu1.dcache.overall_misses               69527                       # number of overall misses
+system.cpu1.dcache.overall_miss_rate         0.038293                       # miss rate for overall accesses
+system.cpu1.dcache.overall_misses               72155                       # number of overall misses
 system.cpu1.dcache.overall_mshr_hits                0                       # number of overall MSHR hits
 system.cpu1.dcache.overall_mshr_miss_latency            0                       # number of overall MSHR miss cycles
 system.cpu1.dcache.overall_mshr_miss_rate            0                       # mshr miss rate for overall accesses
@@ -338,39 +303,13 @@ system.cpu1.dcache.prefetcher.num_hwpf_issued            0
 system.cpu1.dcache.prefetcher.num_hwpf_removed_MSHR_hit            0                       # number of hwpf removed because MSHR allocated
 system.cpu1.dcache.prefetcher.num_hwpf_span_page            0                       # number of hwpf spanning a virtual page
 system.cpu1.dcache.prefetcher.num_hwpf_squashed_from_miss            0                       # number of hwpf that got squashed due to a miss aborting calculation time
-system.cpu1.dcache.protocol.hwpf_invalid            0                       # hard prefetch misses to invalid blocks
-system.cpu1.dcache.protocol.read_invalid        42939                       # read misses to invalid blocks
-system.cpu1.dcache.protocol.snoop_inv_exclusive            0                       # Invalidate snoops on exclusive blocks
-system.cpu1.dcache.protocol.snoop_inv_invalid            0                       # Invalidate snoops on invalid blocks
-system.cpu1.dcache.protocol.snoop_inv_modified            0                       # Invalidate snoops on modified blocks
-system.cpu1.dcache.protocol.snoop_inv_owned            0                       # Invalidate snoops on owned blocks
-system.cpu1.dcache.protocol.snoop_inv_shared            0                       # Invalidate snoops on shared blocks
-system.cpu1.dcache.protocol.snoop_read_exclusive          939                       # read snoops on exclusive blocks
-system.cpu1.dcache.protocol.snoop_read_modified         2438                       # read snoops on modified blocks
-system.cpu1.dcache.protocol.snoop_read_owned          337                       # read snoops on owned blocks
-system.cpu1.dcache.protocol.snoop_read_shared        61772                       # read snoops on shared blocks
-system.cpu1.dcache.protocol.snoop_readex_exclusive          103                       # readEx snoops on exclusive blocks
-system.cpu1.dcache.protocol.snoop_readex_modified          275                       # readEx snoops on modified blocks
-system.cpu1.dcache.protocol.snoop_readex_owned           44                       # readEx snoops on owned blocks
-system.cpu1.dcache.protocol.snoop_readex_shared           39                       # readEx snoops on shared blocks
-system.cpu1.dcache.protocol.snoop_upgrade_owned         1538                       # upgrade snoops on owned blocks
-system.cpu1.dcache.protocol.snoop_upgrade_shared         2755                       # upgradee snoops on shared blocks
-system.cpu1.dcache.protocol.snoop_writeinv_exclusive            0                       # WriteInvalidate snoops on exclusive blocks
-system.cpu1.dcache.protocol.snoop_writeinv_invalid            0                       # WriteInvalidate snoops on invalid blocks
-system.cpu1.dcache.protocol.snoop_writeinv_modified            0                       # WriteInvalidate snoops on modified blocks
-system.cpu1.dcache.protocol.snoop_writeinv_owned            0                       # WriteInvalidate snoops on owned blocks
-system.cpu1.dcache.protocol.snoop_writeinv_shared            0                       # WriteInvalidate snoops on shared blocks
-system.cpu1.dcache.protocol.swpf_invalid            0                       # soft prefetch misses to invalid blocks
-system.cpu1.dcache.protocol.write_invalid        24475                       # write misses to invalid blocks
-system.cpu1.dcache.protocol.write_owned           641                       # write misses to owned blocks
-system.cpu1.dcache.protocol.write_shared         1472                       # write misses to shared blocks
 system.cpu1.dcache.replacements                 62341                       # number of replacements
 system.cpu1.dcache.sampled_refs                 62660                       # Sample count of references to valid blocks.
 system.cpu1.dcache.soft_prefetch_mshr_full            0                       # number of mshr full events for SW prefetching instrutions
 system.cpu1.dcache.tagsinuse               391.945837                       # Cycle average of tags in use
 system.cpu1.dcache.total_refs                 1834541                       # Total number of references to valid blocks.
 system.cpu1.dcache.warmup_cycle          1851266680500                       # Cycle when the warmup percentage was hit.
-system.cpu1.dcache.writebacks                       0                       # number of writebacks
+system.cpu1.dcache.writebacks                   30850                       # number of writebacks
 system.cpu1.dtb.accesses                       323622                       # DTB accesses
 system.cpu1.dtb.acv                               116                       # DTB access violations
 system.cpu1.dtb.hits                          1914885                       # DTB hits
@@ -432,32 +371,6 @@ system.cpu1.icache.prefetcher.num_hwpf_issued            0
 system.cpu1.icache.prefetcher.num_hwpf_removed_MSHR_hit            0                       # number of hwpf removed because MSHR allocated
 system.cpu1.icache.prefetcher.num_hwpf_span_page            0                       # number of hwpf spanning a virtual page
 system.cpu1.icache.prefetcher.num_hwpf_squashed_from_miss            0                       # number of hwpf that got squashed due to a miss aborting calculation time
-system.cpu1.icache.protocol.hwpf_invalid            0                       # hard prefetch misses to invalid blocks
-system.cpu1.icache.protocol.read_invalid       103636                       # read misses to invalid blocks
-system.cpu1.icache.protocol.snoop_inv_exclusive            0                       # Invalidate snoops on exclusive blocks
-system.cpu1.icache.protocol.snoop_inv_invalid            0                       # Invalidate snoops on invalid blocks
-system.cpu1.icache.protocol.snoop_inv_modified            0                       # Invalidate snoops on modified blocks
-system.cpu1.icache.protocol.snoop_inv_owned            0                       # Invalidate snoops on owned blocks
-system.cpu1.icache.protocol.snoop_inv_shared            0                       # Invalidate snoops on shared blocks
-system.cpu1.icache.protocol.snoop_read_exclusive        17317                       # read snoops on exclusive blocks
-system.cpu1.icache.protocol.snoop_read_modified            0                       # read snoops on modified blocks
-system.cpu1.icache.protocol.snoop_read_owned            0                       # read snoops on owned blocks
-system.cpu1.icache.protocol.snoop_read_shared       199395                       # read snoops on shared blocks
-system.cpu1.icache.protocol.snoop_readex_exclusive           25                       # readEx snoops on exclusive blocks
-system.cpu1.icache.protocol.snoop_readex_modified            0                       # readEx snoops on modified blocks
-system.cpu1.icache.protocol.snoop_readex_owned            0                       # readEx snoops on owned blocks
-system.cpu1.icache.protocol.snoop_readex_shared            0                       # readEx snoops on shared blocks
-system.cpu1.icache.protocol.snoop_upgrade_owned            0                       # upgrade snoops on owned blocks
-system.cpu1.icache.protocol.snoop_upgrade_shared            2                       # upgradee snoops on shared blocks
-system.cpu1.icache.protocol.snoop_writeinv_exclusive            0                       # WriteInvalidate snoops on exclusive blocks
-system.cpu1.icache.protocol.snoop_writeinv_invalid            0                       # WriteInvalidate snoops on invalid blocks
-system.cpu1.icache.protocol.snoop_writeinv_modified            0                       # WriteInvalidate snoops on modified blocks
-system.cpu1.icache.protocol.snoop_writeinv_owned            0                       # WriteInvalidate snoops on owned blocks
-system.cpu1.icache.protocol.snoop_writeinv_shared            0                       # WriteInvalidate snoops on shared blocks
-system.cpu1.icache.protocol.swpf_invalid            0                       # soft prefetch misses to invalid blocks
-system.cpu1.icache.protocol.write_invalid            0                       # write misses to invalid blocks
-system.cpu1.icache.protocol.write_owned             0                       # write misses to owned blocks
-system.cpu1.icache.protocol.write_shared            0                       # write misses to shared blocks
 system.cpu1.icache.replacements                103097                       # number of replacements
 system.cpu1.icache.sampled_refs                103609                       # Sample count of references to valid blocks.
 system.cpu1.icache.soft_prefetch_mshr_full            0                       # number of mshr full events for SW prefetching instrutions
@@ -559,30 +472,33 @@ system.disk2.dma_write_bytes                     8192                       # Nu
 system.disk2.dma_write_full_pages                   1                       # Number of full page size DMA writes.
 system.disk2.dma_write_txs                          1                       # Number of DMA write transactions.
 system.l2c.ReadExReq_accesses                  306246                       # number of ReadExReq accesses(hits+misses)
-system.l2c.ReadExReq_hits                      181108                       # number of ReadExReq hits
-system.l2c.ReadExReq_miss_rate               0.408619                       # miss rate for ReadExReq accesses
-system.l2c.ReadExReq_misses                    125138                       # number of ReadExReq misses
+system.l2c.ReadExReq_miss_rate                      1                       # miss rate for ReadExReq accesses
+system.l2c.ReadExReq_misses                    306246                       # number of ReadExReq misses
 system.l2c.ReadReq_accesses                   2724166                       # number of ReadReq accesses(hits+misses)
-system.l2c.ReadReq_hits                       1782863                       # number of ReadReq hits
-system.l2c.ReadReq_miss_rate                 0.345538                       # miss rate for ReadReq accesses
-system.l2c.ReadReq_misses                      941303                       # number of ReadReq misses
-system.l2c.Writeback_accesses                  427634                       # number of Writeback accesses(hits+misses)
-system.l2c.Writeback_hits                      427634                       # number of Writeback hits
+system.l2c.ReadReq_hits                       1625506                       # number of ReadReq hits
+system.l2c.ReadReq_miss_rate                 0.403301                       # miss rate for ReadReq accesses
+system.l2c.ReadReq_misses                     1098660                       # number of ReadReq misses
+system.l2c.UpgradeReq_accesses                 125013                       # number of UpgradeReq accesses(hits+misses)
+system.l2c.UpgradeReq_miss_rate                     1                       # miss rate for UpgradeReq accesses
+system.l2c.UpgradeReq_misses                   125013                       # number of UpgradeReq misses
+system.l2c.Writeback_accesses                  427646                       # number of Writeback accesses(hits+misses)
+system.l2c.Writeback_miss_rate                      1                       # miss rate for Writeback accesses
+system.l2c.Writeback_misses                    427646                       # number of Writeback misses
 system.l2c.avg_blocked_cycles_no_mshrs   <err: div-0>                       # average number of cycles each access was blocked
 system.l2c.avg_blocked_cycles_no_targets <err: div-0>                       # average number of cycles each access was blocked
-system.l2c.avg_refs                          2.242879                       # Average number of references to valid blocks.
+system.l2c.avg_refs                          1.720013                       # Average number of references to valid blocks.
 system.l2c.blocked_no_mshrs                         0                       # number of cycles access was blocked
 system.l2c.blocked_no_targets                       0                       # number of cycles access was blocked
 system.l2c.blocked_cycles_no_mshrs                  0                       # number of cycles access was blocked
 system.l2c.blocked_cycles_no_targets                0                       # number of cycles access was blocked
 system.l2c.cache_copies                             0                       # number of cache copies performed
-system.l2c.demand_accesses                    2724166                       # number of demand (read+write) accesses
+system.l2c.demand_accesses                    3030412                       # number of demand (read+write) accesses
 system.l2c.demand_avg_miss_latency                  0                       # average overall miss latency
 system.l2c.demand_avg_mshr_miss_latency  <err: div-0>                       # average overall mshr miss latency
-system.l2c.demand_hits                        1782863                       # number of demand (read+write) hits
+system.l2c.demand_hits                        1625506                       # number of demand (read+write) hits
 system.l2c.demand_miss_latency                      0                       # number of demand (read+write) miss cycles
-system.l2c.demand_miss_rate                  0.345538                       # miss rate for demand accesses
-system.l2c.demand_misses                       941303                       # number of demand (read+write) misses
+system.l2c.demand_miss_rate                  0.463602                       # miss rate for demand accesses
+system.l2c.demand_misses                      1404906                       # number of demand (read+write) misses
 system.l2c.demand_mshr_hits                         0                       # number of demand (read+write) MSHR hits
 system.l2c.demand_mshr_miss_latency                 0                       # number of demand (read+write) MSHR miss cycles
 system.l2c.demand_mshr_miss_rate                    0                       # mshr miss rate for demand accesses
@@ -590,14 +506,14 @@ system.l2c.demand_mshr_misses                       0                       # nu
 system.l2c.fast_writes                              0                       # number of fast writes performed
 system.l2c.mshr_cap_events                          0                       # number of times MSHR cap was activated
 system.l2c.no_allocate_misses                       0                       # Number of misses that were no-allocate
-system.l2c.overall_accesses                   3151800                       # number of overall (read+write) accesses
+system.l2c.overall_accesses                   3030412                       # number of overall (read+write) accesses
 system.l2c.overall_avg_miss_latency                 0                       # average overall miss latency
 system.l2c.overall_avg_mshr_miss_latency <err: div-0>                       # average overall mshr miss latency
 system.l2c.overall_avg_mshr_uncacheable_latency <err: div-0>                       # average overall mshr uncacheable latency
-system.l2c.overall_hits                       2210497                       # number of overall hits
+system.l2c.overall_hits                       1625506                       # number of overall hits
 system.l2c.overall_miss_latency                     0                       # number of overall miss cycles
-system.l2c.overall_miss_rate                 0.298656                       # miss rate for overall accesses
-system.l2c.overall_misses                      941303                       # number of overall misses
+system.l2c.overall_miss_rate                 0.463602                       # miss rate for overall accesses
+system.l2c.overall_misses                     1404906                       # number of overall misses
 system.l2c.overall_mshr_hits                        0                       # number of overall MSHR hits
 system.l2c.overall_mshr_miss_latency                0                       # number of overall MSHR miss cycles
 system.l2c.overall_mshr_miss_rate                   0                       # mshr miss rate for overall accesses
@@ -613,12 +529,12 @@ system.l2c.prefetcher.num_hwpf_issued               0                       # nu
 system.l2c.prefetcher.num_hwpf_removed_MSHR_hit            0                       # number of hwpf removed because MSHR allocated
 system.l2c.prefetcher.num_hwpf_span_page            0                       # number of hwpf spanning a virtual page
 system.l2c.prefetcher.num_hwpf_squashed_from_miss            0                       # number of hwpf that got squashed due to a miss aborting calculation time
-system.l2c.replacements                       1000779                       # number of replacements
-system.l2c.sampled_refs                       1066159                       # Sample count of references to valid blocks.
+system.l2c.replacements                        947869                       # number of replacements
+system.l2c.sampled_refs                        966791                       # Sample count of references to valid blocks.
 system.l2c.soft_prefetch_mshr_full                  0                       # number of mshr full events for SW prefetching instrutions
-system.l2c.tagsinuse                     65517.575356                       # Cycle average of tags in use
-system.l2c.total_refs                         2391266                       # Total number of references to valid blocks.
-system.l2c.warmup_cycle                     618103500                       # Cycle when the warmup percentage was hit.
+system.l2c.tagsinuse                     15587.342424                       # Cycle average of tags in use
+system.l2c.total_refs                         1662893                       # Total number of references to valid blocks.
+system.l2c.warmup_cycle                     990121000                       # Cycle when the warmup percentage was hit.
 system.l2c.writebacks                               0                       # number of writebacks
 system.tsunami.ethernet.coalescedRxDesc  <err: div-0>                       # average number of RxDesc's coalesced into each post
 system.tsunami.ethernet.coalescedRxIdle  <err: div-0>                       # average number of RxIdle's coalesced into each post
diff --git a/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-atomic-dual/stderr b/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-atomic-dual/stderr
index 3e1cbc554..563ca3160 100644
--- a/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-atomic-dual/stderr
+++ b/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-atomic-dual/stderr
@@ -1,5 +1,5 @@
-Listening for system connection on port 3456
-0: system.remote_gdb.listener: listening for remote gdb #0 on port 7000
-0: system.remote_gdb.listener: listening for remote gdb #1 on port 7001
+Listening for system connection on port 3457
+0: system.remote_gdb.listener: listening for remote gdb on port 7001
+0: system.remote_gdb.listener: listening for remote gdb on port 7002
 warn: Entering event queue @ 0.  Starting simulation...
 warn: 97861500: Trying to launch CPU number 1!
diff --git a/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-atomic-dual/stdout b/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-atomic-dual/stdout
index e4b69d1d0..1298154d9 100644
--- a/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-atomic-dual/stdout
+++ b/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-atomic-dual/stdout
@@ -5,10 +5,9 @@ The Regents of The University of Michigan
 All Rights Reserved
 
 
-M5 compiled Jun 10 2007 14:10:03
-M5 started Mon Jun 11 01:04:58 2007
-M5 executing on iceaxe
-command line: /Users/nate/build/outgoing/build/ALPHA_FS/m5.debug -d /Users/nate/build/outgoing/build/ALPHA_FS/tests/debug/quick/10.linux-boot/alpha/linux/tsunami-simple-atomic-dual tests/run.py quick/10.linux-boot/alpha/linux/tsunami-simple-atomic-dual
+M5 compiled Aug  3 2007 04:02:11
+M5 started Fri Aug  3 04:22:43 2007
+M5 executing on zizzer.eecs.umich.edu
+command line: build/ALPHA_FS/m5.fast -d build/ALPHA_FS/tests/fast/quick/10.linux-boot/alpha/linux/tsunami-simple-atomic-dual tests/run.py quick/10.linux-boot/alpha/linux/tsunami-simple-atomic-dual
 Global frequency set at 1000000000000 ticks per second
-      0: system.tsunami.io.rtc: Real-time clock set to Thu Jan  1 00:00:00 2009
-Exiting @ tick 1870335097000 because m5_exit instruction encountered
+Exiting @ tick 1870335101500 because m5_exit instruction encountered
diff --git a/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-atomic/config.ini b/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-atomic/config.ini
index 0347fbde9..3457f5f8f 100644
--- a/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-atomic/config.ini
+++ b/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-atomic/config.ini
@@ -35,7 +35,7 @@ side_b=system.membus.port[0]
 
 [system.cpu]
 type=AtomicSimpleCPU
-children=dcache dtb icache itb
+children=dcache dtb icache itb tracer
 clock=500
 cpu_id=0
 defer_registration=false
@@ -55,18 +55,16 @@ profile=0
 progress_interval=0
 simulate_stalls=false
 system=system
+tracer=system.cpu.tracer
 width=1
 dcache_port=system.cpu.dcache.cpu_side
 icache_port=system.cpu.icache.cpu_side
 
 [system.cpu.dcache]
 type=BaseCache
-children=protocol
-adaptive_compression=false
+addr_range=0:18446744073709551615
 assoc=4
 block_size=64
-compressed_bus=false
-compression_latency=0
 hash_delay=1
 latency=1000
 lifo=false
@@ -84,12 +82,10 @@ prefetch_serial_squash=false
 prefetch_use_cpu_id=true
 prefetcher_size=100
 prioritizeRequests=false
-protocol=system.cpu.dcache.protocol
 repl=Null
 size=32768
 split=false
 split_size=0
-store_compressed=false
 subblock_size=0
 tgts_per_mshr=8
 trace_addr=0
@@ -98,23 +94,15 @@ write_buffers=8
 cpu_side=system.cpu.dcache_port
 mem_side=system.toL2Bus.port[2]
 
-[system.cpu.dcache.protocol]
-type=CoherenceProtocol
-do_upgrades=true
-protocol=moesi
-
 [system.cpu.dtb]
 type=AlphaDTB
 size=64
 
 [system.cpu.icache]
 type=BaseCache
-children=protocol
-adaptive_compression=false
+addr_range=0:18446744073709551615
 assoc=1
 block_size=64
-compressed_bus=false
-compression_latency=0
 hash_delay=1
 latency=1000
 lifo=false
@@ -132,12 +120,10 @@ prefetch_serial_squash=false
 prefetch_use_cpu_id=true
 prefetcher_size=100
 prioritizeRequests=false
-protocol=system.cpu.icache.protocol
 repl=Null
 size=32768
 split=false
 split_size=0
-store_compressed=false
 subblock_size=0
 tgts_per_mshr=8
 trace_addr=0
@@ -146,15 +132,13 @@ write_buffers=8
 cpu_side=system.cpu.icache_port
 mem_side=system.toL2Bus.port[1]
 
-[system.cpu.icache.protocol]
-type=CoherenceProtocol
-do_upgrades=true
-protocol=moesi
-
 [system.cpu.itb]
 type=AlphaITB
 size=48
 
+[system.cpu.tracer]
+type=ExeTracer
+
 [system.disk0]
 type=IdeDisk
 children=image
@@ -209,11 +193,9 @@ port=system.bridge.side_a system.tsunami.cchip.pio system.tsunami.pchip.pio syst
 
 [system.l2c]
 type=BaseCache
-adaptive_compression=false
+addr_range=0:18446744073709551615
 assoc=8
 block_size=64
-compressed_bus=false
-compression_latency=0
 hash_delay=1
 latency=10000
 lifo=false
@@ -231,12 +213,10 @@ prefetch_serial_squash=false
 prefetch_use_cpu_id=true
 prefetcher_size=100
 prioritizeRequests=false
-protocol=Null
 repl=Null
 size=4194304
 split=false
 split_size=0
-store_compressed=false
 subblock_size=0
 tgts_per_mshr=16
 trace_addr=0
@@ -800,7 +780,7 @@ pio_addr=8804615847936
 pio_latency=1000
 platform=system.tsunami
 system=system
-time=2009 1 1 0 0 0 3 1
+time=Thu Jan  1 00:00:00 2009
 tsunami=system.tsunami
 year_is_bcd=false
 pio=system.iobus.port[23]
diff --git a/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-atomic/m5stats.txt b/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-atomic/m5stats.txt
index 722437701..cc91e4c90 100644
--- a/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-atomic/m5stats.txt
+++ b/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-atomic/m5stats.txt
@@ -1,35 +1,44 @@
 
 ---------- Begin Simulation Statistics ----------
-host_inst_rate                                1403977                       # Simulator instruction rate (inst/s)
-host_seconds                                    42.74                       # Real time elapsed on the host
-host_tick_rate                            42777462102                       # Simulator tick rate (ticks/s)
+host_inst_rate                                1294756                       # Simulator instruction rate (inst/s)
+host_mem_usage                                 255900                       # Number of bytes of host memory used
+host_seconds                                    46.35                       # Real time elapsed on the host
+host_tick_rate                            39449403667                       # Simulator tick rate (ticks/s)
 sim_freq                                 1000000000000                       # Frequency of simulated ticks
 sim_insts                                    60007317                       # Number of instructions simulated
 sim_seconds                                  1.828355                       # Number of seconds simulated
-sim_ticks                                1828355486000                       # Number of ticks simulated
-system.cpu.dcache.ReadReq_accesses            9723333                       # number of ReadReq accesses(hits+misses)
-system.cpu.dcache.ReadReq_hits                7984498                       # number of ReadReq hits
-system.cpu.dcache.ReadReq_miss_rate          0.178831                       # miss rate for ReadReq accesses
-system.cpu.dcache.ReadReq_misses              1738835                       # number of ReadReq misses
-system.cpu.dcache.WriteReq_accesses           6349447                       # number of WriteReq accesses(hits+misses)
-system.cpu.dcache.WriteReq_hits               6045093                       # number of WriteReq hits
-system.cpu.dcache.WriteReq_miss_rate         0.047934                       # miss rate for WriteReq accesses
-system.cpu.dcache.WriteReq_misses              304354                       # number of WriteReq misses
+sim_ticks                                1828355476000                       # Number of ticks simulated
+system.cpu.dcache.LoadLockedReq_accesses       200279                       # number of LoadLockedReq accesses(hits+misses)
+system.cpu.dcache.LoadLockedReq_hits           183119                       # number of LoadLockedReq hits
+system.cpu.dcache.LoadLockedReq_miss_rate     0.085680                       # miss rate for LoadLockedReq accesses
+system.cpu.dcache.LoadLockedReq_misses          17160                       # number of LoadLockedReq misses
+system.cpu.dcache.ReadReq_accesses            9523054                       # number of ReadReq accesses(hits+misses)
+system.cpu.dcache.ReadReq_hits                7801377                       # number of ReadReq hits
+system.cpu.dcache.ReadReq_miss_rate          0.180790                       # miss rate for ReadReq accesses
+system.cpu.dcache.ReadReq_misses              1721677                       # number of ReadReq misses
+system.cpu.dcache.StoreCondReq_accesses        199258                       # number of StoreCondReq accesses(hits+misses)
+system.cpu.dcache.StoreCondReq_hits            169392                       # number of StoreCondReq hits
+system.cpu.dcache.StoreCondReq_miss_rate     0.149886                       # miss rate for StoreCondReq accesses
+system.cpu.dcache.StoreCondReq_misses           29866                       # number of StoreCondReq misses
+system.cpu.dcache.WriteReq_accesses           6150189                       # number of WriteReq accesses(hits+misses)
+system.cpu.dcache.WriteReq_hits               5750772                       # number of WriteReq hits
+system.cpu.dcache.WriteReq_miss_rate         0.064944                       # miss rate for WriteReq accesses
+system.cpu.dcache.WriteReq_misses              399417                       # number of WriteReq misses
 system.cpu.dcache.avg_blocked_cycles_no_mshrs <err: div-0>                       # average number of cycles each access was blocked
 system.cpu.dcache.avg_blocked_cycles_no_targets <err: div-0>                       # average number of cycles each access was blocked
-system.cpu.dcache.avg_refs                   6.866566                       # Average number of references to valid blocks.
+system.cpu.dcache.avg_refs                   6.866558                       # Average number of references to valid blocks.
 system.cpu.dcache.blocked_no_mshrs                  0                       # number of cycles access was blocked
 system.cpu.dcache.blocked_no_targets                0                       # number of cycles access was blocked
 system.cpu.dcache.blocked_cycles_no_mshrs            0                       # number of cycles access was blocked
 system.cpu.dcache.blocked_cycles_no_targets            0                       # number of cycles access was blocked
 system.cpu.dcache.cache_copies                      0                       # number of cache copies performed
-system.cpu.dcache.demand_accesses            16072780                       # number of demand (read+write) accesses
+system.cpu.dcache.demand_accesses            15673243                       # number of demand (read+write) accesses
 system.cpu.dcache.demand_avg_miss_latency            0                       # average overall miss latency
 system.cpu.dcache.demand_avg_mshr_miss_latency <err: div-0>                       # average overall mshr miss latency
-system.cpu.dcache.demand_hits                14029591                       # number of demand (read+write) hits
+system.cpu.dcache.demand_hits                13552149                       # number of demand (read+write) hits
 system.cpu.dcache.demand_miss_latency               0                       # number of demand (read+write) miss cycles
-system.cpu.dcache.demand_miss_rate           0.127121                       # miss rate for demand accesses
-system.cpu.dcache.demand_misses               2043189                       # number of demand (read+write) misses
+system.cpu.dcache.demand_miss_rate           0.135332                       # miss rate for demand accesses
+system.cpu.dcache.demand_misses               2121094                       # number of demand (read+write) misses
 system.cpu.dcache.demand_mshr_hits                  0                       # number of demand (read+write) MSHR hits
 system.cpu.dcache.demand_mshr_miss_latency            0                       # number of demand (read+write) MSHR miss cycles
 system.cpu.dcache.demand_mshr_miss_rate             0                       # mshr miss rate for demand accesses
@@ -37,14 +46,14 @@ system.cpu.dcache.demand_mshr_misses                0                       # nu
 system.cpu.dcache.fast_writes                       0                       # number of fast writes performed
 system.cpu.dcache.mshr_cap_events                   0                       # number of times MSHR cap was activated
 system.cpu.dcache.no_allocate_misses                0                       # Number of misses that were no-allocate
-system.cpu.dcache.overall_accesses           16072780                       # number of overall (read+write) accesses
+system.cpu.dcache.overall_accesses           15673243                       # number of overall (read+write) accesses
 system.cpu.dcache.overall_avg_miss_latency            0                       # average overall miss latency
 system.cpu.dcache.overall_avg_mshr_miss_latency <err: div-0>                       # average overall mshr miss latency
 system.cpu.dcache.overall_avg_mshr_uncacheable_latency <err: div-0>                       # average overall mshr uncacheable latency
-system.cpu.dcache.overall_hits               14029591                       # number of overall hits
+system.cpu.dcache.overall_hits               13552149                       # number of overall hits
 system.cpu.dcache.overall_miss_latency              0                       # number of overall miss cycles
-system.cpu.dcache.overall_miss_rate          0.127121                       # miss rate for overall accesses
-system.cpu.dcache.overall_misses              2043189                       # number of overall misses
+system.cpu.dcache.overall_miss_rate          0.135332                       # miss rate for overall accesses
+system.cpu.dcache.overall_misses              2121094                       # number of overall misses
 system.cpu.dcache.overall_mshr_hits                 0                       # number of overall MSHR hits
 system.cpu.dcache.overall_mshr_miss_latency            0                       # number of overall MSHR miss cycles
 system.cpu.dcache.overall_mshr_miss_rate            0                       # mshr miss rate for overall accesses
@@ -60,39 +69,13 @@ system.cpu.dcache.prefetcher.num_hwpf_issued            0
 system.cpu.dcache.prefetcher.num_hwpf_removed_MSHR_hit            0                       # number of hwpf removed because MSHR allocated
 system.cpu.dcache.prefetcher.num_hwpf_span_page            0                       # number of hwpf spanning a virtual page
 system.cpu.dcache.prefetcher.num_hwpf_squashed_from_miss            0                       # number of hwpf that got squashed due to a miss aborting calculation time
-system.cpu.dcache.protocol.hwpf_invalid             0                       # hard prefetch misses to invalid blocks
-system.cpu.dcache.protocol.read_invalid       1738835                       # read misses to invalid blocks
-system.cpu.dcache.protocol.snoop_inv_exclusive            0                       # Invalidate snoops on exclusive blocks
-system.cpu.dcache.protocol.snoop_inv_invalid            0                       # Invalidate snoops on invalid blocks
-system.cpu.dcache.protocol.snoop_inv_modified            1                       # Invalidate snoops on modified blocks
-system.cpu.dcache.protocol.snoop_inv_owned            0                       # Invalidate snoops on owned blocks
-system.cpu.dcache.protocol.snoop_inv_shared            0                       # Invalidate snoops on shared blocks
-system.cpu.dcache.protocol.snoop_read_exclusive           10                       # read snoops on exclusive blocks
-system.cpu.dcache.protocol.snoop_read_modified           15                       # read snoops on modified blocks
-system.cpu.dcache.protocol.snoop_read_owned            2                       # read snoops on owned blocks
-system.cpu.dcache.protocol.snoop_read_shared          124                       # read snoops on shared blocks
-system.cpu.dcache.protocol.snoop_readex_exclusive            0                       # readEx snoops on exclusive blocks
-system.cpu.dcache.protocol.snoop_readex_modified            0                       # readEx snoops on modified blocks
-system.cpu.dcache.protocol.snoop_readex_owned            0                       # readEx snoops on owned blocks
-system.cpu.dcache.protocol.snoop_readex_shared            0                       # readEx snoops on shared blocks
-system.cpu.dcache.protocol.snoop_upgrade_owned            0                       # upgrade snoops on owned blocks
-system.cpu.dcache.protocol.snoop_upgrade_shared            0                       # upgradee snoops on shared blocks
-system.cpu.dcache.protocol.snoop_writeinv_exclusive            0                       # WriteInvalidate snoops on exclusive blocks
-system.cpu.dcache.protocol.snoop_writeinv_invalid            0                       # WriteInvalidate snoops on invalid blocks
-system.cpu.dcache.protocol.snoop_writeinv_modified            0                       # WriteInvalidate snoops on modified blocks
-system.cpu.dcache.protocol.snoop_writeinv_owned            0                       # WriteInvalidate snoops on owned blocks
-system.cpu.dcache.protocol.snoop_writeinv_shared            0                       # WriteInvalidate snoops on shared blocks
-system.cpu.dcache.protocol.swpf_invalid             0                       # soft prefetch misses to invalid blocks
-system.cpu.dcache.protocol.write_invalid       304342                       # write misses to invalid blocks
-system.cpu.dcache.protocol.write_owned              8                       # write misses to owned blocks
-system.cpu.dcache.protocol.write_shared             4                       # write misses to shared blocks
-system.cpu.dcache.replacements                2042664                       # number of replacements
-system.cpu.dcache.sampled_refs                2043176                       # Sample count of references to valid blocks.
+system.cpu.dcache.replacements                2042666                       # number of replacements
+system.cpu.dcache.sampled_refs                2043178                       # Sample count of references to valid blocks.
 system.cpu.dcache.soft_prefetch_mshr_full            0                       # number of mshr full events for SW prefetching instrutions
 system.cpu.dcache.tagsinuse                511.997801                       # Cycle average of tags in use
-system.cpu.dcache.total_refs                 14029603                       # Total number of references to valid blocks.
+system.cpu.dcache.total_refs                 14029601                       # Total number of references to valid blocks.
 system.cpu.dcache.warmup_cycle               10840000                       # Cycle when the warmup percentage was hit.
-system.cpu.dcache.writebacks                        0                       # number of writebacks
+system.cpu.dcache.writebacks                   428885                       # number of writebacks
 system.cpu.dtb.accesses                       1020787                       # DTB accesses
 system.cpu.dtb.acv                                367                       # DTB access violations
 system.cpu.dtb.hits                          16053818                       # DTB hits
@@ -106,12 +89,12 @@ system.cpu.dtb.write_acv                          157                       # DT
 system.cpu.dtb.write_hits                     6349968                       # DTB write hits
 system.cpu.dtb.write_misses                      1142                       # DTB write misses
 system.cpu.icache.ReadReq_accesses           60007317                       # number of ReadReq accesses(hits+misses)
-system.cpu.icache.ReadReq_hits               59087263                       # number of ReadReq hits
+system.cpu.icache.ReadReq_hits               59087260                       # number of ReadReq hits
 system.cpu.icache.ReadReq_miss_rate          0.015332                       # miss rate for ReadReq accesses
-system.cpu.icache.ReadReq_misses               920054                       # number of ReadReq misses
+system.cpu.icache.ReadReq_misses               920057                       # number of ReadReq misses
 system.cpu.icache.avg_blocked_cycles_no_mshrs <err: div-0>                       # average number of cycles each access was blocked
 system.cpu.icache.avg_blocked_cycles_no_targets <err: div-0>                       # average number of cycles each access was blocked
-system.cpu.icache.avg_refs                  64.229545                       # Average number of references to valid blocks.
+system.cpu.icache.avg_refs                  64.229332                       # Average number of references to valid blocks.
 system.cpu.icache.blocked_no_mshrs                  0                       # number of cycles access was blocked
 system.cpu.icache.blocked_no_targets                0                       # number of cycles access was blocked
 system.cpu.icache.blocked_cycles_no_mshrs            0                       # number of cycles access was blocked
@@ -120,10 +103,10 @@ system.cpu.icache.cache_copies                      0                       # nu
 system.cpu.icache.demand_accesses            60007317                       # number of demand (read+write) accesses
 system.cpu.icache.demand_avg_miss_latency            0                       # average overall miss latency
 system.cpu.icache.demand_avg_mshr_miss_latency <err: div-0>                       # average overall mshr miss latency
-system.cpu.icache.demand_hits                59087263                       # number of demand (read+write) hits
+system.cpu.icache.demand_hits                59087260                       # number of demand (read+write) hits
 system.cpu.icache.demand_miss_latency               0                       # number of demand (read+write) miss cycles
 system.cpu.icache.demand_miss_rate           0.015332                       # miss rate for demand accesses
-system.cpu.icache.demand_misses                920054                       # number of demand (read+write) misses
+system.cpu.icache.demand_misses                920057                       # number of demand (read+write) misses
 system.cpu.icache.demand_mshr_hits                  0                       # number of demand (read+write) MSHR hits
 system.cpu.icache.demand_mshr_miss_latency            0                       # number of demand (read+write) MSHR miss cycles
 system.cpu.icache.demand_mshr_miss_rate             0                       # mshr miss rate for demand accesses
@@ -135,10 +118,10 @@ system.cpu.icache.overall_accesses           60007317                       # nu
 system.cpu.icache.overall_avg_miss_latency            0                       # average overall miss latency
 system.cpu.icache.overall_avg_mshr_miss_latency <err: div-0>                       # average overall mshr miss latency
 system.cpu.icache.overall_avg_mshr_uncacheable_latency <err: div-0>                       # average overall mshr uncacheable latency
-system.cpu.icache.overall_hits               59087263                       # number of overall hits
+system.cpu.icache.overall_hits               59087260                       # number of overall hits
 system.cpu.icache.overall_miss_latency              0                       # number of overall miss cycles
 system.cpu.icache.overall_miss_rate          0.015332                       # miss rate for overall accesses
-system.cpu.icache.overall_misses               920054                       # number of overall misses
+system.cpu.icache.overall_misses               920057                       # number of overall misses
 system.cpu.icache.overall_mshr_hits                 0                       # number of overall MSHR hits
 system.cpu.icache.overall_mshr_miss_latency            0                       # number of overall MSHR miss cycles
 system.cpu.icache.overall_mshr_miss_rate            0                       # mshr miss rate for overall accesses
@@ -154,37 +137,11 @@ system.cpu.icache.prefetcher.num_hwpf_issued            0
 system.cpu.icache.prefetcher.num_hwpf_removed_MSHR_hit            0                       # number of hwpf removed because MSHR allocated
 system.cpu.icache.prefetcher.num_hwpf_span_page            0                       # number of hwpf spanning a virtual page
 system.cpu.icache.prefetcher.num_hwpf_squashed_from_miss            0                       # number of hwpf that got squashed due to a miss aborting calculation time
-system.cpu.icache.protocol.hwpf_invalid             0                       # hard prefetch misses to invalid blocks
-system.cpu.icache.protocol.read_invalid        920054                       # read misses to invalid blocks
-system.cpu.icache.protocol.snoop_inv_exclusive            0                       # Invalidate snoops on exclusive blocks
-system.cpu.icache.protocol.snoop_inv_invalid            0                       # Invalidate snoops on invalid blocks
-system.cpu.icache.protocol.snoop_inv_modified            0                       # Invalidate snoops on modified blocks
-system.cpu.icache.protocol.snoop_inv_owned            0                       # Invalidate snoops on owned blocks
-system.cpu.icache.protocol.snoop_inv_shared            0                       # Invalidate snoops on shared blocks
-system.cpu.icache.protocol.snoop_read_exclusive          643                       # read snoops on exclusive blocks
-system.cpu.icache.protocol.snoop_read_modified            0                       # read snoops on modified blocks
-system.cpu.icache.protocol.snoop_read_owned            0                       # read snoops on owned blocks
-system.cpu.icache.protocol.snoop_read_shared         1039                       # read snoops on shared blocks
-system.cpu.icache.protocol.snoop_readex_exclusive          105                       # readEx snoops on exclusive blocks
-system.cpu.icache.protocol.snoop_readex_modified            0                       # readEx snoops on modified blocks
-system.cpu.icache.protocol.snoop_readex_owned            0                       # readEx snoops on owned blocks
-system.cpu.icache.protocol.snoop_readex_shared            1                       # readEx snoops on shared blocks
-system.cpu.icache.protocol.snoop_upgrade_owned            0                       # upgrade snoops on owned blocks
-system.cpu.icache.protocol.snoop_upgrade_shared            9                       # upgradee snoops on shared blocks
-system.cpu.icache.protocol.snoop_writeinv_exclusive            0                       # WriteInvalidate snoops on exclusive blocks
-system.cpu.icache.protocol.snoop_writeinv_invalid            0                       # WriteInvalidate snoops on invalid blocks
-system.cpu.icache.protocol.snoop_writeinv_modified            0                       # WriteInvalidate snoops on modified blocks
-system.cpu.icache.protocol.snoop_writeinv_owned            0                       # WriteInvalidate snoops on owned blocks
-system.cpu.icache.protocol.snoop_writeinv_shared            0                       # WriteInvalidate snoops on shared blocks
-system.cpu.icache.protocol.swpf_invalid             0                       # soft prefetch misses to invalid blocks
-system.cpu.icache.protocol.write_invalid            0                       # write misses to invalid blocks
-system.cpu.icache.protocol.write_owned              0                       # write misses to owned blocks
-system.cpu.icache.protocol.write_shared             0                       # write misses to shared blocks
-system.cpu.icache.replacements                 919427                       # number of replacements
-system.cpu.icache.sampled_refs                 919939                       # Sample count of references to valid blocks.
+system.cpu.icache.replacements                 919430                       # number of replacements
+system.cpu.icache.sampled_refs                 919942                       # Sample count of references to valid blocks.
 system.cpu.icache.soft_prefetch_mshr_full            0                       # number of mshr full events for SW prefetching instrutions
 system.cpu.icache.tagsinuse                511.214820                       # Cycle average of tags in use
-system.cpu.icache.total_refs                 59087263                       # Total number of references to valid blocks.
+system.cpu.icache.total_refs                 59087260                       # Total number of references to valid blocks.
 system.cpu.icache.warmup_cycle             9686972500                       # Cycle when the warmup percentage was hit.
 system.cpu.icache.writebacks                        0                       # number of writebacks
 system.cpu.idle_fraction                     0.983588                       # Percentage of idle cycles
@@ -222,8 +179,8 @@ system.cpu.kern.ipl_good_0                      73448     49.29%     49.29% # nu
 system.cpu.kern.ipl_good_21                       243      0.16%     49.46% # number of times we switched to this ipl from a different ipl
 system.cpu.kern.ipl_good_22                      1865      1.25%     50.71% # number of times we switched to this ipl from a different ipl
 system.cpu.kern.ipl_good_31                     73448     49.29%    100.00% # number of times we switched to this ipl from a different ipl
-system.cpu.kern.ipl_ticks                1828355278500                       # number of cycles we spent at this ipl
-system.cpu.kern.ipl_ticks_0              1811087547500     99.06%     99.06% # number of cycles we spent at this ipl
+system.cpu.kern.ipl_ticks                1828355268500                       # number of cycles we spent at this ipl
+system.cpu.kern.ipl_ticks_0              1811087537500     99.06%     99.06% # number of cycles we spent at this ipl
 system.cpu.kern.ipl_ticks_21                 20110000      0.00%     99.06% # number of cycles we spent at this ipl
 system.cpu.kern.ipl_ticks_22                 80195000      0.00%     99.06% # number of cycles we spent at this ipl
 system.cpu.kern.ipl_ticks_31              17167426000      0.94%    100.00% # number of cycles we spent at this ipl
@@ -243,7 +200,7 @@ system.cpu.kern.mode_switch_good_user               1                       # fr
 system.cpu.kern.mode_switch_good_idle        0.081545                       # fraction of useful protection mode switches
 system.cpu.kern.mode_ticks_kernel         26834026500      1.47%      1.47% # number of ticks spent at the given mode
 system.cpu.kern.mode_ticks_user            1465069000      0.08%      1.55% # number of ticks spent at the given mode
-system.cpu.kern.mode_ticks_idle          1800056182000     98.45%    100.00% # number of ticks spent at the given mode
+system.cpu.kern.mode_ticks_idle          1800056172000     98.45%    100.00% # number of ticks spent at the given mode
 system.cpu.kern.swap_context                     4178                       # number of times the context was actually changed
 system.cpu.kern.syscall                           326                       # number of syscalls executed
 system.cpu.kern.syscall_2                           8      2.45%      2.45% # number of syscalls executed
@@ -293,30 +250,33 @@ system.disk2.dma_write_bytes                     8192                       # Nu
 system.disk2.dma_write_full_pages                   1                       # Number of full page size DMA writes.
 system.disk2.dma_write_txs                          1                       # Number of DMA write transactions.
 system.l2c.ReadExReq_accesses                  304342                       # number of ReadExReq accesses(hits+misses)
-system.l2c.ReadExReq_hits                      187346                       # number of ReadExReq hits
-system.l2c.ReadExReq_miss_rate               0.384423                       # miss rate for ReadExReq accesses
-system.l2c.ReadExReq_misses                    116996                       # number of ReadExReq misses
-system.l2c.ReadReq_accesses                   2658872                       # number of ReadReq accesses(hits+misses)
-system.l2c.ReadReq_hits                       1717828                       # number of ReadReq hits
-system.l2c.ReadReq_miss_rate                 0.353926                       # miss rate for ReadReq accesses
-system.l2c.ReadReq_misses                      941044                       # number of ReadReq misses
+system.l2c.ReadExReq_miss_rate                      1                       # miss rate for ReadExReq accesses
+system.l2c.ReadExReq_misses                    304342                       # number of ReadExReq misses
+system.l2c.ReadReq_accesses                   2658877                       # number of ReadReq accesses(hits+misses)
+system.l2c.ReadReq_hits                       1558398                       # number of ReadReq hits
+system.l2c.ReadReq_miss_rate                 0.413889                       # miss rate for ReadReq accesses
+system.l2c.ReadReq_misses                     1100479                       # number of ReadReq misses
+system.l2c.UpgradeReq_accesses                 124941                       # number of UpgradeReq accesses(hits+misses)
+system.l2c.UpgradeReq_miss_rate                     1                       # miss rate for UpgradeReq accesses
+system.l2c.UpgradeReq_misses                   124941                       # number of UpgradeReq misses
 system.l2c.Writeback_accesses                  428885                       # number of Writeback accesses(hits+misses)
-system.l2c.Writeback_hits                      428885                       # number of Writeback hits
+system.l2c.Writeback_miss_rate                      1                       # miss rate for Writeback accesses
+system.l2c.Writeback_misses                    428885                       # number of Writeback misses
 system.l2c.avg_blocked_cycles_no_mshrs   <err: div-0>                       # average number of cycles each access was blocked
 system.l2c.avg_blocked_cycles_no_targets <err: div-0>                       # average number of cycles each access was blocked
-system.l2c.avg_refs                          2.205901                       # Average number of references to valid blocks.
+system.l2c.avg_refs                          1.644070                       # Average number of references to valid blocks.
 system.l2c.blocked_no_mshrs                         0                       # number of cycles access was blocked
 system.l2c.blocked_no_targets                       0                       # number of cycles access was blocked
 system.l2c.blocked_cycles_no_mshrs                  0                       # number of cycles access was blocked
 system.l2c.blocked_cycles_no_targets                0                       # number of cycles access was blocked
 system.l2c.cache_copies                             0                       # number of cache copies performed
-system.l2c.demand_accesses                    2658872                       # number of demand (read+write) accesses
+system.l2c.demand_accesses                    2963219                       # number of demand (read+write) accesses
 system.l2c.demand_avg_miss_latency                  0                       # average overall miss latency
 system.l2c.demand_avg_mshr_miss_latency  <err: div-0>                       # average overall mshr miss latency
-system.l2c.demand_hits                        1717828                       # number of demand (read+write) hits
+system.l2c.demand_hits                        1558398                       # number of demand (read+write) hits
 system.l2c.demand_miss_latency                      0                       # number of demand (read+write) miss cycles
-system.l2c.demand_miss_rate                  0.353926                       # miss rate for demand accesses
-system.l2c.demand_misses                       941044                       # number of demand (read+write) misses
+system.l2c.demand_miss_rate                  0.474086                       # miss rate for demand accesses
+system.l2c.demand_misses                      1404821                       # number of demand (read+write) misses
 system.l2c.demand_mshr_hits                         0                       # number of demand (read+write) MSHR hits
 system.l2c.demand_mshr_miss_latency                 0                       # number of demand (read+write) MSHR miss cycles
 system.l2c.demand_mshr_miss_rate                    0                       # mshr miss rate for demand accesses
@@ -324,14 +284,14 @@ system.l2c.demand_mshr_misses                       0                       # nu
 system.l2c.fast_writes                              0                       # number of fast writes performed
 system.l2c.mshr_cap_events                          0                       # number of times MSHR cap was activated
 system.l2c.no_allocate_misses                       0                       # Number of misses that were no-allocate
-system.l2c.overall_accesses                   3087757                       # number of overall (read+write) accesses
+system.l2c.overall_accesses                   2963219                       # number of overall (read+write) accesses
 system.l2c.overall_avg_miss_latency                 0                       # average overall miss latency
 system.l2c.overall_avg_mshr_miss_latency <err: div-0>                       # average overall mshr miss latency
 system.l2c.overall_avg_mshr_uncacheable_latency <err: div-0>                       # average overall mshr uncacheable latency
-system.l2c.overall_hits                       2146713                       # number of overall hits
+system.l2c.overall_hits                       1558398                       # number of overall hits
 system.l2c.overall_miss_latency                     0                       # number of overall miss cycles
-system.l2c.overall_miss_rate                 0.304766                       # miss rate for overall accesses
-system.l2c.overall_misses                      941044                       # number of overall misses
+system.l2c.overall_miss_rate                 0.474086                       # miss rate for overall accesses
+system.l2c.overall_misses                     1404821                       # number of overall misses
 system.l2c.overall_mshr_hits                        0                       # number of overall MSHR hits
 system.l2c.overall_mshr_miss_latency                0                       # number of overall MSHR miss cycles
 system.l2c.overall_mshr_miss_rate                   0                       # mshr miss rate for overall accesses
@@ -347,12 +307,12 @@ system.l2c.prefetcher.num_hwpf_issued               0                       # nu
 system.l2c.prefetcher.num_hwpf_removed_MSHR_hit            0                       # number of hwpf removed because MSHR allocated
 system.l2c.prefetcher.num_hwpf_span_page            0                       # number of hwpf spanning a virtual page
 system.l2c.prefetcher.num_hwpf_squashed_from_miss            0                       # number of hwpf that got squashed due to a miss aborting calculation time
-system.l2c.replacements                        992432                       # number of replacements
-system.l2c.sampled_refs                       1057820                       # Sample count of references to valid blocks.
+system.l2c.replacements                        947436                       # number of replacements
+system.l2c.sampled_refs                        965232                       # Sample count of references to valid blocks.
 system.l2c.soft_prefetch_mshr_full                  0                       # number of mshr full events for SW prefetching instrutions
-system.l2c.tagsinuse                     65517.661064                       # Cycle average of tags in use
-system.l2c.total_refs                         2333446                       # Total number of references to valid blocks.
-system.l2c.warmup_cycle                     614754000                       # Cycle when the warmup percentage was hit.
+system.l2c.tagsinuse                     15309.548937                       # Cycle average of tags in use
+system.l2c.total_refs                         1586909                       # Total number of references to valid blocks.
+system.l2c.warmup_cycle                     789998500                       # Cycle when the warmup percentage was hit.
 system.l2c.writebacks                               0                       # number of writebacks
 system.tsunami.ethernet.coalescedRxDesc  <err: div-0>                       # average number of RxDesc's coalesced into each post
 system.tsunami.ethernet.coalescedRxIdle  <err: div-0>                       # average number of RxIdle's coalesced into each post
diff --git a/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-atomic/stderr b/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-atomic/stderr
index f34493a86..32120d9d6 100644
--- a/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-atomic/stderr
+++ b/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-atomic/stderr
@@ -1,3 +1,3 @@
-Listening for system connection on port 3456
-0: system.remote_gdb.listener: listening for remote gdb #0 on port 7000
+Listening for system connection on port 3457
+0: system.remote_gdb.listener: listening for remote gdb on port 7001
 warn: Entering event queue @ 0.  Starting simulation...
diff --git a/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-atomic/stdout b/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-atomic/stdout
index 6a6b8d735..1f648aea1 100644
--- a/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-atomic/stdout
+++ b/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-atomic/stdout
@@ -5,10 +5,9 @@ The Regents of The University of Michigan
 All Rights Reserved
 
 
-M5 compiled Jun 10 2007 14:10:03
-M5 started Mon Jun 11 00:55:45 2007
-M5 executing on iceaxe
-command line: /Users/nate/build/outgoing/build/ALPHA_FS/m5.debug -d /Users/nate/build/outgoing/build/ALPHA_FS/tests/debug/quick/10.linux-boot/alpha/linux/tsunami-simple-atomic tests/run.py quick/10.linux-boot/alpha/linux/tsunami-simple-atomic
+M5 compiled Aug  3 2007 04:02:11
+M5 started Fri Aug  3 04:21:55 2007
+M5 executing on zizzer.eecs.umich.edu
+command line: build/ALPHA_FS/m5.fast -d build/ALPHA_FS/tests/fast/quick/10.linux-boot/alpha/linux/tsunami-simple-atomic tests/run.py quick/10.linux-boot/alpha/linux/tsunami-simple-atomic
 Global frequency set at 1000000000000 ticks per second
-      0: system.tsunami.io.rtc: Real-time clock set to Thu Jan  1 00:00:00 2009
-Exiting @ tick 1828355481500 because m5_exit instruction encountered
+Exiting @ tick 1828355476000 because m5_exit instruction encountered
diff --git a/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-timing-dual/config.ini b/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-timing-dual/config.ini
index 552344dcb..bbfd059cd 100644
--- a/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-timing-dual/config.ini
+++ b/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-timing-dual/config.ini
@@ -35,7 +35,7 @@ side_b=system.membus.port[0]
 
 [system.cpu0]
 type=TimingSimpleCPU
-children=dcache dtb icache itb
+children=dcache dtb icache itb tracer
 clock=500
 cpu_id=0
 defer_registration=false
@@ -54,17 +54,15 @@ phase=0
 profile=0
 progress_interval=0
 system=system
+tracer=system.cpu0.tracer
 dcache_port=system.cpu0.dcache.cpu_side
 icache_port=system.cpu0.icache.cpu_side
 
 [system.cpu0.dcache]
 type=BaseCache
-children=protocol
-adaptive_compression=false
+addr_range=0:18446744073709551615
 assoc=4
 block_size=64
-compressed_bus=false
-compression_latency=0
 hash_delay=1
 latency=1000
 lifo=false
@@ -82,12 +80,10 @@ prefetch_serial_squash=false
 prefetch_use_cpu_id=true
 prefetcher_size=100
 prioritizeRequests=false
-protocol=system.cpu0.dcache.protocol
 repl=Null
 size=32768
 split=false
 split_size=0
-store_compressed=false
 subblock_size=0
 tgts_per_mshr=8
 trace_addr=0
@@ -96,23 +92,15 @@ write_buffers=8
 cpu_side=system.cpu0.dcache_port
 mem_side=system.toL2Bus.port[2]
 
-[system.cpu0.dcache.protocol]
-type=CoherenceProtocol
-do_upgrades=true
-protocol=moesi
-
 [system.cpu0.dtb]
 type=AlphaDTB
 size=64
 
 [system.cpu0.icache]
 type=BaseCache
-children=protocol
-adaptive_compression=false
+addr_range=0:18446744073709551615
 assoc=1
 block_size=64
-compressed_bus=false
-compression_latency=0
 hash_delay=1
 latency=1000
 lifo=false
@@ -130,12 +118,10 @@ prefetch_serial_squash=false
 prefetch_use_cpu_id=true
 prefetcher_size=100
 prioritizeRequests=false
-protocol=system.cpu0.icache.protocol
 repl=Null
 size=32768
 split=false
 split_size=0
-store_compressed=false
 subblock_size=0
 tgts_per_mshr=8
 trace_addr=0
@@ -144,18 +130,16 @@ write_buffers=8
 cpu_side=system.cpu0.icache_port
 mem_side=system.toL2Bus.port[1]
 
-[system.cpu0.icache.protocol]
-type=CoherenceProtocol
-do_upgrades=true
-protocol=moesi
-
 [system.cpu0.itb]
 type=AlphaITB
 size=48
 
+[system.cpu0.tracer]
+type=ExeTracer
+
 [system.cpu1]
 type=TimingSimpleCPU
-children=dcache dtb icache itb
+children=dcache dtb icache itb tracer
 clock=500
 cpu_id=1
 defer_registration=false
@@ -174,17 +158,15 @@ phase=0
 profile=0
 progress_interval=0
 system=system
+tracer=system.cpu1.tracer
 dcache_port=system.cpu1.dcache.cpu_side
 icache_port=system.cpu1.icache.cpu_side
 
 [system.cpu1.dcache]
 type=BaseCache
-children=protocol
-adaptive_compression=false
+addr_range=0:18446744073709551615
 assoc=4
 block_size=64
-compressed_bus=false
-compression_latency=0
 hash_delay=1
 latency=1000
 lifo=false
@@ -202,12 +184,10 @@ prefetch_serial_squash=false
 prefetch_use_cpu_id=true
 prefetcher_size=100
 prioritizeRequests=false
-protocol=system.cpu1.dcache.protocol
 repl=Null
 size=32768
 split=false
 split_size=0
-store_compressed=false
 subblock_size=0
 tgts_per_mshr=8
 trace_addr=0
@@ -216,23 +196,15 @@ write_buffers=8
 cpu_side=system.cpu1.dcache_port
 mem_side=system.toL2Bus.port[4]
 
-[system.cpu1.dcache.protocol]
-type=CoherenceProtocol
-do_upgrades=true
-protocol=moesi
-
 [system.cpu1.dtb]
 type=AlphaDTB
 size=64
 
 [system.cpu1.icache]
 type=BaseCache
-children=protocol
-adaptive_compression=false
+addr_range=0:18446744073709551615
 assoc=1
 block_size=64
-compressed_bus=false
-compression_latency=0
 hash_delay=1
 latency=1000
 lifo=false
@@ -250,12 +222,10 @@ prefetch_serial_squash=false
 prefetch_use_cpu_id=true
 prefetcher_size=100
 prioritizeRequests=false
-protocol=system.cpu1.icache.protocol
 repl=Null
 size=32768
 split=false
 split_size=0
-store_compressed=false
 subblock_size=0
 tgts_per_mshr=8
 trace_addr=0
@@ -264,15 +234,13 @@ write_buffers=8
 cpu_side=system.cpu1.icache_port
 mem_side=system.toL2Bus.port[3]
 
-[system.cpu1.icache.protocol]
-type=CoherenceProtocol
-do_upgrades=true
-protocol=moesi
-
 [system.cpu1.itb]
 type=AlphaITB
 size=48
 
+[system.cpu1.tracer]
+type=ExeTracer
+
 [system.disk0]
 type=IdeDisk
 children=image
@@ -327,11 +295,9 @@ port=system.bridge.side_a system.tsunami.cchip.pio system.tsunami.pchip.pio syst
 
 [system.l2c]
 type=BaseCache
-adaptive_compression=false
+addr_range=0:18446744073709551615
 assoc=8
 block_size=64
-compressed_bus=false
-compression_latency=0
 hash_delay=1
 latency=10000
 lifo=false
@@ -349,12 +315,10 @@ prefetch_serial_squash=false
 prefetch_use_cpu_id=true
 prefetcher_size=100
 prioritizeRequests=false
-protocol=Null
 repl=Null
 size=4194304
 split=false
 split_size=0
-store_compressed=false
 subblock_size=0
 tgts_per_mshr=16
 trace_addr=0
@@ -918,7 +882,7 @@ pio_addr=8804615847936
 pio_latency=1000
 platform=system.tsunami
 system=system
-time=2009 1 1 0 0 0 3 1
+time=Thu Jan  1 00:00:00 2009
 tsunami=system.tsunami
 year_is_bcd=false
 pio=system.iobus.port[23]
diff --git a/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-timing-dual/m5stats.txt b/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-timing-dual/m5stats.txt
index 0e86983a6..b7e78eb06 100644
--- a/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-timing-dual/m5stats.txt
+++ b/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-timing-dual/m5stats.txt
@@ -1,74 +1,93 @@
 
 ---------- Begin Simulation Statistics ----------
-host_inst_rate                                  62524                       # Simulator instruction rate (inst/s)
-host_seconds                                  1011.60                       # Real time elapsed on the host
-host_tick_rate                             1928760125                       # Simulator tick rate (ticks/s)
+host_inst_rate                                 608366                       # Simulator instruction rate (inst/s)
+host_mem_usage                                 227884                       # Number of bytes of host memory used
+host_seconds                                   106.58                       # Real time elapsed on the host
+host_tick_rate                            18308931831                       # Simulator tick rate (ticks/s)
 sim_freq                                 1000000000000                       # Frequency of simulated ticks
-sim_insts                                    63248814                       # Number of instructions simulated
-sim_seconds                                  1.951129                       # Number of seconds simulated
-sim_ticks                                1951129131000                       # Number of ticks simulated
-system.cpu0.dcache.ReadReq_accesses           9299202                       # number of ReadReq accesses(hits+misses)
-system.cpu0.dcache.ReadReq_avg_miss_latency 13073.177688                       # average ReadReq miss latency
-system.cpu0.dcache.ReadReq_avg_mshr_miss_latency 12073.152824                       # average ReadReq mshr miss latency
-system.cpu0.dcache.ReadReq_hits               7589849                       # number of ReadReq hits
-system.cpu0.dcache.ReadReq_miss_latency   22346675500                       # number of ReadReq miss cycles
-system.cpu0.dcache.ReadReq_miss_rate         0.183817                       # miss rate for ReadReq accesses
-system.cpu0.dcache.ReadReq_misses             1709353                       # number of ReadReq misses
-system.cpu0.dcache.ReadReq_mshr_miss_latency  20637280000                       # number of ReadReq MSHR miss cycles
-system.cpu0.dcache.ReadReq_mshr_miss_rate     0.183817                       # mshr miss rate for ReadReq accesses
-system.cpu0.dcache.ReadReq_mshr_misses        1709353                       # number of ReadReq MSHR misses
-system.cpu0.dcache.ReadReq_mshr_uncacheable         6873                       # number of ReadReq MSHR uncacheable
-system.cpu0.dcache.ReadResp_avg_mshr_uncacheable_latency          inf                       # average ReadResp mshr uncacheable latency
-system.cpu0.dcache.ReadResp_mshr_uncacheable_latency    841915000                       # number of ReadResp MSHR uncacheable cycles
-system.cpu0.dcache.WriteReq_accesses          6016348                       # number of WriteReq accesses(hits+misses)
-system.cpu0.dcache.WriteReq_avg_miss_latency 12644.438594                       # average WriteReq miss latency
-system.cpu0.dcache.WriteReq_avg_mshr_miss_latency 11630.972878                       # average WriteReq mshr miss latency
-system.cpu0.dcache.WriteReq_hits              5727689                       # number of WriteReq hits
-system.cpu0.dcache.WriteReq_miss_latency   3649931000                       # number of WriteReq miss cycles
-system.cpu0.dcache.WriteReq_miss_rate        0.047979                       # miss rate for WriteReq accesses
-system.cpu0.dcache.WriteReq_misses             288659                       # number of WriteReq misses
-system.cpu0.dcache.WriteReq_mshr_miss_latency   3357385000                       # number of WriteReq MSHR miss cycles
-system.cpu0.dcache.WriteReq_mshr_miss_rate     0.047979                       # mshr miss rate for WriteReq accesses
-system.cpu0.dcache.WriteReq_mshr_misses        288659                       # number of WriteReq MSHR misses
-system.cpu0.dcache.WriteReq_mshr_uncacheable         9698                       # number of WriteReq MSHR uncacheable
-system.cpu0.dcache.WriteResp_avg_mshr_uncacheable_latency          inf                       # average WriteResp mshr uncacheable latency
-system.cpu0.dcache.WriteResp_mshr_uncacheable_latency   1186164500                       # number of WriteResp MSHR uncacheable cycles
+sim_insts                                    64839479                       # Number of instructions simulated
+sim_seconds                                  1.951367                       # Number of seconds simulated
+sim_ticks                                1951367346000                       # Number of ticks simulated
+system.cpu0.dcache.LoadLockedReq_accesses       150248                       # number of LoadLockedReq accesses(hits+misses)
+system.cpu0.dcache.LoadLockedReq_avg_miss_latency 10860.561606                       # average LoadLockedReq miss latency
+system.cpu0.dcache.LoadLockedReq_avg_mshr_miss_latency  9860.561606                       # average LoadLockedReq mshr miss latency
+system.cpu0.dcache.LoadLockedReq_hits          136751                       # number of LoadLockedReq hits
+system.cpu0.dcache.LoadLockedReq_miss_latency    146585000                       # number of LoadLockedReq miss cycles
+system.cpu0.dcache.LoadLockedReq_miss_rate     0.089831                       # miss rate for LoadLockedReq accesses
+system.cpu0.dcache.LoadLockedReq_misses         13497                       # number of LoadLockedReq misses
+system.cpu0.dcache.LoadLockedReq_mshr_miss_latency    133088000                       # number of LoadLockedReq MSHR miss cycles
+system.cpu0.dcache.LoadLockedReq_mshr_miss_rate     0.089831                       # mshr miss rate for LoadLockedReq accesses
+system.cpu0.dcache.LoadLockedReq_mshr_misses        13497                       # number of LoadLockedReq MSHR misses
+system.cpu0.dcache.ReadReq_accesses           7920707                       # number of ReadReq accesses(hits+misses)
+system.cpu0.dcache.ReadReq_avg_miss_latency 13239.029006                       # average ReadReq miss latency
+system.cpu0.dcache.ReadReq_avg_mshr_miss_latency 12239.003253                       # average ReadReq mshr miss latency
+system.cpu0.dcache.ReadReq_avg_mshr_uncacheable_latency          inf                       # average ReadReq mshr uncacheable latency
+system.cpu0.dcache.ReadReq_hits               6328668                       # number of ReadReq hits
+system.cpu0.dcache.ReadReq_miss_latency   21077050500                       # number of ReadReq miss cycles
+system.cpu0.dcache.ReadReq_miss_rate         0.200997                       # miss rate for ReadReq accesses
+system.cpu0.dcache.ReadReq_misses             1592039                       # number of ReadReq misses
+system.cpu0.dcache.ReadReq_mshr_miss_latency  19484970500                       # number of ReadReq MSHR miss cycles
+system.cpu0.dcache.ReadReq_mshr_miss_rate     0.200997                       # mshr miss rate for ReadReq accesses
+system.cpu0.dcache.ReadReq_mshr_misses        1592039                       # number of ReadReq MSHR misses
+system.cpu0.dcache.ReadReq_mshr_uncacheable_latency    846944000                       # number of ReadReq MSHR uncacheable cycles
+system.cpu0.dcache.StoreCondReq_accesses       149727                       # number of StoreCondReq accesses(hits+misses)
+system.cpu0.dcache.StoreCondReq_avg_miss_latency 12266.165876                       # average StoreCondReq miss latency
+system.cpu0.dcache.StoreCondReq_avg_mshr_miss_latency 11266.165876                       # average StoreCondReq mshr miss latency
+system.cpu0.dcache.StoreCondReq_hits           126963                       # number of StoreCondReq hits
+system.cpu0.dcache.StoreCondReq_miss_latency    279227000                       # number of StoreCondReq miss cycles
+system.cpu0.dcache.StoreCondReq_miss_rate     0.152037                       # miss rate for StoreCondReq accesses
+system.cpu0.dcache.StoreCondReq_misses          22764                       # number of StoreCondReq misses
+system.cpu0.dcache.StoreCondReq_mshr_miss_latency    256463000                       # number of StoreCondReq MSHR miss cycles
+system.cpu0.dcache.StoreCondReq_mshr_miss_rate     0.152037                       # mshr miss rate for StoreCondReq accesses
+system.cpu0.dcache.StoreCondReq_mshr_misses        22764                       # number of StoreCondReq MSHR misses
+system.cpu0.dcache.WriteReq_accesses          4824283                       # number of WriteReq accesses(hits+misses)
+system.cpu0.dcache.WriteReq_avg_miss_latency 13877.297001                       # average WriteReq miss latency
+system.cpu0.dcache.WriteReq_avg_mshr_miss_latency 12877.297001                       # average WriteReq mshr miss latency
+system.cpu0.dcache.WriteReq_avg_mshr_uncacheable_latency          inf                       # average WriteReq mshr uncacheable latency
+system.cpu0.dcache.WriteReq_hits              4508382                       # number of WriteReq hits
+system.cpu0.dcache.WriteReq_miss_latency   4383852000                       # number of WriteReq miss cycles
+system.cpu0.dcache.WriteReq_miss_rate        0.065481                       # miss rate for WriteReq accesses
+system.cpu0.dcache.WriteReq_misses             315901                       # number of WriteReq misses
+system.cpu0.dcache.WriteReq_mshr_miss_latency   4067951000                       # number of WriteReq MSHR miss cycles
+system.cpu0.dcache.WriteReq_mshr_miss_rate     0.065481                       # mshr miss rate for WriteReq accesses
+system.cpu0.dcache.WriteReq_mshr_misses        315901                       # number of WriteReq MSHR misses
+system.cpu0.dcache.WriteReq_mshr_uncacheable_latency   1297859000                       # number of WriteReq MSHR uncacheable cycles
 system.cpu0.dcache.avg_blocked_cycles_no_mshrs <err: div-0>                       # average number of cycles each access was blocked
 system.cpu0.dcache.avg_blocked_cycles_no_targets <err: div-0>                       # average number of cycles each access was blocked
-system.cpu0.dcache.avg_refs                  6.687909                       # Average number of references to valid blocks.
+system.cpu0.dcache.avg_refs                  6.121232                       # Average number of references to valid blocks.
 system.cpu0.dcache.blocked_no_mshrs                 0                       # number of cycles access was blocked
 system.cpu0.dcache.blocked_no_targets               0                       # number of cycles access was blocked
 system.cpu0.dcache.blocked_cycles_no_mshrs            0                       # number of cycles access was blocked
 system.cpu0.dcache.blocked_cycles_no_targets            0                       # number of cycles access was blocked
 system.cpu0.dcache.cache_copies                     0                       # number of cache copies performed
-system.cpu0.dcache.demand_accesses           15315550                       # number of demand (read+write) accesses
-system.cpu0.dcache.demand_avg_miss_latency 13011.236419                       # average overall miss latency
-system.cpu0.dcache.demand_avg_mshr_miss_latency 12009.269714                       # average overall mshr miss latency
-system.cpu0.dcache.demand_hits               13317538                       # number of demand (read+write) hits
-system.cpu0.dcache.demand_miss_latency    25996606500                       # number of demand (read+write) miss cycles
-system.cpu0.dcache.demand_miss_rate          0.130456                       # miss rate for demand accesses
-system.cpu0.dcache.demand_misses              1998012                       # number of demand (read+write) misses
+system.cpu0.dcache.demand_accesses           12744990                       # number of demand (read+write) accesses
+system.cpu0.dcache.demand_avg_miss_latency 13344.708167                       # average overall miss latency
+system.cpu0.dcache.demand_avg_mshr_miss_latency 12344.686678                       # average overall mshr miss latency
+system.cpu0.dcache.demand_hits               10837050                       # number of demand (read+write) hits
+system.cpu0.dcache.demand_miss_latency    25460902500                       # number of demand (read+write) miss cycles
+system.cpu0.dcache.demand_miss_rate          0.149701                       # miss rate for demand accesses
+system.cpu0.dcache.demand_misses              1907940                       # number of demand (read+write) misses
 system.cpu0.dcache.demand_mshr_hits                 0                       # number of demand (read+write) MSHR hits
-system.cpu0.dcache.demand_mshr_miss_latency  23994665000                       # number of demand (read+write) MSHR miss cycles
-system.cpu0.dcache.demand_mshr_miss_rate     0.130456                       # mshr miss rate for demand accesses
-system.cpu0.dcache.demand_mshr_misses         1998012                       # number of demand (read+write) MSHR misses
+system.cpu0.dcache.demand_mshr_miss_latency  23552921500                       # number of demand (read+write) MSHR miss cycles
+system.cpu0.dcache.demand_mshr_miss_rate     0.149701                       # mshr miss rate for demand accesses
+system.cpu0.dcache.demand_mshr_misses         1907940                       # number of demand (read+write) MSHR misses
 system.cpu0.dcache.fast_writes                      0                       # number of fast writes performed
 system.cpu0.dcache.mshr_cap_events                  0                       # number of times MSHR cap was activated
 system.cpu0.dcache.no_allocate_misses               0                       # Number of misses that were no-allocate
-system.cpu0.dcache.overall_accesses          15315550                       # number of overall (read+write) accesses
-system.cpu0.dcache.overall_avg_miss_latency 13011.236419                       # average overall miss latency
-system.cpu0.dcache.overall_avg_mshr_miss_latency 12009.269714                       # average overall mshr miss latency
-system.cpu0.dcache.overall_avg_mshr_uncacheable_latency            0                       # average overall mshr uncacheable latency
-system.cpu0.dcache.overall_hits              13317538                       # number of overall hits
-system.cpu0.dcache.overall_miss_latency   25996606500                       # number of overall miss cycles
-system.cpu0.dcache.overall_miss_rate         0.130456                       # miss rate for overall accesses
-system.cpu0.dcache.overall_misses             1998012                       # number of overall misses
+system.cpu0.dcache.overall_accesses          12744990                       # number of overall (read+write) accesses
+system.cpu0.dcache.overall_avg_miss_latency 13344.708167                       # average overall miss latency
+system.cpu0.dcache.overall_avg_mshr_miss_latency 12344.686678                       # average overall mshr miss latency
+system.cpu0.dcache.overall_avg_mshr_uncacheable_latency          inf                       # average overall mshr uncacheable latency
+system.cpu0.dcache.overall_hits              10837050                       # number of overall hits
+system.cpu0.dcache.overall_miss_latency   25460902500                       # number of overall miss cycles
+system.cpu0.dcache.overall_miss_rate         0.149701                       # miss rate for overall accesses
+system.cpu0.dcache.overall_misses             1907940                       # number of overall misses
 system.cpu0.dcache.overall_mshr_hits                0                       # number of overall MSHR hits
-system.cpu0.dcache.overall_mshr_miss_latency  23994665000                       # number of overall MSHR miss cycles
-system.cpu0.dcache.overall_mshr_miss_rate     0.130456                       # mshr miss rate for overall accesses
-system.cpu0.dcache.overall_mshr_misses        1998012                       # number of overall MSHR misses
-system.cpu0.dcache.overall_mshr_uncacheable_latency            0                       # number of overall MSHR uncacheable cycles
-system.cpu0.dcache.overall_mshr_uncacheable_misses        16571                       # number of overall MSHR uncacheable misses
+system.cpu0.dcache.overall_mshr_miss_latency  23552921500                       # number of overall MSHR miss cycles
+system.cpu0.dcache.overall_mshr_miss_rate     0.149701                       # mshr miss rate for overall accesses
+system.cpu0.dcache.overall_mshr_misses        1907940                       # number of overall MSHR misses
+system.cpu0.dcache.overall_mshr_uncacheable_latency   2144803000                       # number of overall MSHR uncacheable cycles
+system.cpu0.dcache.overall_mshr_uncacheable_misses            0                       # number of overall MSHR uncacheable misses
 system.cpu0.dcache.prefetcher.num_hwpf_already_in_cache            0                       # number of hwpf that were already in the cache
 system.cpu0.dcache.prefetcher.num_hwpf_already_in_mshr            0                       # number of hwpf that were already in mshr
 system.cpu0.dcache.prefetcher.num_hwpf_already_in_prefetcher            0                       # number of hwpf that were already in the prefetch queue
@@ -78,95 +97,69 @@ system.cpu0.dcache.prefetcher.num_hwpf_issued            0
 system.cpu0.dcache.prefetcher.num_hwpf_removed_MSHR_hit            0                       # number of hwpf removed because MSHR allocated
 system.cpu0.dcache.prefetcher.num_hwpf_span_page            0                       # number of hwpf spanning a virtual page
 system.cpu0.dcache.prefetcher.num_hwpf_squashed_from_miss            0                       # number of hwpf that got squashed due to a miss aborting calculation time
-system.cpu0.dcache.protocol.hwpf_invalid            0                       # hard prefetch misses to invalid blocks
-system.cpu0.dcache.protocol.read_invalid      1709421                       # read misses to invalid blocks
-system.cpu0.dcache.protocol.snoop_inv_exclusive            0                       # Invalidate snoops on exclusive blocks
-system.cpu0.dcache.protocol.snoop_inv_invalid            0                       # Invalidate snoops on invalid blocks
-system.cpu0.dcache.protocol.snoop_inv_modified            0                       # Invalidate snoops on modified blocks
-system.cpu0.dcache.protocol.snoop_inv_owned            0                       # Invalidate snoops on owned blocks
-system.cpu0.dcache.protocol.snoop_inv_shared            0                       # Invalidate snoops on shared blocks
-system.cpu0.dcache.protocol.snoop_read_exclusive          908                       # read snoops on exclusive blocks
-system.cpu0.dcache.protocol.snoop_read_modified         3762                       # read snoops on modified blocks
-system.cpu0.dcache.protocol.snoop_read_owned           72                       # read snoops on owned blocks
-system.cpu0.dcache.protocol.snoop_read_shared         2297                       # read snoops on shared blocks
-system.cpu0.dcache.protocol.snoop_readex_exclusive          235                       # readEx snoops on exclusive blocks
-system.cpu0.dcache.protocol.snoop_readex_modified          207                       # readEx snoops on modified blocks
-system.cpu0.dcache.protocol.snoop_readex_owned           15                       # readEx snoops on owned blocks
-system.cpu0.dcache.protocol.snoop_readex_shared            7                       # readEx snoops on shared blocks
-system.cpu0.dcache.protocol.snoop_upgrade_owned         1074                       # upgrade snoops on owned blocks
-system.cpu0.dcache.protocol.snoop_upgrade_shared          726                       # upgradee snoops on shared blocks
-system.cpu0.dcache.protocol.snoop_writeinv_exclusive            0                       # WriteInvalidate snoops on exclusive blocks
-system.cpu0.dcache.protocol.snoop_writeinv_invalid            0                       # WriteInvalidate snoops on invalid blocks
-system.cpu0.dcache.protocol.snoop_writeinv_modified            0                       # WriteInvalidate snoops on modified blocks
-system.cpu0.dcache.protocol.snoop_writeinv_owned            0                       # WriteInvalidate snoops on owned blocks
-system.cpu0.dcache.protocol.snoop_writeinv_shared            0                       # WriteInvalidate snoops on shared blocks
-system.cpu0.dcache.protocol.swpf_invalid            0                       # soft prefetch misses to invalid blocks
-system.cpu0.dcache.protocol.write_invalid       284810                       # write misses to invalid blocks
-system.cpu0.dcache.protocol.write_owned          2533                       # write misses to owned blocks
-system.cpu0.dcache.protocol.write_shared         1354                       # write misses to shared blocks
-system.cpu0.dcache.replacements               1991354                       # number of replacements
-system.cpu0.dcache.sampled_refs               1991866                       # Sample count of references to valid blocks.
+system.cpu0.dcache.replacements               1829212                       # number of replacements
+system.cpu0.dcache.sampled_refs               1829724                       # Sample count of references to valid blocks.
 system.cpu0.dcache.soft_prefetch_mshr_full            0                       # number of mshr full events for SW prefetching instrutions
-system.cpu0.dcache.tagsinuse               503.775443                       # Cycle average of tags in use
-system.cpu0.dcache.total_refs                13321418                       # Total number of references to valid blocks.
-system.cpu0.dcache.warmup_cycle              57953000                       # Cycle when the warmup percentage was hit.
-system.cpu0.dcache.writebacks                  401606                       # number of writebacks
-system.cpu0.dtb.accesses                       719860                       # DTB accesses
-system.cpu0.dtb.acv                               289                       # DTB access violations
-system.cpu0.dtb.hits                         15299767                       # DTB hits
-system.cpu0.dtb.misses                           8485                       # DTB misses
-system.cpu0.dtb.read_accesses                  524201                       # DTB read accesses
-system.cpu0.dtb.read_acv                          174                       # DTB read access violations
-system.cpu0.dtb.read_hits                     9282693                       # DTB read hits
-system.cpu0.dtb.read_misses                      7687                       # DTB read misses
-system.cpu0.dtb.write_accesses                 195659                       # DTB write accesses
-system.cpu0.dtb.write_acv                         115                       # DTB write access violations
-system.cpu0.dtb.write_hits                    6017074                       # DTB write hits
-system.cpu0.dtb.write_misses                      798                       # DTB write misses
-system.cpu0.icache.ReadReq_accesses          57872551                       # number of ReadReq accesses(hits+misses)
-system.cpu0.icache.ReadReq_avg_miss_latency 12029.752588                       # average ReadReq miss latency
-system.cpu0.icache.ReadReq_avg_mshr_miss_latency 11029.000057                       # average ReadReq mshr miss latency
-system.cpu0.icache.ReadReq_hits              56957639                       # number of ReadReq hits
-system.cpu0.icache.ReadReq_miss_latency   11006165000                       # number of ReadReq miss cycles
-system.cpu0.icache.ReadReq_miss_rate         0.015809                       # miss rate for ReadReq accesses
-system.cpu0.icache.ReadReq_misses              914912                       # number of ReadReq misses
-system.cpu0.icache.ReadReq_mshr_miss_latency  10090564500                       # number of ReadReq MSHR miss cycles
-system.cpu0.icache.ReadReq_mshr_miss_rate     0.015809                       # mshr miss rate for ReadReq accesses
-system.cpu0.icache.ReadReq_mshr_misses         914912                       # number of ReadReq MSHR misses
+system.cpu0.dcache.tagsinuse               497.900810                       # Cycle average of tags in use
+system.cpu0.dcache.total_refs                11200165                       # Total number of references to valid blocks.
+system.cpu0.dcache.warmup_cycle              58293000                       # Cycle when the warmup percentage was hit.
+system.cpu0.dcache.writebacks                  322933                       # number of writebacks
+system.cpu0.dtb.accesses                       725071                       # DTB accesses
+system.cpu0.dtb.acv                               305                       # DTB access violations
+system.cpu0.dtb.hits                         13035385                       # DTB hits
+system.cpu0.dtb.misses                           8682                       # DTB misses
+system.cpu0.dtb.read_accesses                  527638                       # DTB read accesses
+system.cpu0.dtb.read_acv                          184                       # DTB read access violations
+system.cpu0.dtb.read_hits                     8058540                       # DTB read hits
+system.cpu0.dtb.read_misses                      7858                       # DTB read misses
+system.cpu0.dtb.write_accesses                 197433                       # DTB write accesses
+system.cpu0.dtb.write_acv                         121                       # DTB write access violations
+system.cpu0.dtb.write_hits                    4976845                       # DTB write hits
+system.cpu0.dtb.write_misses                      824                       # DTB write misses
+system.cpu0.icache.ReadReq_accesses          51081135                       # number of ReadReq accesses(hits+misses)
+system.cpu0.icache.ReadReq_avg_miss_latency 12048.344860                       # average ReadReq miss latency
+system.cpu0.icache.ReadReq_avg_mshr_miss_latency 11047.036239                       # average ReadReq mshr miss latency
+system.cpu0.icache.ReadReq_hits              50399501                       # number of ReadReq hits
+system.cpu0.icache.ReadReq_miss_latency    8212561500                       # number of ReadReq miss cycles
+system.cpu0.icache.ReadReq_miss_rate         0.013344                       # miss rate for ReadReq accesses
+system.cpu0.icache.ReadReq_misses              681634                       # number of ReadReq misses
+system.cpu0.icache.ReadReq_mshr_miss_latency   7530035500                       # number of ReadReq MSHR miss cycles
+system.cpu0.icache.ReadReq_mshr_miss_rate     0.013344                       # mshr miss rate for ReadReq accesses
+system.cpu0.icache.ReadReq_mshr_misses         681634                       # number of ReadReq MSHR misses
 system.cpu0.icache.avg_blocked_cycles_no_mshrs <err: div-0>                       # average number of cycles each access was blocked
 system.cpu0.icache.avg_blocked_cycles_no_targets <err: div-0>                       # average number of cycles each access was blocked
-system.cpu0.icache.avg_refs                 62.632934                       # Average number of references to valid blocks.
+system.cpu0.icache.avg_refs                 73.953888                       # Average number of references to valid blocks.
 system.cpu0.icache.blocked_no_mshrs                 0                       # number of cycles access was blocked
 system.cpu0.icache.blocked_no_targets               0                       # number of cycles access was blocked
 system.cpu0.icache.blocked_cycles_no_mshrs            0                       # number of cycles access was blocked
 system.cpu0.icache.blocked_cycles_no_targets            0                       # number of cycles access was blocked
 system.cpu0.icache.cache_copies                     0                       # number of cache copies performed
-system.cpu0.icache.demand_accesses           57872551                       # number of demand (read+write) accesses
-system.cpu0.icache.demand_avg_miss_latency 12029.752588                       # average overall miss latency
-system.cpu0.icache.demand_avg_mshr_miss_latency 11029.000057                       # average overall mshr miss latency
-system.cpu0.icache.demand_hits               56957639                       # number of demand (read+write) hits
-system.cpu0.icache.demand_miss_latency    11006165000                       # number of demand (read+write) miss cycles
-system.cpu0.icache.demand_miss_rate          0.015809                       # miss rate for demand accesses
-system.cpu0.icache.demand_misses               914912                       # number of demand (read+write) misses
+system.cpu0.icache.demand_accesses           51081135                       # number of demand (read+write) accesses
+system.cpu0.icache.demand_avg_miss_latency 12048.344860                       # average overall miss latency
+system.cpu0.icache.demand_avg_mshr_miss_latency 11047.036239                       # average overall mshr miss latency
+system.cpu0.icache.demand_hits               50399501                       # number of demand (read+write) hits
+system.cpu0.icache.demand_miss_latency     8212561500                       # number of demand (read+write) miss cycles
+system.cpu0.icache.demand_miss_rate          0.013344                       # miss rate for demand accesses
+system.cpu0.icache.demand_misses               681634                       # number of demand (read+write) misses
 system.cpu0.icache.demand_mshr_hits                 0                       # number of demand (read+write) MSHR hits
-system.cpu0.icache.demand_mshr_miss_latency  10090564500                       # number of demand (read+write) MSHR miss cycles
-system.cpu0.icache.demand_mshr_miss_rate     0.015809                       # mshr miss rate for demand accesses
-system.cpu0.icache.demand_mshr_misses          914912                       # number of demand (read+write) MSHR misses
+system.cpu0.icache.demand_mshr_miss_latency   7530035500                       # number of demand (read+write) MSHR miss cycles
+system.cpu0.icache.demand_mshr_miss_rate     0.013344                       # mshr miss rate for demand accesses
+system.cpu0.icache.demand_mshr_misses          681634                       # number of demand (read+write) MSHR misses
 system.cpu0.icache.fast_writes                      0                       # number of fast writes performed
 system.cpu0.icache.mshr_cap_events                  0                       # number of times MSHR cap was activated
 system.cpu0.icache.no_allocate_misses               0                       # Number of misses that were no-allocate
-system.cpu0.icache.overall_accesses          57872551                       # number of overall (read+write) accesses
-system.cpu0.icache.overall_avg_miss_latency 12029.752588                       # average overall miss latency
-system.cpu0.icache.overall_avg_mshr_miss_latency 11029.000057                       # average overall mshr miss latency
+system.cpu0.icache.overall_accesses          51081135                       # number of overall (read+write) accesses
+system.cpu0.icache.overall_avg_miss_latency 12048.344860                       # average overall miss latency
+system.cpu0.icache.overall_avg_mshr_miss_latency 11047.036239                       # average overall mshr miss latency
 system.cpu0.icache.overall_avg_mshr_uncacheable_latency <err: div-0>                       # average overall mshr uncacheable latency
-system.cpu0.icache.overall_hits              56957639                       # number of overall hits
-system.cpu0.icache.overall_miss_latency   11006165000                       # number of overall miss cycles
-system.cpu0.icache.overall_miss_rate         0.015809                       # miss rate for overall accesses
-system.cpu0.icache.overall_misses              914912                       # number of overall misses
+system.cpu0.icache.overall_hits              50399501                       # number of overall hits
+system.cpu0.icache.overall_miss_latency    8212561500                       # number of overall miss cycles
+system.cpu0.icache.overall_miss_rate         0.013344                       # miss rate for overall accesses
+system.cpu0.icache.overall_misses              681634                       # number of overall misses
 system.cpu0.icache.overall_mshr_hits                0                       # number of overall MSHR hits
-system.cpu0.icache.overall_mshr_miss_latency  10090564500                       # number of overall MSHR miss cycles
-system.cpu0.icache.overall_mshr_miss_rate     0.015809                       # mshr miss rate for overall accesses
-system.cpu0.icache.overall_mshr_misses         914912                       # number of overall MSHR misses
+system.cpu0.icache.overall_mshr_miss_latency   7530035500                       # number of overall MSHR miss cycles
+system.cpu0.icache.overall_mshr_miss_rate     0.013344                       # mshr miss rate for overall accesses
+system.cpu0.icache.overall_mshr_misses         681634                       # number of overall MSHR misses
 system.cpu0.icache.overall_mshr_uncacheable_latency            0                       # number of overall MSHR uncacheable cycles
 system.cpu0.icache.overall_mshr_uncacheable_misses            0                       # number of overall MSHR uncacheable misses
 system.cpu0.icache.prefetcher.num_hwpf_already_in_cache            0                       # number of hwpf that were already in the cache
@@ -178,199 +171,191 @@ system.cpu0.icache.prefetcher.num_hwpf_issued            0
 system.cpu0.icache.prefetcher.num_hwpf_removed_MSHR_hit            0                       # number of hwpf removed because MSHR allocated
 system.cpu0.icache.prefetcher.num_hwpf_span_page            0                       # number of hwpf spanning a virtual page
 system.cpu0.icache.prefetcher.num_hwpf_squashed_from_miss            0                       # number of hwpf that got squashed due to a miss aborting calculation time
-system.cpu0.icache.protocol.hwpf_invalid            0                       # hard prefetch misses to invalid blocks
-system.cpu0.icache.protocol.read_invalid       915158                       # read misses to invalid blocks
-system.cpu0.icache.protocol.snoop_inv_exclusive            0                       # Invalidate snoops on exclusive blocks
-system.cpu0.icache.protocol.snoop_inv_invalid            0                       # Invalidate snoops on invalid blocks
-system.cpu0.icache.protocol.snoop_inv_modified            0                       # Invalidate snoops on modified blocks
-system.cpu0.icache.protocol.snoop_inv_owned            0                       # Invalidate snoops on owned blocks
-system.cpu0.icache.protocol.snoop_inv_shared            0                       # Invalidate snoops on shared blocks
-system.cpu0.icache.protocol.snoop_read_exclusive         4652                       # read snoops on exclusive blocks
-system.cpu0.icache.protocol.snoop_read_modified            0                       # read snoops on modified blocks
-system.cpu0.icache.protocol.snoop_read_owned            0                       # read snoops on owned blocks
-system.cpu0.icache.protocol.snoop_read_shared         8768                       # read snoops on shared blocks
-system.cpu0.icache.protocol.snoop_readex_exclusive          121                       # readEx snoops on exclusive blocks
-system.cpu0.icache.protocol.snoop_readex_modified            0                       # readEx snoops on modified blocks
-system.cpu0.icache.protocol.snoop_readex_owned            0                       # readEx snoops on owned blocks
-system.cpu0.icache.protocol.snoop_readex_shared            1                       # readEx snoops on shared blocks
-system.cpu0.icache.protocol.snoop_upgrade_owned            0                       # upgrade snoops on owned blocks
-system.cpu0.icache.protocol.snoop_upgrade_shared           12                       # upgradee snoops on shared blocks
-system.cpu0.icache.protocol.snoop_writeinv_exclusive            0                       # WriteInvalidate snoops on exclusive blocks
-system.cpu0.icache.protocol.snoop_writeinv_invalid            0                       # WriteInvalidate snoops on invalid blocks
-system.cpu0.icache.protocol.snoop_writeinv_modified            0                       # WriteInvalidate snoops on modified blocks
-system.cpu0.icache.protocol.snoop_writeinv_owned            0                       # WriteInvalidate snoops on owned blocks
-system.cpu0.icache.protocol.snoop_writeinv_shared            0                       # WriteInvalidate snoops on shared blocks
-system.cpu0.icache.protocol.swpf_invalid            0                       # soft prefetch misses to invalid blocks
-system.cpu0.icache.protocol.write_invalid            0                       # write misses to invalid blocks
-system.cpu0.icache.protocol.write_owned             0                       # write misses to owned blocks
-system.cpu0.icache.protocol.write_shared            0                       # write misses to shared blocks
-system.cpu0.icache.replacements                908876                       # number of replacements
-system.cpu0.icache.sampled_refs                909388                       # Sample count of references to valid blocks.
+system.cpu0.icache.replacements                680987                       # number of replacements
+system.cpu0.icache.sampled_refs                681499                       # Sample count of references to valid blocks.
 system.cpu0.icache.soft_prefetch_mshr_full            0                       # number of mshr full events for SW prefetching instrutions
-system.cpu0.icache.tagsinuse               508.806183                       # Cycle average of tags in use
-system.cpu0.icache.total_refs                56957639                       # Total number of references to valid blocks.
-system.cpu0.icache.warmup_cycle           34906249000                       # Cycle when the warmup percentage was hit.
+system.cpu0.icache.tagsinuse               508.821605                       # Cycle average of tags in use
+system.cpu0.icache.total_refs                50399501                       # Total number of references to valid blocks.
+system.cpu0.icache.warmup_cycle           35300494000                       # Cycle when the warmup percentage was hit.
 system.cpu0.icache.writebacks                       0                       # number of writebacks
-system.cpu0.idle_fraction                    0.943968                       # Percentage of idle cycles
-system.cpu0.itb.accesses                      3944641                       # ITB accesses
-system.cpu0.itb.acv                               143                       # ITB acv
-system.cpu0.itb.hits                          3940800                       # ITB hits
-system.cpu0.itb.misses                           3841                       # ITB misses
-system.cpu0.kern.callpal                       187118                       # number of callpals executed
+system.cpu0.idle_fraction                    0.949890                       # Percentage of idle cycles
+system.cpu0.itb.accesses                      3593148                       # ITB accesses
+system.cpu0.itb.acv                               161                       # ITB acv
+system.cpu0.itb.hits                          3589202                       # ITB hits
+system.cpu0.itb.misses                           3946                       # ITB misses
+system.cpu0.kern.callpal                       145952                       # number of callpals executed
 system.cpu0.kern.callpal_cserve                     1      0.00%      0.00% # number of callpals executed
-system.cpu0.kern.callpal_wripir                    96      0.05%      0.05% # number of callpals executed
-system.cpu0.kern.callpal_wrmces                     1      0.00%      0.05% # number of callpals executed
-system.cpu0.kern.callpal_wrfen                      1      0.00%      0.05% # number of callpals executed
-system.cpu0.kern.callpal_wrvptptr                   1      0.00%      0.05% # number of callpals executed
-system.cpu0.kern.callpal_swpctx                  3865      2.07%      2.12% # number of callpals executed
-system.cpu0.kern.callpal_tbi                       44      0.02%      2.14% # number of callpals executed
-system.cpu0.kern.callpal_wrent                      7      0.00%      2.15% # number of callpals executed
-system.cpu0.kern.callpal_swpipl                171254     91.52%     93.67% # number of callpals executed
-system.cpu0.kern.callpal_rdps                    6635      3.55%     97.21% # number of callpals executed
-system.cpu0.kern.callpal_wrkgp                      1      0.00%     97.21% # number of callpals executed
-system.cpu0.kern.callpal_wrusp                      4      0.00%     97.22% # number of callpals executed
-system.cpu0.kern.callpal_rdusp                      7      0.00%     97.22% # number of callpals executed
-system.cpu0.kern.callpal_whami                      2      0.00%     97.22% # number of callpals executed
-system.cpu0.kern.callpal_rti                     4694      2.51%     99.73% # number of callpals executed
-system.cpu0.kern.callpal_callsys                  356      0.19%     99.92% # number of callpals executed
-system.cpu0.kern.callpal_imb                      149      0.08%    100.00% # number of callpals executed
+system.cpu0.kern.callpal_wripir                   536      0.37%      0.37% # number of callpals executed
+system.cpu0.kern.callpal_wrmces                     1      0.00%      0.37% # number of callpals executed
+system.cpu0.kern.callpal_wrfen                      1      0.00%      0.37% # number of callpals executed
+system.cpu0.kern.callpal_wrvptptr                   1      0.00%      0.37% # number of callpals executed
+system.cpu0.kern.callpal_swpctx                  3014      2.07%      2.44% # number of callpals executed
+system.cpu0.kern.callpal_tbi                       46      0.03%      2.47% # number of callpals executed
+system.cpu0.kern.callpal_wrent                      7      0.00%      2.47% # number of callpals executed
+system.cpu0.kern.callpal_swpipl                131018     89.77%     92.24% # number of callpals executed
+system.cpu0.kern.callpal_rdps                    6493      4.45%     96.69% # number of callpals executed
+system.cpu0.kern.callpal_wrkgp                      1      0.00%     96.69% # number of callpals executed
+system.cpu0.kern.callpal_wrusp                      4      0.00%     96.69% # number of callpals executed
+system.cpu0.kern.callpal_rdusp                      8      0.01%     96.70% # number of callpals executed
+system.cpu0.kern.callpal_whami                      2      0.00%     96.70% # number of callpals executed
+system.cpu0.kern.callpal_rti                     4302      2.95%     99.65% # number of callpals executed
+system.cpu0.kern.callpal_callsys                  368      0.25%     99.90% # number of callpals executed
+system.cpu0.kern.callpal_imb                      149      0.10%    100.00% # number of callpals executed
 system.cpu0.kern.inst.arm                           0                       # number of arm instructions executed
-system.cpu0.kern.inst.hwrei                    201983                       # number of hwrei instructions executed
-system.cpu0.kern.inst.quiesce                    6162                       # number of quiesce instructions executed
-system.cpu0.kern.ipl_count                     178054                       # number of times we switched to this ipl
-system.cpu0.kern.ipl_count_0                    72322     40.62%     40.62% # number of times we switched to this ipl
-system.cpu0.kern.ipl_count_21                     131      0.07%     40.69% # number of times we switched to this ipl
-system.cpu0.kern.ipl_count_22                    1968      1.11%     41.80% # number of times we switched to this ipl
-system.cpu0.kern.ipl_count_30                       6      0.00%     41.80% # number of times we switched to this ipl
-system.cpu0.kern.ipl_count_31                  103627     58.20%    100.00% # number of times we switched to this ipl
-system.cpu0.kern.ipl_good                      144005                       # number of times we switched to this ipl from a different ipl
-system.cpu0.kern.ipl_good_0                     70953     49.27%     49.27% # number of times we switched to this ipl from a different ipl
-system.cpu0.kern.ipl_good_21                      131      0.09%     49.36% # number of times we switched to this ipl from a different ipl
-system.cpu0.kern.ipl_good_22                     1968      1.37%     50.73% # number of times we switched to this ipl from a different ipl
-system.cpu0.kern.ipl_good_30                        6      0.00%     50.73% # number of times we switched to this ipl from a different ipl
-system.cpu0.kern.ipl_good_31                    70947     49.27%    100.00% # number of times we switched to this ipl from a different ipl
-system.cpu0.kern.ipl_ticks               1951128432000                       # number of cycles we spent at this ipl
-system.cpu0.kern.ipl_ticks_0             1894864204500     97.12%     97.12% # number of cycles we spent at this ipl
-system.cpu0.kern.ipl_ticks_21                72482500      0.00%     97.12% # number of cycles we spent at this ipl
-system.cpu0.kern.ipl_ticks_22               564462000      0.03%     97.15% # number of cycles we spent at this ipl
-system.cpu0.kern.ipl_ticks_30                 4114000      0.00%     97.15% # number of cycles we spent at this ipl
-system.cpu0.kern.ipl_ticks_31             55623169000      2.85%    100.00% # number of cycles we spent at this ipl
-system.cpu0.kern.ipl_used_0                  0.981071                       # fraction of swpipl calls that actually changed the ipl
+system.cpu0.kern.inst.hwrei                    161590                       # number of hwrei instructions executed
+system.cpu0.kern.inst.quiesce                    6598                       # number of quiesce instructions executed
+system.cpu0.kern.ipl_count                     137863                       # number of times we switched to this ipl
+system.cpu0.kern.ipl_count_0                    55298     40.11%     40.11% # number of times we switched to this ipl
+system.cpu0.kern.ipl_count_21                     131      0.10%     40.21% # number of times we switched to this ipl
+system.cpu0.kern.ipl_count_22                    1969      1.43%     41.63% # number of times we switched to this ipl
+system.cpu0.kern.ipl_count_30                     442      0.32%     41.95% # number of times we switched to this ipl
+system.cpu0.kern.ipl_count_31                   80023     58.05%    100.00% # number of times we switched to this ipl
+system.cpu0.kern.ipl_good                      111708                       # number of times we switched to this ipl from a different ipl
+system.cpu0.kern.ipl_good_0                     54804     49.06%     49.06% # number of times we switched to this ipl from a different ipl
+system.cpu0.kern.ipl_good_21                      131      0.12%     49.18% # number of times we switched to this ipl from a different ipl
+system.cpu0.kern.ipl_good_22                     1969      1.76%     50.94% # number of times we switched to this ipl from a different ipl
+system.cpu0.kern.ipl_good_30                      442      0.40%     51.34% # number of times we switched to this ipl from a different ipl
+system.cpu0.kern.ipl_good_31                    54362     48.66%    100.00% # number of times we switched to this ipl from a different ipl
+system.cpu0.kern.ipl_ticks               1951366621000                       # number of cycles we spent at this ipl
+system.cpu0.kern.ipl_ticks_0             1898503749000     97.29%     97.29% # number of cycles we spent at this ipl
+system.cpu0.kern.ipl_ticks_21                76310500      0.00%     97.29% # number of cycles we spent at this ipl
+system.cpu0.kern.ipl_ticks_22               547835000      0.03%     97.32% # number of cycles we spent at this ipl
+system.cpu0.kern.ipl_ticks_30               278789500      0.01%     97.34% # number of cycles we spent at this ipl
+system.cpu0.kern.ipl_ticks_31             51959937000      2.66%    100.00% # number of cycles we spent at this ipl
+system.cpu0.kern.ipl_used_0                  0.991067                       # fraction of swpipl calls that actually changed the ipl
 system.cpu0.kern.ipl_used_21                        1                       # fraction of swpipl calls that actually changed the ipl
 system.cpu0.kern.ipl_used_22                        1                       # fraction of swpipl calls that actually changed the ipl
 system.cpu0.kern.ipl_used_30                        1                       # fraction of swpipl calls that actually changed the ipl
-system.cpu0.kern.ipl_used_31                 0.684638                       # fraction of swpipl calls that actually changed the ipl
-system.cpu0.kern.mode_good_kernel                1230                      
-system.cpu0.kern.mode_good_user                  1231                      
+system.cpu0.kern.ipl_used_31                 0.679330                       # fraction of swpipl calls that actually changed the ipl
+system.cpu0.kern.mode_good_kernel                1275                      
+system.cpu0.kern.mode_good_user                  1276                      
 system.cpu0.kern.mode_good_idle                     0                      
-system.cpu0.kern.mode_switch_kernel              7215                       # number of protection mode switches
-system.cpu0.kern.mode_switch_user                1231                       # number of protection mode switches
+system.cpu0.kern.mode_switch_kernel              6846                       # number of protection mode switches
+system.cpu0.kern.mode_switch_user                1276                       # number of protection mode switches
 system.cpu0.kern.mode_switch_idle                   0                       # number of protection mode switches
 system.cpu0.kern.mode_switch_good        <err: div-0>                       # fraction of useful protection mode switches
-system.cpu0.kern.mode_switch_good_kernel     0.170478                       # fraction of useful protection mode switches
+system.cpu0.kern.mode_switch_good_kernel     0.186240                       # fraction of useful protection mode switches
 system.cpu0.kern.mode_switch_good_user              1                       # fraction of useful protection mode switches
 system.cpu0.kern.mode_switch_good_idle   <err: div-0>                       # fraction of useful protection mode switches
-system.cpu0.kern.mode_ticks_kernel       1947973402000     99.84%     99.84% # number of ticks spent at the given mode
-system.cpu0.kern.mode_ticks_user           3155028000      0.16%    100.00% # number of ticks spent at the given mode
+system.cpu0.kern.mode_ticks_kernel       1948118613000     99.83%     99.83% # number of ticks spent at the given mode
+system.cpu0.kern.mode_ticks_user           3248006000      0.17%    100.00% # number of ticks spent at the given mode
 system.cpu0.kern.mode_ticks_idle                    0      0.00%    100.00% # number of ticks spent at the given mode
-system.cpu0.kern.swap_context                    3866                       # number of times the context was actually changed
-system.cpu0.kern.syscall                          224                       # number of syscalls executed
-system.cpu0.kern.syscall_2                          6      2.68%      2.68% # number of syscalls executed
-system.cpu0.kern.syscall_3                         19      8.48%     11.16% # number of syscalls executed
-system.cpu0.kern.syscall_4                          3      1.34%     12.50% # number of syscalls executed
-system.cpu0.kern.syscall_6                         30     13.39%     25.89% # number of syscalls executed
-system.cpu0.kern.syscall_12                         1      0.45%     26.34% # number of syscalls executed
-system.cpu0.kern.syscall_15                         1      0.45%     26.79% # number of syscalls executed
-system.cpu0.kern.syscall_17                        10      4.46%     31.25% # number of syscalls executed
-system.cpu0.kern.syscall_19                         6      2.68%     33.93% # number of syscalls executed
-system.cpu0.kern.syscall_20                         4      1.79%     35.71% # number of syscalls executed
-system.cpu0.kern.syscall_23                         2      0.89%     36.61% # number of syscalls executed
-system.cpu0.kern.syscall_24                         4      1.79%     38.39% # number of syscalls executed
-system.cpu0.kern.syscall_33                         8      3.57%     41.96% # number of syscalls executed
-system.cpu0.kern.syscall_41                         2      0.89%     42.86% # number of syscalls executed
-system.cpu0.kern.syscall_45                        39     17.41%     60.27% # number of syscalls executed
-system.cpu0.kern.syscall_47                         4      1.79%     62.05% # number of syscalls executed
-system.cpu0.kern.syscall_48                         7      3.12%     65.18% # number of syscalls executed
-system.cpu0.kern.syscall_54                         9      4.02%     69.20% # number of syscalls executed
-system.cpu0.kern.syscall_58                         1      0.45%     69.64% # number of syscalls executed
-system.cpu0.kern.syscall_59                         5      2.23%     71.88% # number of syscalls executed
-system.cpu0.kern.syscall_71                        32     14.29%     86.16% # number of syscalls executed
-system.cpu0.kern.syscall_73                         3      1.34%     87.50% # number of syscalls executed
-system.cpu0.kern.syscall_74                         9      4.02%     91.52% # number of syscalls executed
-system.cpu0.kern.syscall_87                         1      0.45%     91.96% # number of syscalls executed
-system.cpu0.kern.syscall_90                         2      0.89%     92.86% # number of syscalls executed
-system.cpu0.kern.syscall_92                         7      3.12%     95.98% # number of syscalls executed
-system.cpu0.kern.syscall_97                         2      0.89%     96.87% # number of syscalls executed
-system.cpu0.kern.syscall_98                         2      0.89%     97.77% # number of syscalls executed
-system.cpu0.kern.syscall_132                        2      0.89%     98.66% # number of syscalls executed
-system.cpu0.kern.syscall_144                        1      0.45%     99.11% # number of syscalls executed
-system.cpu0.kern.syscall_147                        2      0.89%    100.00% # number of syscalls executed
-system.cpu0.not_idle_fraction                0.056032                       # Percentage of non-idle cycles
-system.cpu0.numCycles                    1951129131000                       # number of cpu cycles simulated
-system.cpu0.num_insts                        57872550                       # Number of instructions executed
-system.cpu0.num_refs                         15541096                       # Number of memory references
-system.cpu1.dcache.ReadReq_accesses           1052558                       # number of ReadReq accesses(hits+misses)
-system.cpu1.dcache.ReadReq_avg_miss_latency 11119.734481                       # average ReadReq miss latency
-system.cpu1.dcache.ReadReq_avg_mshr_miss_latency 10119.576119                       # average ReadReq mshr miss latency
-system.cpu1.dcache.ReadReq_hits               1014670                       # number of ReadReq hits
-system.cpu1.dcache.ReadReq_miss_latency     421304500                       # number of ReadReq miss cycles
-system.cpu1.dcache.ReadReq_miss_rate         0.035996                       # miss rate for ReadReq accesses
-system.cpu1.dcache.ReadReq_misses               37888                       # number of ReadReq misses
-system.cpu1.dcache.ReadReq_mshr_miss_latency    383410500                       # number of ReadReq MSHR miss cycles
-system.cpu1.dcache.ReadReq_mshr_miss_rate     0.035996                       # mshr miss rate for ReadReq accesses
-system.cpu1.dcache.ReadReq_mshr_misses          37888                       # number of ReadReq MSHR misses
-system.cpu1.dcache.ReadReq_mshr_uncacheable          120                       # number of ReadReq MSHR uncacheable
-system.cpu1.dcache.ReadResp_avg_mshr_uncacheable_latency          inf                       # average ReadResp mshr uncacheable latency
-system.cpu1.dcache.ReadResp_mshr_uncacheable_latency     14641500                       # number of ReadResp MSHR uncacheable cycles
-system.cpu1.dcache.WriteReq_accesses           677186                       # number of WriteReq accesses(hits+misses)
-system.cpu1.dcache.WriteReq_avg_miss_latency 11920.138166                       # average WriteReq miss latency
-system.cpu1.dcache.WriteReq_avg_mshr_miss_latency 10843.231096                       # average WriteReq mshr miss latency
-system.cpu1.dcache.WriteReq_hits               653157                       # number of WriteReq hits
-system.cpu1.dcache.WriteReq_miss_latency    286429000                       # number of WriteReq miss cycles
-system.cpu1.dcache.WriteReq_miss_rate        0.035484                       # miss rate for WriteReq accesses
-system.cpu1.dcache.WriteReq_misses              24029                       # number of WriteReq misses
-system.cpu1.dcache.WriteReq_mshr_miss_latency    260552000                       # number of WriteReq MSHR miss cycles
-system.cpu1.dcache.WriteReq_mshr_miss_rate     0.035484                       # mshr miss rate for WriteReq accesses
-system.cpu1.dcache.WriteReq_mshr_misses         24029                       # number of WriteReq MSHR misses
-system.cpu1.dcache.WriteReq_mshr_uncacheable         2496                       # number of WriteReq MSHR uncacheable
-system.cpu1.dcache.WriteResp_avg_mshr_uncacheable_latency          inf                       # average WriteResp mshr uncacheable latency
-system.cpu1.dcache.WriteResp_mshr_uncacheable_latency    304596500                       # number of WriteResp MSHR uncacheable cycles
+system.cpu0.kern.swap_context                    3015                       # number of times the context was actually changed
+system.cpu0.kern.syscall                          228                       # number of syscalls executed
+system.cpu0.kern.syscall_2                          7      3.07%      3.07% # number of syscalls executed
+system.cpu0.kern.syscall_3                         19      8.33%     11.40% # number of syscalls executed
+system.cpu0.kern.syscall_4                          3      1.32%     12.72% # number of syscalls executed
+system.cpu0.kern.syscall_6                         31     13.60%     26.32% # number of syscalls executed
+system.cpu0.kern.syscall_12                         1      0.44%     26.75% # number of syscalls executed
+system.cpu0.kern.syscall_15                         1      0.44%     27.19% # number of syscalls executed
+system.cpu0.kern.syscall_17                        10      4.39%     31.58% # number of syscalls executed
+system.cpu0.kern.syscall_19                         6      2.63%     34.21% # number of syscalls executed
+system.cpu0.kern.syscall_20                         4      1.75%     35.96% # number of syscalls executed
+system.cpu0.kern.syscall_23                         2      0.88%     36.84% # number of syscalls executed
+system.cpu0.kern.syscall_24                         4      1.75%     38.60% # number of syscalls executed
+system.cpu0.kern.syscall_33                         8      3.51%     42.11% # number of syscalls executed
+system.cpu0.kern.syscall_41                         2      0.88%     42.98% # number of syscalls executed
+system.cpu0.kern.syscall_45                        39     17.11%     60.09% # number of syscalls executed
+system.cpu0.kern.syscall_47                         4      1.75%     61.84% # number of syscalls executed
+system.cpu0.kern.syscall_48                         8      3.51%     65.35% # number of syscalls executed
+system.cpu0.kern.syscall_54                         9      3.95%     69.30% # number of syscalls executed
+system.cpu0.kern.syscall_58                         1      0.44%     69.74% # number of syscalls executed
+system.cpu0.kern.syscall_59                         6      2.63%     72.37% # number of syscalls executed
+system.cpu0.kern.syscall_71                        32     14.04%     86.40% # number of syscalls executed
+system.cpu0.kern.syscall_73                         3      1.32%     87.72% # number of syscalls executed
+system.cpu0.kern.syscall_74                         9      3.95%     91.67% # number of syscalls executed
+system.cpu0.kern.syscall_87                         1      0.44%     92.11% # number of syscalls executed
+system.cpu0.kern.syscall_90                         2      0.88%     92.98% # number of syscalls executed
+system.cpu0.kern.syscall_92                         7      3.07%     96.05% # number of syscalls executed
+system.cpu0.kern.syscall_97                         2      0.88%     96.93% # number of syscalls executed
+system.cpu0.kern.syscall_98                         2      0.88%     97.81% # number of syscalls executed
+system.cpu0.kern.syscall_132                        2      0.88%     98.68% # number of syscalls executed
+system.cpu0.kern.syscall_144                        1      0.44%     99.12% # number of syscalls executed
+system.cpu0.kern.syscall_147                        2      0.88%    100.00% # number of syscalls executed
+system.cpu0.not_idle_fraction                0.050110                       # Percentage of non-idle cycles
+system.cpu0.numCycles                    1951367346000                       # number of cpu cycles simulated
+system.cpu0.num_insts                        51081134                       # Number of instructions executed
+system.cpu0.num_refs                         13268864                       # Number of memory references
+system.cpu1.dcache.LoadLockedReq_accesses        61056                       # number of LoadLockedReq accesses(hits+misses)
+system.cpu1.dcache.LoadLockedReq_avg_miss_latency  9095.192614                       # average LoadLockedReq miss latency
+system.cpu1.dcache.LoadLockedReq_avg_mshr_miss_latency  8095.192614                       # average LoadLockedReq mshr miss latency
+system.cpu1.dcache.LoadLockedReq_hits           51633                       # number of LoadLockedReq hits
+system.cpu1.dcache.LoadLockedReq_miss_latency     85704000                       # number of LoadLockedReq miss cycles
+system.cpu1.dcache.LoadLockedReq_miss_rate     0.154334                       # miss rate for LoadLockedReq accesses
+system.cpu1.dcache.LoadLockedReq_misses          9423                       # number of LoadLockedReq misses
+system.cpu1.dcache.LoadLockedReq_mshr_miss_latency     76281000                       # number of LoadLockedReq MSHR miss cycles
+system.cpu1.dcache.LoadLockedReq_mshr_miss_rate     0.154334                       # mshr miss rate for LoadLockedReq accesses
+system.cpu1.dcache.LoadLockedReq_mshr_misses         9423                       # number of LoadLockedReq MSHR misses
+system.cpu1.dcache.ReadReq_accesses           2457845                       # number of ReadReq accesses(hits+misses)
+system.cpu1.dcache.ReadReq_avg_miss_latency 11653.965886                       # average ReadReq miss latency
+system.cpu1.dcache.ReadReq_avg_mshr_miss_latency 10653.909138                       # average ReadReq mshr miss latency
+system.cpu1.dcache.ReadReq_avg_mshr_uncacheable_latency          inf                       # average ReadReq mshr uncacheable latency
+system.cpu1.dcache.ReadReq_hits               2334493                       # number of ReadReq hits
+system.cpu1.dcache.ReadReq_miss_latency    1437540000                       # number of ReadReq miss cycles
+system.cpu1.dcache.ReadReq_miss_rate         0.050187                       # miss rate for ReadReq accesses
+system.cpu1.dcache.ReadReq_misses              123352                       # number of ReadReq misses
+system.cpu1.dcache.ReadReq_mshr_miss_latency   1314181000                       # number of ReadReq MSHR miss cycles
+system.cpu1.dcache.ReadReq_mshr_miss_rate     0.050187                       # mshr miss rate for ReadReq accesses
+system.cpu1.dcache.ReadReq_mshr_misses         123352                       # number of ReadReq MSHR misses
+system.cpu1.dcache.ReadReq_mshr_uncacheable_latency     16729500                       # number of ReadReq MSHR uncacheable cycles
+system.cpu1.dcache.StoreCondReq_accesses        60551                       # number of StoreCondReq accesses(hits+misses)
+system.cpu1.dcache.StoreCondReq_avg_miss_latency 10960.125479                       # average StoreCondReq miss latency
+system.cpu1.dcache.StoreCondReq_avg_mshr_miss_latency  9960.125479                       # average StoreCondReq mshr miss latency
+system.cpu1.dcache.StoreCondReq_hits            46206                       # number of StoreCondReq hits
+system.cpu1.dcache.StoreCondReq_miss_latency    157223000                       # number of StoreCondReq miss cycles
+system.cpu1.dcache.StoreCondReq_miss_rate     0.236908                       # miss rate for StoreCondReq accesses
+system.cpu1.dcache.StoreCondReq_misses          14345                       # number of StoreCondReq misses
+system.cpu1.dcache.StoreCondReq_mshr_miss_latency    142878000                       # number of StoreCondReq MSHR miss cycles
+system.cpu1.dcache.StoreCondReq_mshr_miss_rate     0.236908                       # mshr miss rate for StoreCondReq accesses
+system.cpu1.dcache.StoreCondReq_mshr_misses        14345                       # number of StoreCondReq MSHR misses
+system.cpu1.dcache.WriteReq_accesses          1792743                       # number of WriteReq accesses(hits+misses)
+system.cpu1.dcache.WriteReq_avg_miss_latency 13398.121192                       # average WriteReq miss latency
+system.cpu1.dcache.WriteReq_avg_mshr_miss_latency 12398.121192                       # average WriteReq mshr miss latency
+system.cpu1.dcache.WriteReq_avg_mshr_uncacheable_latency          inf                       # average WriteReq mshr uncacheable latency
+system.cpu1.dcache.WriteReq_hits              1700344                       # number of WriteReq hits
+system.cpu1.dcache.WriteReq_miss_latency   1237973000                       # number of WriteReq miss cycles
+system.cpu1.dcache.WriteReq_miss_rate        0.051541                       # miss rate for WriteReq accesses
+system.cpu1.dcache.WriteReq_misses              92399                       # number of WriteReq misses
+system.cpu1.dcache.WriteReq_mshr_miss_latency   1145574000                       # number of WriteReq MSHR miss cycles
+system.cpu1.dcache.WriteReq_mshr_miss_rate     0.051541                       # mshr miss rate for WriteReq accesses
+system.cpu1.dcache.WriteReq_mshr_misses         92399                       # number of WriteReq MSHR misses
+system.cpu1.dcache.WriteReq_mshr_uncacheable_latency    421374000                       # number of WriteReq MSHR uncacheable cycles
 system.cpu1.dcache.avg_blocked_cycles_no_mshrs <err: div-0>                       # average number of cycles each access was blocked
 system.cpu1.dcache.avg_blocked_cycles_no_targets <err: div-0>                       # average number of cycles each access was blocked
-system.cpu1.dcache.avg_refs                 29.876823                       # Average number of references to valid blocks.
+system.cpu1.dcache.avg_refs                 23.577992                       # Average number of references to valid blocks.
 system.cpu1.dcache.blocked_no_mshrs                 0                       # number of cycles access was blocked
 system.cpu1.dcache.blocked_no_targets               0                       # number of cycles access was blocked
 system.cpu1.dcache.blocked_cycles_no_mshrs            0                       # number of cycles access was blocked
 system.cpu1.dcache.blocked_cycles_no_targets            0                       # number of cycles access was blocked
 system.cpu1.dcache.cache_copies                     0                       # number of cache copies performed
-system.cpu1.dcache.demand_accesses            1729744                       # number of demand (read+write) accesses
-system.cpu1.dcache.demand_avg_miss_latency 11430.358383                       # average overall miss latency
-system.cpu1.dcache.demand_avg_mshr_miss_latency 10400.415072                       # average overall mshr miss latency
-system.cpu1.dcache.demand_hits                1667827                       # number of demand (read+write) hits
-system.cpu1.dcache.demand_miss_latency      707733500                       # number of demand (read+write) miss cycles
-system.cpu1.dcache.demand_miss_rate          0.035795                       # miss rate for demand accesses
-system.cpu1.dcache.demand_misses                61917                       # number of demand (read+write) misses
+system.cpu1.dcache.demand_accesses            4250588                       # number of demand (read+write) accesses
+system.cpu1.dcache.demand_avg_miss_latency 12400.929776                       # average overall miss latency
+system.cpu1.dcache.demand_avg_mshr_miss_latency 11400.897331                       # average overall mshr miss latency
+system.cpu1.dcache.demand_hits                4034837                       # number of demand (read+write) hits
+system.cpu1.dcache.demand_miss_latency     2675513000                       # number of demand (read+write) miss cycles
+system.cpu1.dcache.demand_miss_rate          0.050758                       # miss rate for demand accesses
+system.cpu1.dcache.demand_misses               215751                       # number of demand (read+write) misses
 system.cpu1.dcache.demand_mshr_hits                 0                       # number of demand (read+write) MSHR hits
-system.cpu1.dcache.demand_mshr_miss_latency    643962500                       # number of demand (read+write) MSHR miss cycles
-system.cpu1.dcache.demand_mshr_miss_rate     0.035795                       # mshr miss rate for demand accesses
-system.cpu1.dcache.demand_mshr_misses           61917                       # number of demand (read+write) MSHR misses
+system.cpu1.dcache.demand_mshr_miss_latency   2459755000                       # number of demand (read+write) MSHR miss cycles
+system.cpu1.dcache.demand_mshr_miss_rate     0.050758                       # mshr miss rate for demand accesses
+system.cpu1.dcache.demand_mshr_misses          215751                       # number of demand (read+write) MSHR misses
 system.cpu1.dcache.fast_writes                      0                       # number of fast writes performed
 system.cpu1.dcache.mshr_cap_events                  0                       # number of times MSHR cap was activated
 system.cpu1.dcache.no_allocate_misses               0                       # Number of misses that were no-allocate
-system.cpu1.dcache.overall_accesses           1729744                       # number of overall (read+write) accesses
-system.cpu1.dcache.overall_avg_miss_latency 11430.358383                       # average overall miss latency
-system.cpu1.dcache.overall_avg_mshr_miss_latency 10400.415072                       # average overall mshr miss latency
-system.cpu1.dcache.overall_avg_mshr_uncacheable_latency            0                       # average overall mshr uncacheable latency
-system.cpu1.dcache.overall_hits               1667827                       # number of overall hits
-system.cpu1.dcache.overall_miss_latency     707733500                       # number of overall miss cycles
-system.cpu1.dcache.overall_miss_rate         0.035795                       # miss rate for overall accesses
-system.cpu1.dcache.overall_misses               61917                       # number of overall misses
+system.cpu1.dcache.overall_accesses           4250588                       # number of overall (read+write) accesses
+system.cpu1.dcache.overall_avg_miss_latency 12400.929776                       # average overall miss latency
+system.cpu1.dcache.overall_avg_mshr_miss_latency 11400.897331                       # average overall mshr miss latency
+system.cpu1.dcache.overall_avg_mshr_uncacheable_latency          inf                       # average overall mshr uncacheable latency
+system.cpu1.dcache.overall_hits               4034837                       # number of overall hits
+system.cpu1.dcache.overall_miss_latency    2675513000                       # number of overall miss cycles
+system.cpu1.dcache.overall_miss_rate         0.050758                       # miss rate for overall accesses
+system.cpu1.dcache.overall_misses              215751                       # number of overall misses
 system.cpu1.dcache.overall_mshr_hits                0                       # number of overall MSHR hits
-system.cpu1.dcache.overall_mshr_miss_latency    643962500                       # number of overall MSHR miss cycles
-system.cpu1.dcache.overall_mshr_miss_rate     0.035795                       # mshr miss rate for overall accesses
-system.cpu1.dcache.overall_mshr_misses          61917                       # number of overall MSHR misses
-system.cpu1.dcache.overall_mshr_uncacheable_latency            0                       # number of overall MSHR uncacheable cycles
-system.cpu1.dcache.overall_mshr_uncacheable_misses         2616                       # number of overall MSHR uncacheable misses
+system.cpu1.dcache.overall_mshr_miss_latency   2459755000                       # number of overall MSHR miss cycles
+system.cpu1.dcache.overall_mshr_miss_rate     0.050758                       # mshr miss rate for overall accesses
+system.cpu1.dcache.overall_mshr_misses         215751                       # number of overall MSHR misses
+system.cpu1.dcache.overall_mshr_uncacheable_latency    438103500                       # number of overall MSHR uncacheable cycles
+system.cpu1.dcache.overall_mshr_uncacheable_misses            0                       # number of overall MSHR uncacheable misses
 system.cpu1.dcache.prefetcher.num_hwpf_already_in_cache            0                       # number of hwpf that were already in the cache
 system.cpu1.dcache.prefetcher.num_hwpf_already_in_mshr            0                       # number of hwpf that were already in mshr
 system.cpu1.dcache.prefetcher.num_hwpf_already_in_prefetcher            0                       # number of hwpf that were already in the prefetch queue
@@ -380,95 +365,69 @@ system.cpu1.dcache.prefetcher.num_hwpf_issued            0
 system.cpu1.dcache.prefetcher.num_hwpf_removed_MSHR_hit            0                       # number of hwpf removed because MSHR allocated
 system.cpu1.dcache.prefetcher.num_hwpf_span_page            0                       # number of hwpf spanning a virtual page
 system.cpu1.dcache.prefetcher.num_hwpf_squashed_from_miss            0                       # number of hwpf that got squashed due to a miss aborting calculation time
-system.cpu1.dcache.protocol.hwpf_invalid            0                       # hard prefetch misses to invalid blocks
-system.cpu1.dcache.protocol.read_invalid        37951                       # read misses to invalid blocks
-system.cpu1.dcache.protocol.snoop_inv_exclusive            0                       # Invalidate snoops on exclusive blocks
-system.cpu1.dcache.protocol.snoop_inv_invalid            0                       # Invalidate snoops on invalid blocks
-system.cpu1.dcache.protocol.snoop_inv_modified            0                       # Invalidate snoops on modified blocks
-system.cpu1.dcache.protocol.snoop_inv_owned            0                       # Invalidate snoops on owned blocks
-system.cpu1.dcache.protocol.snoop_inv_shared            0                       # Invalidate snoops on shared blocks
-system.cpu1.dcache.protocol.snoop_read_exclusive          906                       # read snoops on exclusive blocks
-system.cpu1.dcache.protocol.snoop_read_modified         1965                       # read snoops on modified blocks
-system.cpu1.dcache.protocol.snoop_read_owned          254                       # read snoops on owned blocks
-system.cpu1.dcache.protocol.snoop_read_shared        65869                       # read snoops on shared blocks
-system.cpu1.dcache.protocol.snoop_readex_exclusive          191                       # readEx snoops on exclusive blocks
-system.cpu1.dcache.protocol.snoop_readex_modified          198                       # readEx snoops on modified blocks
-system.cpu1.dcache.protocol.snoop_readex_owned           48                       # readEx snoops on owned blocks
-system.cpu1.dcache.protocol.snoop_readex_shared           42                       # readEx snoops on shared blocks
-system.cpu1.dcache.protocol.snoop_upgrade_owned         1132                       # upgrade snoops on owned blocks
-system.cpu1.dcache.protocol.snoop_upgrade_shared         2716                       # upgradee snoops on shared blocks
-system.cpu1.dcache.protocol.snoop_writeinv_exclusive            0                       # WriteInvalidate snoops on exclusive blocks
-system.cpu1.dcache.protocol.snoop_writeinv_invalid            0                       # WriteInvalidate snoops on invalid blocks
-system.cpu1.dcache.protocol.snoop_writeinv_modified            0                       # WriteInvalidate snoops on modified blocks
-system.cpu1.dcache.protocol.snoop_writeinv_owned            0                       # WriteInvalidate snoops on owned blocks
-system.cpu1.dcache.protocol.snoop_writeinv_shared            0                       # WriteInvalidate snoops on shared blocks
-system.cpu1.dcache.protocol.swpf_invalid            0                       # soft prefetch misses to invalid blocks
-system.cpu1.dcache.protocol.write_invalid        22206                       # write misses to invalid blocks
-system.cpu1.dcache.protocol.write_owned           601                       # write misses to owned blocks
-system.cpu1.dcache.protocol.write_shared         1247                       # write misses to shared blocks
-system.cpu1.dcache.replacements                 55360                       # number of replacements
-system.cpu1.dcache.sampled_refs                 55749                       # Sample count of references to valid blocks.
+system.cpu1.dcache.replacements                176474                       # number of replacements
+system.cpu1.dcache.sampled_refs                176909                       # Sample count of references to valid blocks.
 system.cpu1.dcache.soft_prefetch_mshr_full            0                       # number of mshr full events for SW prefetching instrutions
-system.cpu1.dcache.tagsinuse               388.749341                       # Cycle average of tags in use
-system.cpu1.dcache.total_refs                 1665603                       # Total number of references to valid blocks.
-system.cpu1.dcache.warmup_cycle          1935095598000                       # Cycle when the warmup percentage was hit.
-system.cpu1.dcache.writebacks                   27663                       # number of writebacks
-system.cpu1.dtb.accesses                       302878                       # DTB accesses
-system.cpu1.dtb.acv                                84                       # DTB access violations
-system.cpu1.dtb.hits                          1728432                       # DTB hits
-system.cpu1.dtb.misses                           3106                       # DTB misses
-system.cpu1.dtb.read_accesses                  205838                       # DTB read accesses
-system.cpu1.dtb.read_acv                           36                       # DTB read access violations
-system.cpu1.dtb.read_hits                     1049360                       # DTB read hits
-system.cpu1.dtb.read_misses                      2750                       # DTB read misses
-system.cpu1.dtb.write_accesses                  97040                       # DTB write accesses
-system.cpu1.dtb.write_acv                          48                       # DTB write access violations
-system.cpu1.dtb.write_hits                     679072                       # DTB write hits
-system.cpu1.dtb.write_misses                      356                       # DTB write misses
-system.cpu1.icache.ReadReq_accesses           5376264                       # number of ReadReq accesses(hits+misses)
-system.cpu1.icache.ReadReq_avg_miss_latency 12045.939531                       # average ReadReq miss latency
-system.cpu1.icache.ReadReq_avg_mshr_miss_latency 11045.466957                       # average ReadReq mshr miss latency
-system.cpu1.icache.ReadReq_hits               5281041                       # number of ReadReq hits
-system.cpu1.icache.ReadReq_miss_latency    1147050500                       # number of ReadReq miss cycles
-system.cpu1.icache.ReadReq_miss_rate         0.017712                       # miss rate for ReadReq accesses
-system.cpu1.icache.ReadReq_misses               95223                       # number of ReadReq misses
-system.cpu1.icache.ReadReq_mshr_miss_latency   1051782500                       # number of ReadReq MSHR miss cycles
-system.cpu1.icache.ReadReq_mshr_miss_rate     0.017712                       # mshr miss rate for ReadReq accesses
-system.cpu1.icache.ReadReq_mshr_misses          95223                       # number of ReadReq MSHR misses
+system.cpu1.dcache.tagsinuse               471.274557                       # Cycle average of tags in use
+system.cpu1.dcache.total_refs                 4171159                       # Total number of references to valid blocks.
+system.cpu1.dcache.warmup_cycle          1917859097000                       # Cycle when the warmup percentage was hit.
+system.cpu1.dcache.writebacks                   93260                       # number of writebacks
+system.cpu1.dtb.accesses                       296718                       # DTB accesses
+system.cpu1.dtb.acv                                62                       # DTB access violations
+system.cpu1.dtb.hits                          4358656                       # DTB hits
+system.cpu1.dtb.misses                           2867                       # DTB misses
+system.cpu1.dtb.read_accesses                  201817                       # DTB read accesses
+system.cpu1.dtb.read_acv                           26                       # DTB read access violations
+system.cpu1.dtb.read_hits                     2507309                       # DTB read hits
+system.cpu1.dtb.read_misses                      2546                       # DTB read misses
+system.cpu1.dtb.write_accesses                  94901                       # DTB write accesses
+system.cpu1.dtb.write_acv                          36                       # DTB write access violations
+system.cpu1.dtb.write_hits                    1851347                       # DTB write hits
+system.cpu1.dtb.write_misses                      321                       # DTB write misses
+system.cpu1.icache.ReadReq_accesses          13758345                       # number of ReadReq accesses(hits+misses)
+system.cpu1.icache.ReadReq_avg_miss_latency 12026.498126                       # average ReadReq miss latency
+system.cpu1.icache.ReadReq_avg_mshr_miss_latency 11026.342473                       # average ReadReq mshr miss latency
+system.cpu1.icache.ReadReq_hits              13421057                       # number of ReadReq hits
+system.cpu1.icache.ReadReq_miss_latency    4056393500                       # number of ReadReq miss cycles
+system.cpu1.icache.ReadReq_miss_rate         0.024515                       # miss rate for ReadReq accesses
+system.cpu1.icache.ReadReq_misses              337288                       # number of ReadReq misses
+system.cpu1.icache.ReadReq_mshr_miss_latency   3719053000                       # number of ReadReq MSHR miss cycles
+system.cpu1.icache.ReadReq_mshr_miss_rate     0.024515                       # mshr miss rate for ReadReq accesses
+system.cpu1.icache.ReadReq_mshr_misses         337288                       # number of ReadReq MSHR misses
 system.cpu1.icache.avg_blocked_cycles_no_mshrs <err: div-0>                       # average number of cycles each access was blocked
 system.cpu1.icache.avg_blocked_cycles_no_targets <err: div-0>                       # average number of cycles each access was blocked
-system.cpu1.icache.avg_refs                 57.662729                       # Average number of references to valid blocks.
+system.cpu1.icache.avg_refs                 39.794511                       # Average number of references to valid blocks.
 system.cpu1.icache.blocked_no_mshrs                 0                       # number of cycles access was blocked
 system.cpu1.icache.blocked_no_targets               0                       # number of cycles access was blocked
 system.cpu1.icache.blocked_cycles_no_mshrs            0                       # number of cycles access was blocked
 system.cpu1.icache.blocked_cycles_no_targets            0                       # number of cycles access was blocked
 system.cpu1.icache.cache_copies                     0                       # number of cache copies performed
-system.cpu1.icache.demand_accesses            5376264                       # number of demand (read+write) accesses
-system.cpu1.icache.demand_avg_miss_latency 12045.939531                       # average overall miss latency
-system.cpu1.icache.demand_avg_mshr_miss_latency 11045.466957                       # average overall mshr miss latency
-system.cpu1.icache.demand_hits                5281041                       # number of demand (read+write) hits
-system.cpu1.icache.demand_miss_latency     1147050500                       # number of demand (read+write) miss cycles
-system.cpu1.icache.demand_miss_rate          0.017712                       # miss rate for demand accesses
-system.cpu1.icache.demand_misses                95223                       # number of demand (read+write) misses
+system.cpu1.icache.demand_accesses           13758345                       # number of demand (read+write) accesses
+system.cpu1.icache.demand_avg_miss_latency 12026.498126                       # average overall miss latency
+system.cpu1.icache.demand_avg_mshr_miss_latency 11026.342473                       # average overall mshr miss latency
+system.cpu1.icache.demand_hits               13421057                       # number of demand (read+write) hits
+system.cpu1.icache.demand_miss_latency     4056393500                       # number of demand (read+write) miss cycles
+system.cpu1.icache.demand_miss_rate          0.024515                       # miss rate for demand accesses
+system.cpu1.icache.demand_misses               337288                       # number of demand (read+write) misses
 system.cpu1.icache.demand_mshr_hits                 0                       # number of demand (read+write) MSHR hits
-system.cpu1.icache.demand_mshr_miss_latency   1051782500                       # number of demand (read+write) MSHR miss cycles
-system.cpu1.icache.demand_mshr_miss_rate     0.017712                       # mshr miss rate for demand accesses
-system.cpu1.icache.demand_mshr_misses           95223                       # number of demand (read+write) MSHR misses
+system.cpu1.icache.demand_mshr_miss_latency   3719053000                       # number of demand (read+write) MSHR miss cycles
+system.cpu1.icache.demand_mshr_miss_rate     0.024515                       # mshr miss rate for demand accesses
+system.cpu1.icache.demand_mshr_misses          337288                       # number of demand (read+write) MSHR misses
 system.cpu1.icache.fast_writes                      0                       # number of fast writes performed
 system.cpu1.icache.mshr_cap_events                  0                       # number of times MSHR cap was activated
 system.cpu1.icache.no_allocate_misses               0                       # Number of misses that were no-allocate
-system.cpu1.icache.overall_accesses           5376264                       # number of overall (read+write) accesses
-system.cpu1.icache.overall_avg_miss_latency 12045.939531                       # average overall miss latency
-system.cpu1.icache.overall_avg_mshr_miss_latency 11045.466957                       # average overall mshr miss latency
+system.cpu1.icache.overall_accesses          13758345                       # number of overall (read+write) accesses
+system.cpu1.icache.overall_avg_miss_latency 12026.498126                       # average overall miss latency
+system.cpu1.icache.overall_avg_mshr_miss_latency 11026.342473                       # average overall mshr miss latency
 system.cpu1.icache.overall_avg_mshr_uncacheable_latency <err: div-0>                       # average overall mshr uncacheable latency
-system.cpu1.icache.overall_hits               5281041                       # number of overall hits
-system.cpu1.icache.overall_miss_latency    1147050500                       # number of overall miss cycles
-system.cpu1.icache.overall_miss_rate         0.017712                       # miss rate for overall accesses
-system.cpu1.icache.overall_misses               95223                       # number of overall misses
+system.cpu1.icache.overall_hits              13421057                       # number of overall hits
+system.cpu1.icache.overall_miss_latency    4056393500                       # number of overall miss cycles
+system.cpu1.icache.overall_miss_rate         0.024515                       # miss rate for overall accesses
+system.cpu1.icache.overall_misses              337288                       # number of overall misses
 system.cpu1.icache.overall_mshr_hits                0                       # number of overall MSHR hits
-system.cpu1.icache.overall_mshr_miss_latency   1051782500                       # number of overall MSHR miss cycles
-system.cpu1.icache.overall_mshr_miss_rate     0.017712                       # mshr miss rate for overall accesses
-system.cpu1.icache.overall_mshr_misses          95223                       # number of overall MSHR misses
+system.cpu1.icache.overall_mshr_miss_latency   3719053000                       # number of overall MSHR miss cycles
+system.cpu1.icache.overall_mshr_miss_rate     0.024515                       # mshr miss rate for overall accesses
+system.cpu1.icache.overall_mshr_misses         337288                       # number of overall MSHR misses
 system.cpu1.icache.overall_mshr_uncacheable_latency            0                       # number of overall MSHR uncacheable cycles
 system.cpu1.icache.overall_mshr_uncacheable_misses            0                       # number of overall MSHR uncacheable misses
 system.cpu1.icache.prefetcher.num_hwpf_already_in_cache            0                       # number of hwpf that were already in the cache
@@ -480,124 +439,98 @@ system.cpu1.icache.prefetcher.num_hwpf_issued            0
 system.cpu1.icache.prefetcher.num_hwpf_removed_MSHR_hit            0                       # number of hwpf removed because MSHR allocated
 system.cpu1.icache.prefetcher.num_hwpf_span_page            0                       # number of hwpf spanning a virtual page
 system.cpu1.icache.prefetcher.num_hwpf_squashed_from_miss            0                       # number of hwpf that got squashed due to a miss aborting calculation time
-system.cpu1.icache.protocol.hwpf_invalid            0                       # hard prefetch misses to invalid blocks
-system.cpu1.icache.protocol.read_invalid        97341                       # read misses to invalid blocks
-system.cpu1.icache.protocol.snoop_inv_exclusive            0                       # Invalidate snoops on exclusive blocks
-system.cpu1.icache.protocol.snoop_inv_invalid            0                       # Invalidate snoops on invalid blocks
-system.cpu1.icache.protocol.snoop_inv_modified            0                       # Invalidate snoops on modified blocks
-system.cpu1.icache.protocol.snoop_inv_owned            0                       # Invalidate snoops on owned blocks
-system.cpu1.icache.protocol.snoop_inv_shared            0                       # Invalidate snoops on shared blocks
-system.cpu1.icache.protocol.snoop_read_exclusive        39627                       # read snoops on exclusive blocks
-system.cpu1.icache.protocol.snoop_read_modified            0                       # read snoops on modified blocks
-system.cpu1.icache.protocol.snoop_read_owned            0                       # read snoops on owned blocks
-system.cpu1.icache.protocol.snoop_read_shared       214588                       # read snoops on shared blocks
-system.cpu1.icache.protocol.snoop_readex_exclusive           26                       # readEx snoops on exclusive blocks
-system.cpu1.icache.protocol.snoop_readex_modified            0                       # readEx snoops on modified blocks
-system.cpu1.icache.protocol.snoop_readex_owned            0                       # readEx snoops on owned blocks
-system.cpu1.icache.protocol.snoop_readex_shared            0                       # readEx snoops on shared blocks
-system.cpu1.icache.protocol.snoop_upgrade_owned            0                       # upgrade snoops on owned blocks
-system.cpu1.icache.protocol.snoop_upgrade_shared            2                       # upgradee snoops on shared blocks
-system.cpu1.icache.protocol.snoop_writeinv_exclusive            0                       # WriteInvalidate snoops on exclusive blocks
-system.cpu1.icache.protocol.snoop_writeinv_invalid            0                       # WriteInvalidate snoops on invalid blocks
-system.cpu1.icache.protocol.snoop_writeinv_modified            0                       # WriteInvalidate snoops on modified blocks
-system.cpu1.icache.protocol.snoop_writeinv_owned            0                       # WriteInvalidate snoops on owned blocks
-system.cpu1.icache.protocol.snoop_writeinv_shared            0                       # WriteInvalidate snoops on shared blocks
-system.cpu1.icache.protocol.swpf_invalid            0                       # soft prefetch misses to invalid blocks
-system.cpu1.icache.protocol.write_invalid            0                       # write misses to invalid blocks
-system.cpu1.icache.protocol.write_owned             0                       # write misses to owned blocks
-system.cpu1.icache.protocol.write_shared            0                       # write misses to shared blocks
-system.cpu1.icache.replacements                 91073                       # number of replacements
-system.cpu1.icache.sampled_refs                 91585                       # Sample count of references to valid blocks.
+system.cpu1.icache.replacements                336747                       # number of replacements
+system.cpu1.icache.sampled_refs                337259                       # Sample count of references to valid blocks.
 system.cpu1.icache.soft_prefetch_mshr_full            0                       # number of mshr full events for SW prefetching instrutions
-system.cpu1.icache.tagsinuse               420.500398                       # Cycle average of tags in use
-system.cpu1.icache.total_refs                 5281041                       # Total number of references to valid blocks.
-system.cpu1.icache.warmup_cycle          1947911714000                       # Cycle when the warmup percentage was hit.
+system.cpu1.icache.tagsinuse               445.859240                       # Cycle average of tags in use
+system.cpu1.icache.total_refs                13421057                       # Total number of references to valid blocks.
+system.cpu1.icache.warmup_cycle          1946103109000                       # Cycle when the warmup percentage was hit.
 system.cpu1.icache.writebacks                       0                       # number of writebacks
-system.cpu1.idle_fraction                    0.995322                       # Percentage of idle cycles
-system.cpu1.itb.accesses                      1399877                       # ITB accesses
-system.cpu1.itb.acv                                41                       # ITB acv
-system.cpu1.itb.hits                          1398631                       # ITB hits
-system.cpu1.itb.misses                           1246                       # ITB misses
-system.cpu1.kern.callpal                        29847                       # number of callpals executed
+system.cpu1.idle_fraction                    0.987201                       # Percentage of idle cycles
+system.cpu1.itb.accesses                      1878768                       # ITB accesses
+system.cpu1.itb.acv                                23                       # ITB acv
+system.cpu1.itb.hits                          1877648                       # ITB hits
+system.cpu1.itb.misses                           1120                       # ITB misses
+system.cpu1.kern.callpal                        75334                       # number of callpals executed
 system.cpu1.kern.callpal_cserve                     1      0.00%      0.00% # number of callpals executed
-system.cpu1.kern.callpal_wripir                     6      0.02%      0.02% # number of callpals executed
-system.cpu1.kern.callpal_wrmces                     1      0.00%      0.03% # number of callpals executed
-system.cpu1.kern.callpal_wrfen                      1      0.00%      0.03% # number of callpals executed
-system.cpu1.kern.callpal_swpctx                   375      1.26%      1.29% # number of callpals executed
-system.cpu1.kern.callpal_tbi                       10      0.03%      1.32% # number of callpals executed
-system.cpu1.kern.callpal_wrent                      7      0.02%      1.34% # number of callpals executed
-system.cpu1.kern.callpal_swpipl                 24461     81.95%     83.30% # number of callpals executed
-system.cpu1.kern.callpal_rdps                    2201      7.37%     90.67% # number of callpals executed
-system.cpu1.kern.callpal_wrkgp                      1      0.00%     90.68% # number of callpals executed
-system.cpu1.kern.callpal_wrusp                      3      0.01%     90.69% # number of callpals executed
-system.cpu1.kern.callpal_rdusp                      2      0.01%     90.69% # number of callpals executed
-system.cpu1.kern.callpal_whami                      3      0.01%     90.70% # number of callpals executed
-system.cpu1.kern.callpal_rti                     2582      8.65%     99.35% # number of callpals executed
-system.cpu1.kern.callpal_callsys                  161      0.54%     99.89% # number of callpals executed
-system.cpu1.kern.callpal_imb                       31      0.10%    100.00% # number of callpals executed
+system.cpu1.kern.callpal_wripir                   442      0.59%      0.59% # number of callpals executed
+system.cpu1.kern.callpal_wrmces                     1      0.00%      0.59% # number of callpals executed
+system.cpu1.kern.callpal_wrfen                      1      0.00%      0.59% # number of callpals executed
+system.cpu1.kern.callpal_swpctx                  2091      2.78%      3.37% # number of callpals executed
+system.cpu1.kern.callpal_tbi                        7      0.01%      3.38% # number of callpals executed
+system.cpu1.kern.callpal_wrent                      7      0.01%      3.38% # number of callpals executed
+system.cpu1.kern.callpal_swpipl                 66409     88.15%     91.54% # number of callpals executed
+system.cpu1.kern.callpal_rdps                    2344      3.11%     94.65% # number of callpals executed
+system.cpu1.kern.callpal_wrkgp                      1      0.00%     94.65% # number of callpals executed
+system.cpu1.kern.callpal_wrusp                      3      0.00%     94.65% # number of callpals executed
+system.cpu1.kern.callpal_rdusp                      1      0.00%     94.66% # number of callpals executed
+system.cpu1.kern.callpal_whami                      3      0.00%     94.66% # number of callpals executed
+system.cpu1.kern.callpal_rti                     3844      5.10%     99.76% # number of callpals executed
+system.cpu1.kern.callpal_callsys                  147      0.20%     99.96% # number of callpals executed
+system.cpu1.kern.callpal_imb                       31      0.04%    100.00% # number of callpals executed
 system.cpu1.kern.callpal_rdunique                   1      0.00%    100.00% # number of callpals executed
 system.cpu1.kern.inst.arm                           0                       # number of arm instructions executed
-system.cpu1.kern.inst.hwrei                     36385                       # number of hwrei instructions executed
-system.cpu1.kern.inst.quiesce                    2332                       # number of quiesce instructions executed
-system.cpu1.kern.ipl_count                      29103                       # number of times we switched to this ipl
-system.cpu1.kern.ipl_count_0                     9344     32.11%     32.11% # number of times we switched to this ipl
-system.cpu1.kern.ipl_count_22                    1963      6.75%     38.85% # number of times we switched to this ipl
-system.cpu1.kern.ipl_count_30                      96      0.33%     39.18% # number of times we switched to this ipl
-system.cpu1.kern.ipl_count_31                   17700     60.82%    100.00% # number of times we switched to this ipl
-system.cpu1.kern.ipl_good                       20635                       # number of times we switched to this ipl from a different ipl
-system.cpu1.kern.ipl_good_0                      9336     45.24%     45.24% # number of times we switched to this ipl from a different ipl
-system.cpu1.kern.ipl_good_22                     1963      9.51%     54.76% # number of times we switched to this ipl from a different ipl
-system.cpu1.kern.ipl_good_30                       96      0.47%     55.22% # number of times we switched to this ipl from a different ipl
-system.cpu1.kern.ipl_good_31                     9240     44.78%    100.00% # number of times we switched to this ipl from a different ipl
-system.cpu1.kern.ipl_ticks               1950372731000                       # number of cycles we spent at this ipl
-system.cpu1.kern.ipl_ticks_0             1911409272000     98.00%     98.00% # number of cycles we spent at this ipl
-system.cpu1.kern.ipl_ticks_22               494740000      0.03%     98.03% # number of cycles we spent at this ipl
-system.cpu1.kern.ipl_ticks_30                52316000      0.00%     98.03% # number of cycles we spent at this ipl
-system.cpu1.kern.ipl_ticks_31             38416403000      1.97%    100.00% # number of cycles we spent at this ipl
-system.cpu1.kern.ipl_used_0                  0.999144                       # fraction of swpipl calls that actually changed the ipl
+system.cpu1.kern.inst.hwrei                     81908                       # number of hwrei instructions executed
+system.cpu1.kern.inst.quiesce                    2770                       # number of quiesce instructions executed
+system.cpu1.kern.ipl_count                      72754                       # number of times we switched to this ipl
+system.cpu1.kern.ipl_count_0                    28089     38.61%     38.61% # number of times we switched to this ipl
+system.cpu1.kern.ipl_count_22                    1964      2.70%     41.31% # number of times we switched to this ipl
+system.cpu1.kern.ipl_count_30                     536      0.74%     42.04% # number of times we switched to this ipl
+system.cpu1.kern.ipl_count_31                   42165     57.96%    100.00% # number of times we switched to this ipl
+system.cpu1.kern.ipl_good                       56376                       # number of times we switched to this ipl from a different ipl
+system.cpu1.kern.ipl_good_0                     27206     48.26%     48.26% # number of times we switched to this ipl from a different ipl
+system.cpu1.kern.ipl_good_22                     1964      3.48%     51.74% # number of times we switched to this ipl from a different ipl
+system.cpu1.kern.ipl_good_30                      536      0.95%     52.69% # number of times we switched to this ipl from a different ipl
+system.cpu1.kern.ipl_good_31                    26670     47.31%    100.00% # number of times we switched to this ipl from a different ipl
+system.cpu1.kern.ipl_ticks               1951174446000                       # number of cycles we spent at this ipl
+system.cpu1.kern.ipl_ticks_0             1904796411500     97.62%     97.62% # number of cycles we spent at this ipl
+system.cpu1.kern.ipl_ticks_22               499877500      0.03%     97.65% # number of cycles we spent at this ipl
+system.cpu1.kern.ipl_ticks_30               327859000      0.02%     97.67% # number of cycles we spent at this ipl
+system.cpu1.kern.ipl_ticks_31             45550298000      2.33%    100.00% # number of cycles we spent at this ipl
+system.cpu1.kern.ipl_used_0                  0.968564                       # fraction of swpipl calls that actually changed the ipl
 system.cpu1.kern.ipl_used_22                        1                       # fraction of swpipl calls that actually changed the ipl
 system.cpu1.kern.ipl_used_30                        1                       # fraction of swpipl calls that actually changed the ipl
-system.cpu1.kern.ipl_used_31                 0.522034                       # fraction of swpipl calls that actually changed the ipl
-system.cpu1.kern.mode_good_kernel                 538                      
-system.cpu1.kern.mode_good_user                   517                      
-system.cpu1.kern.mode_good_idle                    21                      
-system.cpu1.kern.mode_switch_kernel               884                       # number of protection mode switches
-system.cpu1.kern.mode_switch_user                 517                       # number of protection mode switches
-system.cpu1.kern.mode_switch_idle                2075                       # number of protection mode switches
-system.cpu1.kern.mode_switch_good            1.618718                       # fraction of useful protection mode switches
-system.cpu1.kern.mode_switch_good_kernel     0.608597                       # fraction of useful protection mode switches
+system.cpu1.kern.ipl_used_31                 0.632515                       # fraction of swpipl calls that actually changed the ipl
+system.cpu1.kern.mode_good_kernel                 924                      
+system.cpu1.kern.mode_good_user                   463                      
+system.cpu1.kern.mode_good_idle                   461                      
+system.cpu1.kern.mode_switch_kernel              2120                       # number of protection mode switches
+system.cpu1.kern.mode_switch_user                 463                       # number of protection mode switches
+system.cpu1.kern.mode_switch_idle                2943                       # number of protection mode switches
+system.cpu1.kern.mode_switch_good            1.592492                       # fraction of useful protection mode switches
+system.cpu1.kern.mode_switch_good_kernel     0.435849                       # fraction of useful protection mode switches
 system.cpu1.kern.mode_switch_good_user              1                       # fraction of useful protection mode switches
-system.cpu1.kern.mode_switch_good_idle       0.010120                       # fraction of useful protection mode switches
-system.cpu1.kern.mode_ticks_kernel         3563216000      0.18%      0.18% # number of ticks spent at the given mode
-system.cpu1.kern.mode_ticks_user           1513259000      0.08%      0.26% # number of ticks spent at the given mode
-system.cpu1.kern.mode_ticks_idle         1945257297000     99.74%    100.00% # number of ticks spent at the given mode
-system.cpu1.kern.swap_context                     376                       # number of times the context was actually changed
-system.cpu1.kern.syscall                          102                       # number of syscalls executed
-system.cpu1.kern.syscall_2                          2      1.96%      1.96% # number of syscalls executed
-system.cpu1.kern.syscall_3                         11     10.78%     12.75% # number of syscalls executed
-system.cpu1.kern.syscall_4                          1      0.98%     13.73% # number of syscalls executed
-system.cpu1.kern.syscall_6                         12     11.76%     25.49% # number of syscalls executed
-system.cpu1.kern.syscall_17                         5      4.90%     30.39% # number of syscalls executed
-system.cpu1.kern.syscall_19                         4      3.92%     34.31% # number of syscalls executed
-system.cpu1.kern.syscall_20                         2      1.96%     36.27% # number of syscalls executed
-system.cpu1.kern.syscall_23                         2      1.96%     38.24% # number of syscalls executed
-system.cpu1.kern.syscall_24                         2      1.96%     40.20% # number of syscalls executed
-system.cpu1.kern.syscall_33                         3      2.94%     43.14% # number of syscalls executed
-system.cpu1.kern.syscall_45                        15     14.71%     57.84% # number of syscalls executed
-system.cpu1.kern.syscall_47                         2      1.96%     59.80% # number of syscalls executed
-system.cpu1.kern.syscall_48                         3      2.94%     62.75% # number of syscalls executed
-system.cpu1.kern.syscall_54                         1      0.98%     63.73% # number of syscalls executed
-system.cpu1.kern.syscall_59                         2      1.96%     65.69% # number of syscalls executed
-system.cpu1.kern.syscall_71                        22     21.57%     87.25% # number of syscalls executed
-system.cpu1.kern.syscall_74                         7      6.86%     94.12% # number of syscalls executed
-system.cpu1.kern.syscall_90                         1      0.98%     95.10% # number of syscalls executed
-system.cpu1.kern.syscall_92                         2      1.96%     97.06% # number of syscalls executed
-system.cpu1.kern.syscall_132                        2      1.96%     99.02% # number of syscalls executed
-system.cpu1.kern.syscall_144                        1      0.98%    100.00% # number of syscalls executed
-system.cpu1.not_idle_fraction                0.004678                       # Percentage of non-idle cycles
-system.cpu1.numCycles                    1950372761000                       # number of cpu cycles simulated
-system.cpu1.num_insts                         5376264                       # Number of instructions executed
-system.cpu1.num_refs                          1738417                       # Number of memory references
+system.cpu1.kern.mode_switch_good_idle       0.156643                       # fraction of useful protection mode switches
+system.cpu1.kern.mode_ticks_kernel        18594859000      0.95%      0.95% # number of ticks spent at the given mode
+system.cpu1.kern.mode_ticks_user           1499702000      0.08%      1.03% # number of ticks spent at the given mode
+system.cpu1.kern.mode_ticks_idle         1930131145000     98.97%    100.00% # number of ticks spent at the given mode
+system.cpu1.kern.swap_context                    2092                       # number of times the context was actually changed
+system.cpu1.kern.syscall                           98                       # number of syscalls executed
+system.cpu1.kern.syscall_2                          1      1.02%      1.02% # number of syscalls executed
+system.cpu1.kern.syscall_3                         11     11.22%     12.24% # number of syscalls executed
+system.cpu1.kern.syscall_4                          1      1.02%     13.27% # number of syscalls executed
+system.cpu1.kern.syscall_6                         11     11.22%     24.49% # number of syscalls executed
+system.cpu1.kern.syscall_17                         5      5.10%     29.59% # number of syscalls executed
+system.cpu1.kern.syscall_19                         4      4.08%     33.67% # number of syscalls executed
+system.cpu1.kern.syscall_20                         2      2.04%     35.71% # number of syscalls executed
+system.cpu1.kern.syscall_23                         2      2.04%     37.76% # number of syscalls executed
+system.cpu1.kern.syscall_24                         2      2.04%     39.80% # number of syscalls executed
+system.cpu1.kern.syscall_33                         3      3.06%     42.86% # number of syscalls executed
+system.cpu1.kern.syscall_45                        15     15.31%     58.16% # number of syscalls executed
+system.cpu1.kern.syscall_47                         2      2.04%     60.20% # number of syscalls executed
+system.cpu1.kern.syscall_48                         2      2.04%     62.24% # number of syscalls executed
+system.cpu1.kern.syscall_54                         1      1.02%     63.27% # number of syscalls executed
+system.cpu1.kern.syscall_59                         1      1.02%     64.29% # number of syscalls executed
+system.cpu1.kern.syscall_71                        22     22.45%     86.73% # number of syscalls executed
+system.cpu1.kern.syscall_74                         7      7.14%     93.88% # number of syscalls executed
+system.cpu1.kern.syscall_90                         1      1.02%     94.90% # number of syscalls executed
+system.cpu1.kern.syscall_92                         2      2.04%     96.94% # number of syscalls executed
+system.cpu1.kern.syscall_132                        2      2.04%     98.98% # number of syscalls executed
+system.cpu1.kern.syscall_144                        1      1.02%    100.00% # number of syscalls executed
+system.cpu1.not_idle_fraction                0.012799                       # Percentage of non-idle cycles
+system.cpu1.numCycles                    1951174476000                       # number of cpu cycles simulated
+system.cpu1.num_insts                        13758345                       # Number of instructions executed
+system.cpu1.num_refs                          4385954                       # Number of memory references
 system.disk0.dma_read_bytes                      1024                       # Number of bytes transfered via DMA reads (not PRD).
 system.disk0.dma_read_full_pages                    0                       # Number of full page size DMA reads (not PRD).
 system.disk0.dma_read_txs                           1                       # Number of DMA read transactions (not PRD).
@@ -610,75 +543,80 @@ system.disk2.dma_read_txs                           0                       # Nu
 system.disk2.dma_write_bytes                     8192                       # Number of bytes transfered via DMA writes.
 system.disk2.dma_write_full_pages                   1                       # Number of full page size DMA writes.
 system.disk2.dma_write_txs                          1                       # Number of DMA write transactions.
-system.l2c.ReadExReq_accesses                  306499                       # number of ReadExReq accesses(hits+misses)
-system.l2c.ReadExReq_avg_miss_latency    12998.029396                       # average ReadExReq miss latency
-system.l2c.ReadExReq_avg_mshr_miss_latency 10997.988681                       # average ReadExReq mshr miss latency
-system.l2c.ReadExReq_hits                      183694                       # number of ReadExReq hits
-system.l2c.ReadExReq_miss_latency          1596223000                       # number of ReadExReq miss cycles
-system.l2c.ReadExReq_miss_rate               0.400670                       # miss rate for ReadExReq accesses
-system.l2c.ReadExReq_misses                    122805                       # number of ReadExReq misses
-system.l2c.ReadExReq_mshr_miss_latency     1350608000                       # number of ReadExReq MSHR miss cycles
-system.l2c.ReadExReq_mshr_miss_rate          0.400670                       # mshr miss rate for ReadExReq accesses
-system.l2c.ReadExReq_mshr_misses               122805                       # number of ReadExReq MSHR misses
-system.l2c.ReadReq_accesses                   2751323                       # number of ReadReq accesses(hits+misses)
-system.l2c.ReadReq_avg_miss_latency      12999.901707                       # average ReadReq miss latency
-system.l2c.ReadReq_avg_mshr_miss_latency 10999.990968                       # average ReadReq mshr miss latency
-system.l2c.ReadReq_hits                       1810263                       # number of ReadReq hits
-system.l2c.ReadReq_miss_latency           12233687500                       # number of ReadReq miss cycles
-system.l2c.ReadReq_miss_rate                 0.342039                       # miss rate for ReadReq accesses
-system.l2c.ReadReq_misses                      941060                       # number of ReadReq misses
-system.l2c.ReadReq_mshr_hits                       11                       # number of ReadReq MSHR hits
-system.l2c.ReadReq_mshr_miss_latency      10351530500                       # number of ReadReq MSHR miss cycles
-system.l2c.ReadReq_mshr_miss_rate            0.342035                       # mshr miss rate for ReadReq accesses
-system.l2c.ReadReq_mshr_misses                 941049                       # number of ReadReq MSHR misses
-system.l2c.ReadReq_mshr_uncacheable              6993                       # number of ReadReq MSHR uncacheable
-system.l2c.ReadResp_avg_mshr_uncacheable_latency          inf                       # average ReadResp mshr uncacheable latency
-system.l2c.ReadResp_mshr_uncacheable_latency    779629500                       # number of ReadResp MSHR uncacheable cycles
-system.l2c.WriteReq_mshr_uncacheable            12194                       # number of WriteReq MSHR uncacheable
-system.l2c.WriteResp_avg_mshr_uncacheable_latency          inf                       # average WriteResp mshr uncacheable latency
-system.l2c.WriteResp_mshr_uncacheable_latency   1356619000                       # number of WriteResp MSHR uncacheable cycles
-system.l2c.Writeback_accesses                  429269                       # number of Writeback accesses(hits+misses)
-system.l2c.Writeback_hits                      429256                       # number of Writeback hits
-system.l2c.Writeback_miss_rate               0.000030                       # miss rate for Writeback accesses
-system.l2c.Writeback_misses                        13                       # number of Writeback misses
-system.l2c.Writeback_mshr_miss_rate          0.000030                       # mshr miss rate for Writeback accesses
-system.l2c.Writeback_mshr_misses                   13                       # number of Writeback MSHR misses
+system.l2c.ReadExReq_accesses                  297979                       # number of ReadExReq accesses(hits+misses)
+system.l2c.ReadExReq_avg_miss_latency    12000.808782                       # average ReadExReq miss latency
+system.l2c.ReadExReq_avg_mshr_miss_latency 11000.808782                       # average ReadExReq mshr miss latency
+system.l2c.ReadExReq_miss_latency          3575989000                       # number of ReadExReq miss cycles
+system.l2c.ReadExReq_miss_rate                      1                       # miss rate for ReadExReq accesses
+system.l2c.ReadExReq_misses                    297979                       # number of ReadExReq misses
+system.l2c.ReadExReq_mshr_miss_latency     3278010000                       # number of ReadExReq MSHR miss cycles
+system.l2c.ReadExReq_mshr_miss_rate                 1                       # mshr miss rate for ReadExReq accesses
+system.l2c.ReadExReq_mshr_misses               297979                       # number of ReadExReq MSHR misses
+system.l2c.ReadReq_accesses                   2726406                       # number of ReadReq accesses(hits+misses)
+system.l2c.ReadReq_avg_miss_latency      12000.355770                       # average ReadReq miss latency
+system.l2c.ReadReq_avg_mshr_miss_latency 11000.235046                       # average ReadReq mshr miss latency
+system.l2c.ReadReq_avg_mshr_uncacheable_latency          inf                       # average ReadReq mshr uncacheable latency
+system.l2c.ReadReq_hits                       1633004                       # number of ReadReq hits
+system.l2c.ReadReq_miss_latency           13121213000                       # number of ReadReq miss cycles
+system.l2c.ReadReq_miss_rate                 0.401042                       # miss rate for ReadReq accesses
+system.l2c.ReadReq_misses                     1093402                       # number of ReadReq misses
+system.l2c.ReadReq_mshr_hits                       12                       # number of ReadReq MSHR hits
+system.l2c.ReadReq_mshr_miss_latency      12027679000                       # number of ReadReq MSHR miss cycles
+system.l2c.ReadReq_mshr_miss_rate            0.401042                       # mshr miss rate for ReadReq accesses
+system.l2c.ReadReq_mshr_misses                1093402                       # number of ReadReq MSHR misses
+system.l2c.ReadReq_mshr_uncacheable_latency    779744500                       # number of ReadReq MSHR uncacheable cycles
+system.l2c.UpgradeReq_accesses                 125211                       # number of UpgradeReq accesses(hits+misses)
+system.l2c.UpgradeReq_avg_miss_latency   11388.943463                       # average UpgradeReq miss latency
+system.l2c.UpgradeReq_avg_mshr_miss_latency 11003.410244                       # average UpgradeReq mshr miss latency
+system.l2c.UpgradeReq_miss_latency         1426021000                       # number of UpgradeReq miss cycles
+system.l2c.UpgradeReq_miss_rate                     1                       # miss rate for UpgradeReq accesses
+system.l2c.UpgradeReq_misses                   125211                       # number of UpgradeReq misses
+system.l2c.UpgradeReq_mshr_miss_latency    1377748000                       # number of UpgradeReq MSHR miss cycles
+system.l2c.UpgradeReq_mshr_miss_rate                1                       # mshr miss rate for UpgradeReq accesses
+system.l2c.UpgradeReq_mshr_misses              125211                       # number of UpgradeReq MSHR misses
+system.l2c.WriteReq_avg_mshr_uncacheable_latency          inf                       # average WriteReq mshr uncacheable latency
+system.l2c.WriteReq_mshr_uncacheable_latency   1551434500                       # number of WriteReq MSHR uncacheable cycles
+system.l2c.Writeback_accesses                  416193                       # number of Writeback accesses(hits+misses)
+system.l2c.Writeback_miss_rate                      1                       # miss rate for Writeback accesses
+system.l2c.Writeback_misses                    416193                       # number of Writeback misses
+system.l2c.Writeback_mshr_miss_rate                 1                       # mshr miss rate for Writeback accesses
+system.l2c.Writeback_mshr_misses               416193                       # number of Writeback MSHR misses
 system.l2c.avg_blocked_cycles_no_mshrs   <err: div-0>                       # average number of cycles each access was blocked
 system.l2c.avg_blocked_cycles_no_targets <err: div-0>                       # average number of cycles each access was blocked
-system.l2c.avg_refs                          2.277768                       # Average number of references to valid blocks.
+system.l2c.avg_refs                          1.713697                       # Average number of references to valid blocks.
 system.l2c.blocked_no_mshrs                         0                       # number of cycles access was blocked
 system.l2c.blocked_no_targets                       0                       # number of cycles access was blocked
 system.l2c.blocked_cycles_no_mshrs                  0                       # number of cycles access was blocked
 system.l2c.blocked_cycles_no_targets                0                       # number of cycles access was blocked
 system.l2c.cache_copies                             0                       # number of cache copies performed
-system.l2c.demand_accesses                    2751323                       # number of demand (read+write) accesses
-system.l2c.demand_avg_miss_latency       12999.901707                       # average overall miss latency
-system.l2c.demand_avg_mshr_miss_latency  10999.990968                       # average overall mshr miss latency
-system.l2c.demand_hits                        1810263                       # number of demand (read+write) hits
-system.l2c.demand_miss_latency            12233687500                       # number of demand (read+write) miss cycles
-system.l2c.demand_miss_rate                  0.342039                       # miss rate for demand accesses
-system.l2c.demand_misses                       941060                       # number of demand (read+write) misses
-system.l2c.demand_mshr_hits                        11                       # number of demand (read+write) MSHR hits
-system.l2c.demand_mshr_miss_latency       10351530500                       # number of demand (read+write) MSHR miss cycles
-system.l2c.demand_mshr_miss_rate             0.342035                       # mshr miss rate for demand accesses
-system.l2c.demand_mshr_misses                  941049                       # number of demand (read+write) MSHR misses
+system.l2c.demand_accesses                    3024385                       # number of demand (read+write) accesses
+system.l2c.demand_avg_miss_latency       12000.452788                       # average overall miss latency
+system.l2c.demand_avg_mshr_miss_latency  11000.357918                       # average overall mshr miss latency
+system.l2c.demand_hits                        1633004                       # number of demand (read+write) hits
+system.l2c.demand_miss_latency            16697202000                       # number of demand (read+write) miss cycles
+system.l2c.demand_miss_rate                  0.460054                       # miss rate for demand accesses
+system.l2c.demand_misses                      1391381                       # number of demand (read+write) misses
+system.l2c.demand_mshr_hits                        12                       # number of demand (read+write) MSHR hits
+system.l2c.demand_mshr_miss_latency       15305689000                       # number of demand (read+write) MSHR miss cycles
+system.l2c.demand_mshr_miss_rate             0.460054                       # mshr miss rate for demand accesses
+system.l2c.demand_mshr_misses                 1391381                       # number of demand (read+write) MSHR misses
 system.l2c.fast_writes                              0                       # number of fast writes performed
 system.l2c.mshr_cap_events                          0                       # number of times MSHR cap was activated
 system.l2c.no_allocate_misses                       0                       # Number of misses that were no-allocate
-system.l2c.overall_accesses                   3180592                       # number of overall (read+write) accesses
-system.l2c.overall_avg_miss_latency      12999.722126                       # average overall miss latency
-system.l2c.overall_avg_mshr_miss_latency 10999.990968                       # average overall mshr miss latency
-system.l2c.overall_avg_mshr_uncacheable_latency            0                       # average overall mshr uncacheable latency
-system.l2c.overall_hits                       2239519                       # number of overall hits
-system.l2c.overall_miss_latency           12233687500                       # number of overall miss cycles
-system.l2c.overall_miss_rate                 0.295880                       # miss rate for overall accesses
-system.l2c.overall_misses                      941073                       # number of overall misses
-system.l2c.overall_mshr_hits                       11                       # number of overall MSHR hits
-system.l2c.overall_mshr_miss_latency      10351530500                       # number of overall MSHR miss cycles
-system.l2c.overall_mshr_miss_rate            0.295872                       # mshr miss rate for overall accesses
-system.l2c.overall_mshr_misses                 941049                       # number of overall MSHR misses
-system.l2c.overall_mshr_uncacheable_latency            0                       # number of overall MSHR uncacheable cycles
-system.l2c.overall_mshr_uncacheable_misses        19187                       # number of overall MSHR uncacheable misses
+system.l2c.overall_accesses                   3024385                       # number of overall (read+write) accesses
+system.l2c.overall_avg_miss_latency      12000.452788                       # average overall miss latency
+system.l2c.overall_avg_mshr_miss_latency 11000.357918                       # average overall mshr miss latency
+system.l2c.overall_avg_mshr_uncacheable_latency          inf                       # average overall mshr uncacheable latency
+system.l2c.overall_hits                       1633004                       # number of overall hits
+system.l2c.overall_miss_latency           16697202000                       # number of overall miss cycles
+system.l2c.overall_miss_rate                 0.460054                       # miss rate for overall accesses
+system.l2c.overall_misses                     1391381                       # number of overall misses
+system.l2c.overall_mshr_hits                       12                       # number of overall MSHR hits
+system.l2c.overall_mshr_miss_latency      15305689000                       # number of overall MSHR miss cycles
+system.l2c.overall_mshr_miss_rate            0.460054                       # mshr miss rate for overall accesses
+system.l2c.overall_mshr_misses                1391381                       # number of overall MSHR misses
+system.l2c.overall_mshr_uncacheable_latency   2331179000                       # number of overall MSHR uncacheable cycles
+system.l2c.overall_mshr_uncacheable_misses            0                       # number of overall MSHR uncacheable misses
 system.l2c.prefetcher.num_hwpf_already_in_cache            0                       # number of hwpf that were already in the cache
 system.l2c.prefetcher.num_hwpf_already_in_mshr            0                       # number of hwpf that were already in mshr
 system.l2c.prefetcher.num_hwpf_already_in_prefetcher            0                       # number of hwpf that were already in the prefetch queue
@@ -688,13 +626,13 @@ system.l2c.prefetcher.num_hwpf_issued               0                       # nu
 system.l2c.prefetcher.num_hwpf_removed_MSHR_hit            0                       # number of hwpf removed because MSHR allocated
 system.l2c.prefetcher.num_hwpf_span_page            0                       # number of hwpf spanning a virtual page
 system.l2c.prefetcher.num_hwpf_squashed_from_miss            0                       # number of hwpf that got squashed due to a miss aborting calculation time
-system.l2c.replacements                        998318                       # number of replacements
-system.l2c.sampled_refs                       1063854                       # Sample count of references to valid blocks.
+system.l2c.replacements                        947502                       # number of replacements
+system.l2c.sampled_refs                        965785                       # Sample count of references to valid blocks.
 system.l2c.soft_prefetch_mshr_full                  0                       # number of mshr full events for SW prefetching instrutions
-system.l2c.tagsinuse                     65469.787238                       # Cycle average of tags in use
-system.l2c.total_refs                         2423213                       # Total number of references to valid blocks.
-system.l2c.warmup_cycle                    3064127000                       # Cycle when the warmup percentage was hit.
-system.l2c.writebacks                           79556                       # number of writebacks
+system.l2c.tagsinuse                     16369.951624                       # Cycle average of tags in use
+system.l2c.total_refs                         1655063                       # Total number of references to valid blocks.
+system.l2c.warmup_cycle                    5421925000                       # Cycle when the warmup percentage was hit.
+system.l2c.writebacks                               0                       # number of writebacks
 system.tsunami.ethernet.coalescedRxDesc  <err: div-0>                       # average number of RxDesc's coalesced into each post
 system.tsunami.ethernet.coalescedRxIdle  <err: div-0>                       # average number of RxIdle's coalesced into each post
 system.tsunami.ethernet.coalescedRxOk    <err: div-0>                       # average number of RxOk's coalesced into each post
diff --git a/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-timing-dual/stderr b/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-timing-dual/stderr
index af0df3710..e6ad9b469 100644
--- a/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-timing-dual/stderr
+++ b/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-timing-dual/stderr
@@ -1,5 +1,5 @@
-Listening for system connection on port 3456
-0: system.remote_gdb.listener: listening for remote gdb #0 on port 7000
-0: system.remote_gdb.listener: listening for remote gdb #1 on port 7001
+Listening for system connection on port 3457
+0: system.remote_gdb.listener: listening for remote gdb on port 7001
+0: system.remote_gdb.listener: listening for remote gdb on port 7002
 warn: Entering event queue @ 0.  Starting simulation...
-warn: 423901000: Trying to launch CPU number 1!
+warn: 427086000: Trying to launch CPU number 1!
diff --git a/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-timing-dual/stdout b/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-timing-dual/stdout
index 68b58c461..99539f3ea 100644
--- a/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-timing-dual/stdout
+++ b/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-timing-dual/stdout
@@ -5,10 +5,9 @@ The Regents of The University of Michigan
 All Rights Reserved
 
 
-M5 compiled Jun 10 2007 14:10:03
-M5 started Mon Jun 11 01:30:38 2007
-M5 executing on iceaxe
-command line: /Users/nate/build/outgoing/build/ALPHA_FS/m5.debug -d /Users/nate/build/outgoing/build/ALPHA_FS/tests/debug/quick/10.linux-boot/alpha/linux/tsunami-simple-timing-dual tests/run.py quick/10.linux-boot/alpha/linux/tsunami-simple-timing-dual
+M5 compiled Aug  3 2007 04:02:11
+M5 started Fri Aug  3 04:25:10 2007
+M5 executing on zizzer.eecs.umich.edu
+command line: build/ALPHA_FS/m5.fast -d build/ALPHA_FS/tests/fast/quick/10.linux-boot/alpha/linux/tsunami-simple-timing-dual tests/run.py quick/10.linux-boot/alpha/linux/tsunami-simple-timing-dual
 Global frequency set at 1000000000000 ticks per second
-      0: system.tsunami.io.rtc: Real-time clock set to Thu Jan  1 00:00:00 2009
-Exiting @ tick 1951129131000 because m5_exit instruction encountered
+Exiting @ tick 1951367346000 because m5_exit instruction encountered
diff --git a/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-timing/config.ini b/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-timing/config.ini
index c726f11fe..1992f65a2 100644
--- a/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-timing/config.ini
+++ b/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-timing/config.ini
@@ -35,7 +35,7 @@ side_b=system.membus.port[0]
 
 [system.cpu]
 type=TimingSimpleCPU
-children=dcache dtb icache itb
+children=dcache dtb icache itb tracer
 clock=500
 cpu_id=0
 defer_registration=false
@@ -54,17 +54,15 @@ phase=0
 profile=0
 progress_interval=0
 system=system
+tracer=system.cpu.tracer
 dcache_port=system.cpu.dcache.cpu_side
 icache_port=system.cpu.icache.cpu_side
 
 [system.cpu.dcache]
 type=BaseCache
-children=protocol
-adaptive_compression=false
+addr_range=0:18446744073709551615
 assoc=4
 block_size=64
-compressed_bus=false
-compression_latency=0
 hash_delay=1
 latency=1000
 lifo=false
@@ -82,12 +80,10 @@ prefetch_serial_squash=false
 prefetch_use_cpu_id=true
 prefetcher_size=100
 prioritizeRequests=false
-protocol=system.cpu.dcache.protocol
 repl=Null
 size=32768
 split=false
 split_size=0
-store_compressed=false
 subblock_size=0
 tgts_per_mshr=8
 trace_addr=0
@@ -96,23 +92,15 @@ write_buffers=8
 cpu_side=system.cpu.dcache_port
 mem_side=system.toL2Bus.port[2]
 
-[system.cpu.dcache.protocol]
-type=CoherenceProtocol
-do_upgrades=true
-protocol=moesi
-
 [system.cpu.dtb]
 type=AlphaDTB
 size=64
 
 [system.cpu.icache]
 type=BaseCache
-children=protocol
-adaptive_compression=false
+addr_range=0:18446744073709551615
 assoc=1
 block_size=64
-compressed_bus=false
-compression_latency=0
 hash_delay=1
 latency=1000
 lifo=false
@@ -130,12 +118,10 @@ prefetch_serial_squash=false
 prefetch_use_cpu_id=true
 prefetcher_size=100
 prioritizeRequests=false
-protocol=system.cpu.icache.protocol
 repl=Null
 size=32768
 split=false
 split_size=0
-store_compressed=false
 subblock_size=0
 tgts_per_mshr=8
 trace_addr=0
@@ -144,15 +130,13 @@ write_buffers=8
 cpu_side=system.cpu.icache_port
 mem_side=system.toL2Bus.port[1]
 
-[system.cpu.icache.protocol]
-type=CoherenceProtocol
-do_upgrades=true
-protocol=moesi
-
 [system.cpu.itb]
 type=AlphaITB
 size=48
 
+[system.cpu.tracer]
+type=ExeTracer
+
 [system.disk0]
 type=IdeDisk
 children=image
@@ -207,11 +191,9 @@ port=system.bridge.side_a system.tsunami.cchip.pio system.tsunami.pchip.pio syst
 
 [system.l2c]
 type=BaseCache
-adaptive_compression=false
+addr_range=0:18446744073709551615
 assoc=8
 block_size=64
-compressed_bus=false
-compression_latency=0
 hash_delay=1
 latency=10000
 lifo=false
@@ -229,12 +211,10 @@ prefetch_serial_squash=false
 prefetch_use_cpu_id=true
 prefetcher_size=100
 prioritizeRequests=false
-protocol=Null
 repl=Null
 size=4194304
 split=false
 split_size=0
-store_compressed=false
 subblock_size=0
 tgts_per_mshr=16
 trace_addr=0
@@ -798,7 +778,7 @@ pio_addr=8804615847936
 pio_latency=1000
 platform=system.tsunami
 system=system
-time=2009 1 1 0 0 0 3 1
+time=Thu Jan  1 00:00:00 2009
 tsunami=system.tsunami
 year_is_bcd=false
 pio=system.iobus.port[23]
diff --git a/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-timing/m5stats.txt b/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-timing/m5stats.txt
index f72789e4b..958246a30 100644
--- a/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-timing/m5stats.txt
+++ b/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-timing/m5stats.txt
@@ -1,74 +1,93 @@
 
 ---------- Begin Simulation Statistics ----------
-host_inst_rate                                  62427                       # Simulator instruction rate (inst/s)
-host_seconds                                   961.73                       # Real time elapsed on the host
-host_tick_rate                             1983042717                       # Simulator tick rate (ticks/s)
+host_inst_rate                                 631972                       # Simulator instruction rate (inst/s)
+host_mem_usage                                 219140                       # Number of bytes of host memory used
+host_seconds                                    95.00                       # Real time elapsed on the host
+host_tick_rate                            20109299069                       # Simulator tick rate (ticks/s)
 sim_freq                                 1000000000000                       # Frequency of simulated ticks
-sim_insts                                    60037406                       # Number of instructions simulated
-sim_seconds                                  1.907146                       # Number of seconds simulated
-sim_ticks                                1907146437000                       # Number of ticks simulated
-system.cpu.dcache.ReadReq_accesses            9726331                       # number of ReadReq accesses(hits+misses)
-system.cpu.dcache.ReadReq_avg_miss_latency 13065.219101                       # average ReadReq miss latency
-system.cpu.dcache.ReadReq_avg_mshr_miss_latency 12065.192690                       # average ReadReq mshr miss latency
-system.cpu.dcache.ReadReq_hits                7984648                       # number of ReadReq hits
-system.cpu.dcache.ReadReq_miss_latency    22755470000                       # number of ReadReq miss cycles
-system.cpu.dcache.ReadReq_miss_rate          0.179069                       # miss rate for ReadReq accesses
-system.cpu.dcache.ReadReq_misses              1741683                       # number of ReadReq misses
-system.cpu.dcache.ReadReq_mshr_miss_latency  21013741000                       # number of ReadReq MSHR miss cycles
-system.cpu.dcache.ReadReq_mshr_miss_rate     0.179069                       # mshr miss rate for ReadReq accesses
-system.cpu.dcache.ReadReq_mshr_misses         1741683                       # number of ReadReq MSHR misses
-system.cpu.dcache.ReadReq_mshr_uncacheable         6727                       # number of ReadReq MSHR uncacheable
-system.cpu.dcache.ReadResp_avg_mshr_uncacheable_latency          inf                       # average ReadResp mshr uncacheable latency
-system.cpu.dcache.ReadResp_mshr_uncacheable_latency    824099000                       # number of ReadResp MSHR uncacheable cycles
-system.cpu.dcache.WriteReq_accesses           6350552                       # number of WriteReq accesses(hits+misses)
-system.cpu.dcache.WriteReq_avg_miss_latency 12768.106941                       # average WriteReq miss latency
-system.cpu.dcache.WriteReq_avg_mshr_miss_latency 11768.067509                       # average WriteReq mshr miss latency
-system.cpu.dcache.WriteReq_hits               6046235                       # number of WriteReq hits
-system.cpu.dcache.WriteReq_miss_latency    3885552000                       # number of WriteReq miss cycles
-system.cpu.dcache.WriteReq_miss_rate         0.047920                       # miss rate for WriteReq accesses
-system.cpu.dcache.WriteReq_misses              304317                       # number of WriteReq misses
-system.cpu.dcache.WriteReq_mshr_miss_latency   3581223000                       # number of WriteReq MSHR miss cycles
-system.cpu.dcache.WriteReq_mshr_miss_rate     0.047920                       # mshr miss rate for WriteReq accesses
-system.cpu.dcache.WriteReq_mshr_misses         304317                       # number of WriteReq MSHR misses
-system.cpu.dcache.WriteReq_mshr_uncacheable         9438                       # number of WriteReq MSHR uncacheable
-system.cpu.dcache.WriteResp_avg_mshr_uncacheable_latency          inf                       # average WriteResp mshr uncacheable latency
-system.cpu.dcache.WriteResp_mshr_uncacheable_latency   1154484000                       # number of WriteResp MSHR uncacheable cycles
+sim_insts                                    60034774                       # Number of instructions simulated
+sim_seconds                                  1.910310                       # Number of seconds simulated
+sim_ticks                                1910309711000                       # Number of ticks simulated
+system.cpu.dcache.LoadLockedReq_accesses       200211                       # number of LoadLockedReq accesses(hits+misses)
+system.cpu.dcache.LoadLockedReq_avg_miss_latency 13960.656682                       # average LoadLockedReq miss latency
+system.cpu.dcache.LoadLockedReq_avg_mshr_miss_latency 12960.656682                       # average LoadLockedReq mshr miss latency
+system.cpu.dcache.LoadLockedReq_hits           182851                       # number of LoadLockedReq hits
+system.cpu.dcache.LoadLockedReq_miss_latency    242357000                       # number of LoadLockedReq miss cycles
+system.cpu.dcache.LoadLockedReq_miss_rate     0.086709                       # miss rate for LoadLockedReq accesses
+system.cpu.dcache.LoadLockedReq_misses          17360                       # number of LoadLockedReq misses
+system.cpu.dcache.LoadLockedReq_mshr_miss_latency    224997000                       # number of LoadLockedReq MSHR miss cycles
+system.cpu.dcache.LoadLockedReq_mshr_miss_rate     0.086709                       # mshr miss rate for LoadLockedReq accesses
+system.cpu.dcache.LoadLockedReq_mshr_misses        17360                       # number of LoadLockedReq MSHR misses
+system.cpu.dcache.ReadReq_accesses            9525872                       # number of ReadReq accesses(hits+misses)
+system.cpu.dcache.ReadReq_avg_miss_latency 13240.454388                       # average ReadReq miss latency
+system.cpu.dcache.ReadReq_avg_mshr_miss_latency 12240.427719                       # average ReadReq mshr miss latency
+system.cpu.dcache.ReadReq_avg_mshr_uncacheable_latency          inf                       # average ReadReq mshr uncacheable latency
+system.cpu.dcache.ReadReq_hits                7801048                       # number of ReadReq hits
+system.cpu.dcache.ReadReq_miss_latency    22837453500                       # number of ReadReq miss cycles
+system.cpu.dcache.ReadReq_miss_rate          0.181067                       # miss rate for ReadReq accesses
+system.cpu.dcache.ReadReq_misses              1724824                       # number of ReadReq misses
+system.cpu.dcache.ReadReq_mshr_miss_latency  21112583500                       # number of ReadReq MSHR miss cycles
+system.cpu.dcache.ReadReq_mshr_miss_rate     0.181067                       # mshr miss rate for ReadReq accesses
+system.cpu.dcache.ReadReq_mshr_misses         1724824                       # number of ReadReq MSHR misses
+system.cpu.dcache.ReadReq_mshr_uncacheable_latency    830826000                       # number of ReadReq MSHR uncacheable cycles
+system.cpu.dcache.StoreCondReq_accesses        199189                       # number of StoreCondReq accesses(hits+misses)
+system.cpu.dcache.StoreCondReq_avg_miss_latency 14000.798456                       # average StoreCondReq miss latency
+system.cpu.dcache.StoreCondReq_avg_mshr_miss_latency 13000.798456                       # average StoreCondReq mshr miss latency
+system.cpu.dcache.StoreCondReq_hits            169131                       # number of StoreCondReq hits
+system.cpu.dcache.StoreCondReq_miss_latency    420836000                       # number of StoreCondReq miss cycles
+system.cpu.dcache.StoreCondReq_miss_rate     0.150902                       # miss rate for StoreCondReq accesses
+system.cpu.dcache.StoreCondReq_misses           30058                       # number of StoreCondReq misses
+system.cpu.dcache.StoreCondReq_mshr_miss_latency    390778000                       # number of StoreCondReq MSHR miss cycles
+system.cpu.dcache.StoreCondReq_mshr_miss_rate     0.150902                       # mshr miss rate for StoreCondReq accesses
+system.cpu.dcache.StoreCondReq_mshr_misses        30058                       # number of StoreCondReq MSHR misses
+system.cpu.dcache.WriteReq_accesses           6151132                       # number of WriteReq accesses(hits+misses)
+system.cpu.dcache.WriteReq_avg_miss_latency 14000.947966                       # average WriteReq miss latency
+system.cpu.dcache.WriteReq_avg_mshr_miss_latency 13000.947966                       # average WriteReq mshr miss latency
+system.cpu.dcache.WriteReq_avg_mshr_uncacheable_latency          inf                       # average WriteReq mshr uncacheable latency
+system.cpu.dcache.WriteReq_hits               5750801                       # number of WriteReq hits
+system.cpu.dcache.WriteReq_miss_latency    5605013500                       # number of WriteReq miss cycles
+system.cpu.dcache.WriteReq_miss_rate         0.065082                       # miss rate for WriteReq accesses
+system.cpu.dcache.WriteReq_misses              400331                       # number of WriteReq misses
+system.cpu.dcache.WriteReq_mshr_miss_latency   5204682500                       # number of WriteReq MSHR miss cycles
+system.cpu.dcache.WriteReq_mshr_miss_rate     0.065082                       # mshr miss rate for WriteReq accesses
+system.cpu.dcache.WriteReq_mshr_misses         400331                       # number of WriteReq MSHR misses
+system.cpu.dcache.WriteReq_mshr_uncacheable_latency   1164414500                       # number of WriteReq MSHR uncacheable cycles
 system.cpu.dcache.avg_blocked_cycles_no_mshrs <err: div-0>                       # average number of cycles each access was blocked
 system.cpu.dcache.avg_blocked_cycles_no_targets <err: div-0>                       # average number of cycles each access was blocked
-system.cpu.dcache.avg_refs                   6.857760                       # Average number of references to valid blocks.
+system.cpu.dcache.avg_refs                   6.854770                       # Average number of references to valid blocks.
 system.cpu.dcache.blocked_no_mshrs                  0                       # number of cycles access was blocked
 system.cpu.dcache.blocked_no_targets                0                       # number of cycles access was blocked
 system.cpu.dcache.blocked_cycles_no_mshrs            0                       # number of cycles access was blocked
 system.cpu.dcache.blocked_cycles_no_targets            0                       # number of cycles access was blocked
 system.cpu.dcache.cache_copies                      0                       # number of cache copies performed
-system.cpu.dcache.demand_accesses            16076883                       # number of demand (read+write) accesses
-system.cpu.dcache.demand_avg_miss_latency 13021.027370                       # average overall miss latency
-system.cpu.dcache.demand_avg_mshr_miss_latency 12020.999022                       # average overall mshr miss latency
-system.cpu.dcache.demand_hits                14030883                       # number of demand (read+write) hits
-system.cpu.dcache.demand_miss_latency     26641022000                       # number of demand (read+write) miss cycles
-system.cpu.dcache.demand_miss_rate           0.127263                       # miss rate for demand accesses
-system.cpu.dcache.demand_misses               2046000                       # number of demand (read+write) misses
+system.cpu.dcache.demand_accesses            15677004                       # number of demand (read+write) accesses
+system.cpu.dcache.demand_avg_miss_latency 13383.714129                       # average overall miss latency
+system.cpu.dcache.demand_avg_mshr_miss_latency 12383.692484                       # average overall mshr miss latency
+system.cpu.dcache.demand_hits                13551849                       # number of demand (read+write) hits
+system.cpu.dcache.demand_miss_latency     28442467000                       # number of demand (read+write) miss cycles
+system.cpu.dcache.demand_miss_rate           0.135559                       # miss rate for demand accesses
+system.cpu.dcache.demand_misses               2125155                       # number of demand (read+write) misses
 system.cpu.dcache.demand_mshr_hits                  0                       # number of demand (read+write) MSHR hits
-system.cpu.dcache.demand_mshr_miss_latency  24594964000                       # number of demand (read+write) MSHR miss cycles
-system.cpu.dcache.demand_mshr_miss_rate      0.127263                       # mshr miss rate for demand accesses
-system.cpu.dcache.demand_mshr_misses          2046000                       # number of demand (read+write) MSHR misses
+system.cpu.dcache.demand_mshr_miss_latency  26317266000                       # number of demand (read+write) MSHR miss cycles
+system.cpu.dcache.demand_mshr_miss_rate      0.135559                       # mshr miss rate for demand accesses
+system.cpu.dcache.demand_mshr_misses          2125155                       # number of demand (read+write) MSHR misses
 system.cpu.dcache.fast_writes                       0                       # number of fast writes performed
 system.cpu.dcache.mshr_cap_events                   0                       # number of times MSHR cap was activated
 system.cpu.dcache.no_allocate_misses                0                       # Number of misses that were no-allocate
-system.cpu.dcache.overall_accesses           16076883                       # number of overall (read+write) accesses
-system.cpu.dcache.overall_avg_miss_latency 13021.027370                       # average overall miss latency
-system.cpu.dcache.overall_avg_mshr_miss_latency 12020.999022                       # average overall mshr miss latency
-system.cpu.dcache.overall_avg_mshr_uncacheable_latency            0                       # average overall mshr uncacheable latency
-system.cpu.dcache.overall_hits               14030883                       # number of overall hits
-system.cpu.dcache.overall_miss_latency    26641022000                       # number of overall miss cycles
-system.cpu.dcache.overall_miss_rate          0.127263                       # miss rate for overall accesses
-system.cpu.dcache.overall_misses              2046000                       # number of overall misses
+system.cpu.dcache.overall_accesses           15677004                       # number of overall (read+write) accesses
+system.cpu.dcache.overall_avg_miss_latency 13383.714129                       # average overall miss latency
+system.cpu.dcache.overall_avg_mshr_miss_latency 12383.692484                       # average overall mshr miss latency
+system.cpu.dcache.overall_avg_mshr_uncacheable_latency          inf                       # average overall mshr uncacheable latency
+system.cpu.dcache.overall_hits               13551849                       # number of overall hits
+system.cpu.dcache.overall_miss_latency    28442467000                       # number of overall miss cycles
+system.cpu.dcache.overall_miss_rate          0.135559                       # miss rate for overall accesses
+system.cpu.dcache.overall_misses              2125155                       # number of overall misses
 system.cpu.dcache.overall_mshr_hits                 0                       # number of overall MSHR hits
-system.cpu.dcache.overall_mshr_miss_latency  24594964000                       # number of overall MSHR miss cycles
-system.cpu.dcache.overall_mshr_miss_rate     0.127263                       # mshr miss rate for overall accesses
-system.cpu.dcache.overall_mshr_misses         2046000                       # number of overall MSHR misses
-system.cpu.dcache.overall_mshr_uncacheable_latency            0                       # number of overall MSHR uncacheable cycles
-system.cpu.dcache.overall_mshr_uncacheable_misses        16165                       # number of overall MSHR uncacheable misses
+system.cpu.dcache.overall_mshr_miss_latency  26317266000                       # number of overall MSHR miss cycles
+system.cpu.dcache.overall_mshr_miss_rate     0.135559                       # mshr miss rate for overall accesses
+system.cpu.dcache.overall_mshr_misses         2125155                       # number of overall MSHR misses
+system.cpu.dcache.overall_mshr_uncacheable_latency   1995240500                       # number of overall MSHR uncacheable cycles
+system.cpu.dcache.overall_mshr_uncacheable_misses            0                       # number of overall MSHR uncacheable misses
 system.cpu.dcache.prefetcher.num_hwpf_already_in_cache            0                       # number of hwpf that were already in the cache
 system.cpu.dcache.prefetcher.num_hwpf_already_in_mshr            0                       # number of hwpf that were already in mshr
 system.cpu.dcache.prefetcher.num_hwpf_already_in_prefetcher            0                       # number of hwpf that were already in the prefetch queue
@@ -78,95 +97,69 @@ system.cpu.dcache.prefetcher.num_hwpf_issued            0
 system.cpu.dcache.prefetcher.num_hwpf_removed_MSHR_hit            0                       # number of hwpf removed because MSHR allocated
 system.cpu.dcache.prefetcher.num_hwpf_span_page            0                       # number of hwpf spanning a virtual page
 system.cpu.dcache.prefetcher.num_hwpf_squashed_from_miss            0                       # number of hwpf that got squashed due to a miss aborting calculation time
-system.cpu.dcache.protocol.hwpf_invalid             0                       # hard prefetch misses to invalid blocks
-system.cpu.dcache.protocol.read_invalid       1741683                       # read misses to invalid blocks
-system.cpu.dcache.protocol.snoop_inv_exclusive            0                       # Invalidate snoops on exclusive blocks
-system.cpu.dcache.protocol.snoop_inv_invalid            0                       # Invalidate snoops on invalid blocks
-system.cpu.dcache.protocol.snoop_inv_modified            0                       # Invalidate snoops on modified blocks
-system.cpu.dcache.protocol.snoop_inv_owned            0                       # Invalidate snoops on owned blocks
-system.cpu.dcache.protocol.snoop_inv_shared            0                       # Invalidate snoops on shared blocks
-system.cpu.dcache.protocol.snoop_read_exclusive            9                       # read snoops on exclusive blocks
-system.cpu.dcache.protocol.snoop_read_modified           15                       # read snoops on modified blocks
-system.cpu.dcache.protocol.snoop_read_owned            4                       # read snoops on owned blocks
-system.cpu.dcache.protocol.snoop_read_shared           92                       # read snoops on shared blocks
-system.cpu.dcache.protocol.snoop_readex_exclusive            0                       # readEx snoops on exclusive blocks
-system.cpu.dcache.protocol.snoop_readex_modified            0                       # readEx snoops on modified blocks
-system.cpu.dcache.protocol.snoop_readex_owned            0                       # readEx snoops on owned blocks
-system.cpu.dcache.protocol.snoop_readex_shared            0                       # readEx snoops on shared blocks
-system.cpu.dcache.protocol.snoop_upgrade_owned            0                       # upgrade snoops on owned blocks
-system.cpu.dcache.protocol.snoop_upgrade_shared            0                       # upgradee snoops on shared blocks
-system.cpu.dcache.protocol.snoop_writeinv_exclusive            0                       # WriteInvalidate snoops on exclusive blocks
-system.cpu.dcache.protocol.snoop_writeinv_invalid            0                       # WriteInvalidate snoops on invalid blocks
-system.cpu.dcache.protocol.snoop_writeinv_modified            0                       # WriteInvalidate snoops on modified blocks
-system.cpu.dcache.protocol.snoop_writeinv_owned            0                       # WriteInvalidate snoops on owned blocks
-system.cpu.dcache.protocol.snoop_writeinv_shared            0                       # WriteInvalidate snoops on shared blocks
-system.cpu.dcache.protocol.swpf_invalid             0                       # soft prefetch misses to invalid blocks
-system.cpu.dcache.protocol.write_invalid       304305                       # write misses to invalid blocks
-system.cpu.dcache.protocol.write_owned              8                       # write misses to owned blocks
-system.cpu.dcache.protocol.write_shared             4                       # write misses to shared blocks
-system.cpu.dcache.replacements                2045476                       # number of replacements
-system.cpu.dcache.sampled_refs                2045988                       # Sample count of references to valid blocks.
+system.cpu.dcache.replacements                2046194                       # number of replacements
+system.cpu.dcache.sampled_refs                2046706                       # Sample count of references to valid blocks.
 system.cpu.dcache.soft_prefetch_mshr_full            0                       # number of mshr full events for SW prefetching instrutions
-system.cpu.dcache.tagsinuse                511.987904                       # Cycle average of tags in use
-system.cpu.dcache.total_refs                 14030895                       # Total number of references to valid blocks.
-system.cpu.dcache.warmup_cycle               57945000                       # Cycle when the warmup percentage was hit.
-system.cpu.dcache.writebacks                   429989                       # number of writebacks
+system.cpu.dcache.tagsinuse                511.987834                       # Cycle average of tags in use
+system.cpu.dcache.total_refs                 14029698                       # Total number of references to valid blocks.
+system.cpu.dcache.warmup_cycle               58297000                       # Cycle when the warmup percentage was hit.
+system.cpu.dcache.writebacks                   429991                       # number of writebacks
 system.cpu.dtb.accesses                       1020787                       # DTB accesses
 system.cpu.dtb.acv                                367                       # DTB access violations
-system.cpu.dtb.hits                          16057425                       # DTB hits
+system.cpu.dtb.hits                          16056951                       # DTB hits
 system.cpu.dtb.misses                           11471                       # DTB misses
 system.cpu.dtb.read_accesses                   728856                       # DTB read accesses
 system.cpu.dtb.read_acv                           210                       # DTB read access violations
-system.cpu.dtb.read_hits                      9706740                       # DTB read hits
+system.cpu.dtb.read_hits                      9706492                       # DTB read hits
 system.cpu.dtb.read_misses                      10329                       # DTB read misses
 system.cpu.dtb.write_accesses                  291931                       # DTB write accesses
 system.cpu.dtb.write_acv                          157                       # DTB write access violations
-system.cpu.dtb.write_hits                     6350685                       # DTB write hits
+system.cpu.dtb.write_hits                     6350459                       # DTB write hits
 system.cpu.dtb.write_misses                      1142                       # DTB write misses
-system.cpu.icache.ReadReq_accesses           60037407                       # number of ReadReq accesses(hits+misses)
-system.cpu.icache.ReadReq_avg_miss_latency 12029.456206                       # average ReadReq miss latency
-system.cpu.icache.ReadReq_avg_mshr_miss_latency 11028.713640                       # average ReadReq mshr miss latency
-system.cpu.icache.ReadReq_hits               59110217                       # number of ReadReq hits
-system.cpu.icache.ReadReq_miss_latency    11153591500                       # number of ReadReq miss cycles
-system.cpu.icache.ReadReq_miss_rate          0.015444                       # miss rate for ReadReq accesses
-system.cpu.icache.ReadReq_misses               927190                       # number of ReadReq misses
-system.cpu.icache.ReadReq_mshr_miss_latency  10225713000                       # number of ReadReq MSHR miss cycles
-system.cpu.icache.ReadReq_mshr_miss_rate     0.015444                       # mshr miss rate for ReadReq accesses
-system.cpu.icache.ReadReq_mshr_misses          927190                       # number of ReadReq MSHR misses
+system.cpu.icache.ReadReq_accesses           60034775                       # number of ReadReq accesses(hits+misses)
+system.cpu.icache.ReadReq_avg_miss_latency 12033.060657                       # average ReadReq miss latency
+system.cpu.icache.ReadReq_avg_mshr_miss_latency 11032.326155                       # average ReadReq mshr miss latency
+system.cpu.icache.ReadReq_hits               59106935                       # number of ReadReq hits
+system.cpu.icache.ReadReq_miss_latency    11164755000                       # number of ReadReq miss cycles
+system.cpu.icache.ReadReq_miss_rate          0.015455                       # miss rate for ReadReq accesses
+system.cpu.icache.ReadReq_misses               927840                       # number of ReadReq misses
+system.cpu.icache.ReadReq_mshr_miss_latency  10236233500                       # number of ReadReq MSHR miss cycles
+system.cpu.icache.ReadReq_mshr_miss_rate     0.015455                       # mshr miss rate for ReadReq accesses
+system.cpu.icache.ReadReq_mshr_misses          927840                       # number of ReadReq MSHR misses
 system.cpu.icache.avg_blocked_cycles_no_mshrs <err: div-0>                       # average number of cycles each access was blocked
 system.cpu.icache.avg_blocked_cycles_no_targets <err: div-0>                       # average number of cycles each access was blocked
-system.cpu.icache.avg_refs                  63.763003                       # Average number of references to valid blocks.
+system.cpu.icache.avg_refs                  63.714789                       # Average number of references to valid blocks.
 system.cpu.icache.blocked_no_mshrs                  0                       # number of cycles access was blocked
 system.cpu.icache.blocked_no_targets                0                       # number of cycles access was blocked
 system.cpu.icache.blocked_cycles_no_mshrs            0                       # number of cycles access was blocked
 system.cpu.icache.blocked_cycles_no_targets            0                       # number of cycles access was blocked
 system.cpu.icache.cache_copies                      0                       # number of cache copies performed
-system.cpu.icache.demand_accesses            60037407                       # number of demand (read+write) accesses
-system.cpu.icache.demand_avg_miss_latency 12029.456206                       # average overall miss latency
-system.cpu.icache.demand_avg_mshr_miss_latency 11028.713640                       # average overall mshr miss latency
-system.cpu.icache.demand_hits                59110217                       # number of demand (read+write) hits
-system.cpu.icache.demand_miss_latency     11153591500                       # number of demand (read+write) miss cycles
-system.cpu.icache.demand_miss_rate           0.015444                       # miss rate for demand accesses
-system.cpu.icache.demand_misses                927190                       # number of demand (read+write) misses
+system.cpu.icache.demand_accesses            60034775                       # number of demand (read+write) accesses
+system.cpu.icache.demand_avg_miss_latency 12033.060657                       # average overall miss latency
+system.cpu.icache.demand_avg_mshr_miss_latency 11032.326155                       # average overall mshr miss latency
+system.cpu.icache.demand_hits                59106935                       # number of demand (read+write) hits
+system.cpu.icache.demand_miss_latency     11164755000                       # number of demand (read+write) miss cycles
+system.cpu.icache.demand_miss_rate           0.015455                       # miss rate for demand accesses
+system.cpu.icache.demand_misses                927840                       # number of demand (read+write) misses
 system.cpu.icache.demand_mshr_hits                  0                       # number of demand (read+write) MSHR hits
-system.cpu.icache.demand_mshr_miss_latency  10225713000                       # number of demand (read+write) MSHR miss cycles
-system.cpu.icache.demand_mshr_miss_rate      0.015444                       # mshr miss rate for demand accesses
-system.cpu.icache.demand_mshr_misses           927190                       # number of demand (read+write) MSHR misses
+system.cpu.icache.demand_mshr_miss_latency  10236233500                       # number of demand (read+write) MSHR miss cycles
+system.cpu.icache.demand_mshr_miss_rate      0.015455                       # mshr miss rate for demand accesses
+system.cpu.icache.demand_mshr_misses           927840                       # number of demand (read+write) MSHR misses
 system.cpu.icache.fast_writes                       0                       # number of fast writes performed
 system.cpu.icache.mshr_cap_events                   0                       # number of times MSHR cap was activated
 system.cpu.icache.no_allocate_misses                0                       # Number of misses that were no-allocate
-system.cpu.icache.overall_accesses           60037407                       # number of overall (read+write) accesses
-system.cpu.icache.overall_avg_miss_latency 12029.456206                       # average overall miss latency
-system.cpu.icache.overall_avg_mshr_miss_latency 11028.713640                       # average overall mshr miss latency
+system.cpu.icache.overall_accesses           60034775                       # number of overall (read+write) accesses
+system.cpu.icache.overall_avg_miss_latency 12033.060657                       # average overall miss latency
+system.cpu.icache.overall_avg_mshr_miss_latency 11032.326155                       # average overall mshr miss latency
 system.cpu.icache.overall_avg_mshr_uncacheable_latency <err: div-0>                       # average overall mshr uncacheable latency
-system.cpu.icache.overall_hits               59110217                       # number of overall hits
-system.cpu.icache.overall_miss_latency    11153591500                       # number of overall miss cycles
-system.cpu.icache.overall_miss_rate          0.015444                       # miss rate for overall accesses
-system.cpu.icache.overall_misses               927190                       # number of overall misses
+system.cpu.icache.overall_hits               59106935                       # number of overall hits
+system.cpu.icache.overall_miss_latency    11164755000                       # number of overall miss cycles
+system.cpu.icache.overall_miss_rate          0.015455                       # miss rate for overall accesses
+system.cpu.icache.overall_misses               927840                       # number of overall misses
 system.cpu.icache.overall_mshr_hits                 0                       # number of overall MSHR hits
-system.cpu.icache.overall_mshr_miss_latency  10225713000                       # number of overall MSHR miss cycles
-system.cpu.icache.overall_mshr_miss_rate     0.015444                       # mshr miss rate for overall accesses
-system.cpu.icache.overall_mshr_misses          927190                       # number of overall MSHR misses
+system.cpu.icache.overall_mshr_miss_latency  10236233500                       # number of overall MSHR miss cycles
+system.cpu.icache.overall_mshr_miss_rate     0.015455                       # mshr miss rate for overall accesses
+system.cpu.icache.overall_mshr_misses          927840                       # number of overall MSHR misses
 system.cpu.icache.overall_mshr_uncacheable_latency            0                       # number of overall MSHR uncacheable cycles
 system.cpu.icache.overall_mshr_uncacheable_misses            0                       # number of overall MSHR uncacheable misses
 system.cpu.icache.prefetcher.num_hwpf_already_in_cache            0                       # number of hwpf that were already in the cache
@@ -178,45 +171,19 @@ system.cpu.icache.prefetcher.num_hwpf_issued            0
 system.cpu.icache.prefetcher.num_hwpf_removed_MSHR_hit            0                       # number of hwpf removed because MSHR allocated
 system.cpu.icache.prefetcher.num_hwpf_span_page            0                       # number of hwpf spanning a virtual page
 system.cpu.icache.prefetcher.num_hwpf_squashed_from_miss            0                       # number of hwpf that got squashed due to a miss aborting calculation time
-system.cpu.icache.protocol.hwpf_invalid             0                       # hard prefetch misses to invalid blocks
-system.cpu.icache.protocol.read_invalid        927190                       # read misses to invalid blocks
-system.cpu.icache.protocol.snoop_inv_exclusive            0                       # Invalidate snoops on exclusive blocks
-system.cpu.icache.protocol.snoop_inv_invalid            0                       # Invalidate snoops on invalid blocks
-system.cpu.icache.protocol.snoop_inv_modified            0                       # Invalidate snoops on modified blocks
-system.cpu.icache.protocol.snoop_inv_owned            0                       # Invalidate snoops on owned blocks
-system.cpu.icache.protocol.snoop_inv_shared            0                       # Invalidate snoops on shared blocks
-system.cpu.icache.protocol.snoop_read_exclusive          644                       # read snoops on exclusive blocks
-system.cpu.icache.protocol.snoop_read_modified            0                       # read snoops on modified blocks
-system.cpu.icache.protocol.snoop_read_owned            0                       # read snoops on owned blocks
-system.cpu.icache.protocol.snoop_read_shared         1040                       # read snoops on shared blocks
-system.cpu.icache.protocol.snoop_readex_exclusive          146                       # readEx snoops on exclusive blocks
-system.cpu.icache.protocol.snoop_readex_modified            0                       # readEx snoops on modified blocks
-system.cpu.icache.protocol.snoop_readex_owned            0                       # readEx snoops on owned blocks
-system.cpu.icache.protocol.snoop_readex_shared            2                       # readEx snoops on shared blocks
-system.cpu.icache.protocol.snoop_upgrade_owned            0                       # upgrade snoops on owned blocks
-system.cpu.icache.protocol.snoop_upgrade_shared           12                       # upgradee snoops on shared blocks
-system.cpu.icache.protocol.snoop_writeinv_exclusive            0                       # WriteInvalidate snoops on exclusive blocks
-system.cpu.icache.protocol.snoop_writeinv_invalid            0                       # WriteInvalidate snoops on invalid blocks
-system.cpu.icache.protocol.snoop_writeinv_modified            0                       # WriteInvalidate snoops on modified blocks
-system.cpu.icache.protocol.snoop_writeinv_owned            0                       # WriteInvalidate snoops on owned blocks
-system.cpu.icache.protocol.snoop_writeinv_shared            0                       # WriteInvalidate snoops on shared blocks
-system.cpu.icache.protocol.swpf_invalid             0                       # soft prefetch misses to invalid blocks
-system.cpu.icache.protocol.write_invalid            0                       # write misses to invalid blocks
-system.cpu.icache.protocol.write_owned              0                       # write misses to owned blocks
-system.cpu.icache.protocol.write_shared             0                       # write misses to shared blocks
-system.cpu.icache.replacements                 926519                       # number of replacements
-system.cpu.icache.sampled_refs                 927030                       # Sample count of references to valid blocks.
+system.cpu.icache.replacements                 927169                       # number of replacements
+system.cpu.icache.sampled_refs                 927680                       # Sample count of references to valid blocks.
 system.cpu.icache.soft_prefetch_mshr_full            0                       # number of mshr full events for SW prefetching instrutions
-system.cpu.icache.tagsinuse                508.761542                       # Cycle average of tags in use
-system.cpu.icache.total_refs                 59110217                       # Total number of references to valid blocks.
-system.cpu.icache.warmup_cycle            34634685000                       # Cycle when the warmup percentage was hit.
+system.cpu.icache.tagsinuse                508.749374                       # Cycle average of tags in use
+system.cpu.icache.total_refs                 59106935                       # Total number of references to valid blocks.
+system.cpu.icache.warmup_cycle            35000367000                       # Cycle when the warmup percentage was hit.
 system.cpu.icache.writebacks                        0                       # number of writebacks
-system.cpu.idle_fraction                     0.940784                       # Percentage of idle cycles
-system.cpu.itb.accesses                       4977586                       # ITB accesses
+system.cpu.idle_fraction                     0.939637                       # Percentage of idle cycles
+system.cpu.itb.accesses                       4978395                       # ITB accesses
 system.cpu.itb.acv                                184                       # ITB acv
-system.cpu.itb.hits                           4972580                       # ITB hits
+system.cpu.itb.hits                           4973389                       # ITB hits
 system.cpu.itb.misses                            5006                       # ITB misses
-system.cpu.kern.callpal                        192752                       # number of callpals executed
+system.cpu.kern.callpal                        192813                       # number of callpals executed
 system.cpu.kern.callpal_cserve                      1      0.00%      0.00% # number of callpals executed
 system.cpu.kern.callpal_wrmces                      1      0.00%      0.00% # number of callpals executed
 system.cpu.kern.callpal_wrfen                       1      0.00%      0.00% # number of callpals executed
@@ -224,50 +191,50 @@ system.cpu.kern.callpal_wrvptptr                    1      0.00%      0.00% # nu
 system.cpu.kern.callpal_swpctx                   4176      2.17%      2.17% # number of callpals executed
 system.cpu.kern.callpal_tbi                        54      0.03%      2.20% # number of callpals executed
 system.cpu.kern.callpal_wrent                       7      0.00%      2.20% # number of callpals executed
-system.cpu.kern.callpal_swpipl                 175824     91.22%     93.42% # number of callpals executed
-system.cpu.kern.callpal_rdps                     6824      3.54%     96.96% # number of callpals executed
+system.cpu.kern.callpal_swpipl                 175877     91.22%     93.42% # number of callpals executed
+system.cpu.kern.callpal_rdps                     6828      3.54%     96.96% # number of callpals executed
 system.cpu.kern.callpal_wrkgp                       1      0.00%     96.96% # number of callpals executed
 system.cpu.kern.callpal_wrusp                       7      0.00%     96.96% # number of callpals executed
 system.cpu.kern.callpal_rdusp                       9      0.00%     96.97% # number of callpals executed
 system.cpu.kern.callpal_whami                       2      0.00%     96.97% # number of callpals executed
-system.cpu.kern.callpal_rti                      5148      2.67%     99.64% # number of callpals executed
+system.cpu.kern.callpal_rti                      5152      2.67%     99.64% # number of callpals executed
 system.cpu.kern.callpal_callsys                   515      0.27%     99.91% # number of callpals executed
 system.cpu.kern.callpal_imb                       181      0.09%    100.00% # number of callpals executed
 system.cpu.kern.inst.arm                            0                       # number of arm instructions executed
-system.cpu.kern.inst.hwrei                     211836                       # number of hwrei instructions executed
+system.cpu.kern.inst.hwrei                     211901                       # number of hwrei instructions executed
 system.cpu.kern.inst.quiesce                     6181                       # number of quiesce instructions executed
-system.cpu.kern.ipl_count                      183027                       # number of times we switched to this ipl
-system.cpu.kern.ipl_count_0                     74862     40.90%     40.90% # number of times we switched to this ipl
+system.cpu.kern.ipl_count                      183088                       # number of times we switched to this ipl
+system.cpu.kern.ipl_count_0                     74875     40.90%     40.90% # number of times we switched to this ipl
 system.cpu.kern.ipl_count_21                      131      0.07%     40.97% # number of times we switched to this ipl
-system.cpu.kern.ipl_count_22                     1923      1.05%     42.02% # number of times we switched to this ipl
-system.cpu.kern.ipl_count_31                   106111     57.98%    100.00% # number of times we switched to this ipl
-system.cpu.kern.ipl_good                       149044                       # number of times we switched to this ipl from a different ipl
-system.cpu.kern.ipl_good_0                      73495     49.31%     49.31% # number of times we switched to this ipl from a different ipl
+system.cpu.kern.ipl_count_22                     1927      1.05%     42.02% # number of times we switched to this ipl
+system.cpu.kern.ipl_count_31                   106155     57.98%    100.00% # number of times we switched to this ipl
+system.cpu.kern.ipl_good                       149074                       # number of times we switched to this ipl from a different ipl
+system.cpu.kern.ipl_good_0                      73508     49.31%     49.31% # number of times we switched to this ipl from a different ipl
 system.cpu.kern.ipl_good_21                       131      0.09%     49.40% # number of times we switched to this ipl from a different ipl
-system.cpu.kern.ipl_good_22                      1923      1.29%     50.69% # number of times we switched to this ipl from a different ipl
-system.cpu.kern.ipl_good_31                     73495     49.31%    100.00% # number of times we switched to this ipl from a different ipl
-system.cpu.kern.ipl_ticks                1907145727000                       # number of cycles we spent at this ipl
-system.cpu.kern.ipl_ticks_0              1851261210000     97.07%     97.07% # number of cycles we spent at this ipl
-system.cpu.kern.ipl_ticks_21                 73754500      0.00%     97.07% # number of cycles we spent at this ipl
-system.cpu.kern.ipl_ticks_22                531976500      0.03%     97.10% # number of cycles we spent at this ipl
-system.cpu.kern.ipl_ticks_31              55278786000      2.90%    100.00% # number of cycles we spent at this ipl
-system.cpu.kern.ipl_used_0                   0.981740                       # fraction of swpipl calls that actually changed the ipl
+system.cpu.kern.ipl_good_22                      1927      1.29%     50.69% # number of times we switched to this ipl from a different ipl
+system.cpu.kern.ipl_good_31                     73508     49.31%    100.00% # number of times we switched to this ipl from a different ipl
+system.cpu.kern.ipl_ticks                1910308997000                       # number of cycles we spent at this ipl
+system.cpu.kern.ipl_ticks_0              1853401678500     97.02%     97.02% # number of cycles we spent at this ipl
+system.cpu.kern.ipl_ticks_21                 78202500      0.00%     97.03% # number of cycles we spent at this ipl
+system.cpu.kern.ipl_ticks_22                538133000      0.03%     97.05% # number of cycles we spent at this ipl
+system.cpu.kern.ipl_ticks_31              56290983000      2.95%    100.00% # number of cycles we spent at this ipl
+system.cpu.kern.ipl_used_0                   0.981743                       # fraction of swpipl calls that actually changed the ipl
 system.cpu.kern.ipl_used_21                         1                       # fraction of swpipl calls that actually changed the ipl
 system.cpu.kern.ipl_used_22                         1                       # fraction of swpipl calls that actually changed the ipl
-system.cpu.kern.ipl_used_31                  0.692624                       # fraction of swpipl calls that actually changed the ipl
-system.cpu.kern.mode_good_kernel                 1910                      
-system.cpu.kern.mode_good_user                   1740                      
+system.cpu.kern.ipl_used_31                  0.692459                       # fraction of swpipl calls that actually changed the ipl
+system.cpu.kern.mode_good_kernel                 1908                      
+system.cpu.kern.mode_good_user                   1738                      
 system.cpu.kern.mode_good_idle                    170                      
-system.cpu.kern.mode_switch_kernel               5894                       # number of protection mode switches
-system.cpu.kern.mode_switch_user                 1740                       # number of protection mode switches
-system.cpu.kern.mode_switch_idle                 2096                       # number of protection mode switches
-system.cpu.kern.mode_switch_good             1.405165                       # fraction of useful protection mode switches
-system.cpu.kern.mode_switch_good_kernel      0.324058                       # fraction of useful protection mode switches
+system.cpu.kern.mode_switch_kernel               5896                       # number of protection mode switches
+system.cpu.kern.mode_switch_user                 1738                       # number of protection mode switches
+system.cpu.kern.mode_switch_idle                 2098                       # number of protection mode switches
+system.cpu.kern.mode_switch_good             1.404639                       # fraction of useful protection mode switches
+system.cpu.kern.mode_switch_good_kernel      0.323609                       # fraction of useful protection mode switches
 system.cpu.kern.mode_switch_good_user               1                       # fraction of useful protection mode switches
-system.cpu.kern.mode_switch_good_idle        0.081107                       # fraction of useful protection mode switches
-system.cpu.kern.mode_ticks_kernel         42657550000      2.24%      2.24% # number of ticks spent at the given mode
-system.cpu.kern.mode_ticks_user            4648649000      0.24%      2.48% # number of ticks spent at the given mode
-system.cpu.kern.mode_ticks_idle          1859839526000     97.52%    100.00% # number of ticks spent at the given mode
+system.cpu.kern.mode_switch_good_idle        0.081030                       # fraction of useful protection mode switches
+system.cpu.kern.mode_ticks_kernel         43115749000      2.26%      2.26% # number of ticks spent at the given mode
+system.cpu.kern.mode_ticks_user            4716926000      0.25%      2.50% # number of ticks spent at the given mode
+system.cpu.kern.mode_ticks_idle          1862476320000     97.50%    100.00% # number of ticks spent at the given mode
 system.cpu.kern.swap_context                     4177                       # number of times the context was actually changed
 system.cpu.kern.syscall                           326                       # number of syscalls executed
 system.cpu.kern.syscall_2                           8      2.45%      2.45% # number of syscalls executed
@@ -300,10 +267,10 @@ system.cpu.kern.syscall_98                          2      0.61%     97.55% # nu
 system.cpu.kern.syscall_132                         4      1.23%     98.77% # number of syscalls executed
 system.cpu.kern.syscall_144                         2      0.61%     99.39% # number of syscalls executed
 system.cpu.kern.syscall_147                         2      0.61%    100.00% # number of syscalls executed
-system.cpu.not_idle_fraction                 0.059216                       # Percentage of non-idle cycles
-system.cpu.numCycles                     1907146437000                       # number of cpu cycles simulated
-system.cpu.num_insts                         60037406                       # Number of instructions executed
-system.cpu.num_refs                          16305563                       # Number of memory references
+system.cpu.not_idle_fraction                 0.060363                       # Percentage of non-idle cycles
+system.cpu.numCycles                     1910309711000                       # number of cpu cycles simulated
+system.cpu.num_insts                         60034774                       # Number of instructions executed
+system.cpu.num_refs                          16305091                       # Number of memory references
 system.disk0.dma_read_bytes                      1024                       # Number of bytes transfered via DMA reads (not PRD).
 system.disk0.dma_read_full_pages                    0                       # Number of full page size DMA reads (not PRD).
 system.disk0.dma_read_txs                           1                       # Number of DMA read transactions (not PRD).
@@ -316,70 +283,79 @@ system.disk2.dma_read_txs                           0                       # Nu
 system.disk2.dma_write_bytes                     8192                       # Number of bytes transfered via DMA writes.
 system.disk2.dma_write_full_pages                   1                       # Number of full page size DMA writes.
 system.disk2.dma_write_txs                          1                       # Number of DMA write transactions.
-system.l2c.ReadExReq_accesses                  304305                       # number of ReadExReq accesses(hits+misses)
-system.l2c.ReadExReq_avg_miss_latency    13000.153945                       # average ReadExReq miss latency
-system.l2c.ReadExReq_avg_mshr_miss_latency 11000.153945                       # average ReadExReq mshr miss latency
-system.l2c.ReadExReq_hits                      187380                       # number of ReadExReq hits
-system.l2c.ReadExReq_miss_latency          1520043000                       # number of ReadExReq miss cycles
-system.l2c.ReadExReq_miss_rate               0.384236                       # miss rate for ReadExReq accesses
-system.l2c.ReadExReq_misses                    116925                       # number of ReadExReq misses
-system.l2c.ReadExReq_mshr_miss_latency     1286193000                       # number of ReadExReq MSHR miss cycles
-system.l2c.ReadExReq_mshr_miss_rate          0.384236                       # mshr miss rate for ReadExReq accesses
-system.l2c.ReadExReq_mshr_misses               116925                       # number of ReadExReq MSHR misses
-system.l2c.ReadReq_accesses                   2668854                       # number of ReadReq accesses(hits+misses)
-system.l2c.ReadReq_avg_miss_latency      13000.065889                       # average ReadReq miss latency
-system.l2c.ReadReq_avg_mshr_miss_latency 11000.065889                       # average ReadReq mshr miss latency
-system.l2c.ReadReq_hits                       1727874                       # number of ReadReq hits
-system.l2c.ReadReq_miss_latency           12232802000                       # number of ReadReq miss cycles
-system.l2c.ReadReq_miss_rate                 0.352578                       # miss rate for ReadReq accesses
-system.l2c.ReadReq_misses                      940980                       # number of ReadReq misses
-system.l2c.ReadReq_mshr_miss_latency      10350842000                       # number of ReadReq MSHR miss cycles
-system.l2c.ReadReq_mshr_miss_rate            0.352578                       # mshr miss rate for ReadReq accesses
-system.l2c.ReadReq_mshr_misses                 940980                       # number of ReadReq MSHR misses
-system.l2c.ReadReq_mshr_uncacheable              6727                       # number of ReadReq MSHR uncacheable
-system.l2c.ReadResp_avg_mshr_uncacheable_latency          inf                       # average ReadResp mshr uncacheable latency
-system.l2c.ReadResp_mshr_uncacheable_latency    750102000                       # number of ReadResp MSHR uncacheable cycles
-system.l2c.WriteReq_mshr_uncacheable             9438                       # number of WriteReq MSHR uncacheable
-system.l2c.WriteResp_avg_mshr_uncacheable_latency          inf                       # average WriteResp mshr uncacheable latency
-system.l2c.WriteResp_mshr_uncacheable_latency   1050666000                       # number of WriteResp MSHR uncacheable cycles
-system.l2c.Writeback_accesses                  429989                       # number of Writeback accesses(hits+misses)
-system.l2c.Writeback_hits                      429989                       # number of Writeback hits
+system.l2c.ReadExReq_accesses                  304522                       # number of ReadExReq accesses(hits+misses)
+system.l2c.ReadExReq_avg_miss_latency    12000.719160                       # average ReadExReq miss latency
+system.l2c.ReadExReq_avg_mshr_miss_latency 11000.719160                       # average ReadExReq mshr miss latency
+system.l2c.ReadExReq_miss_latency          3654483000                       # number of ReadExReq miss cycles
+system.l2c.ReadExReq_miss_rate                      1                       # miss rate for ReadExReq accesses
+system.l2c.ReadExReq_misses                    304522                       # number of ReadExReq misses
+system.l2c.ReadExReq_mshr_miss_latency     3349961000                       # number of ReadExReq MSHR miss cycles
+system.l2c.ReadExReq_mshr_miss_rate                 1                       # mshr miss rate for ReadExReq accesses
+system.l2c.ReadExReq_mshr_misses               304522                       # number of ReadExReq MSHR misses
+system.l2c.ReadReq_accesses                   2670005                       # number of ReadReq accesses(hits+misses)
+system.l2c.ReadReq_avg_miss_latency      12000.233269                       # average ReadReq miss latency
+system.l2c.ReadReq_avg_mshr_miss_latency 11000.233269                       # average ReadReq mshr miss latency
+system.l2c.ReadReq_avg_mshr_uncacheable_latency          inf                       # average ReadReq mshr uncacheable latency
+system.l2c.ReadReq_hits                       1568273                       # number of ReadReq hits
+system.l2c.ReadReq_miss_latency           13221041000                       # number of ReadReq miss cycles
+system.l2c.ReadReq_miss_rate                 0.412633                       # miss rate for ReadReq accesses
+system.l2c.ReadReq_misses                     1101732                       # number of ReadReq misses
+system.l2c.ReadReq_mshr_miss_latency      12119309000                       # number of ReadReq MSHR miss cycles
+system.l2c.ReadReq_mshr_miss_rate            0.412633                       # mshr miss rate for ReadReq accesses
+system.l2c.ReadReq_mshr_misses                1101732                       # number of ReadReq MSHR misses
+system.l2c.ReadReq_mshr_uncacheable_latency    750102000                       # number of ReadReq MSHR uncacheable cycles
+system.l2c.UpgradeReq_accesses                 125867                       # number of UpgradeReq accesses(hits+misses)
+system.l2c.UpgradeReq_avg_miss_latency   11999.892744                       # average UpgradeReq miss latency
+system.l2c.UpgradeReq_avg_mshr_miss_latency 11000.750793                       # average UpgradeReq mshr miss latency
+system.l2c.UpgradeReq_miss_latency         1510390500                       # number of UpgradeReq miss cycles
+system.l2c.UpgradeReq_miss_rate                     1                       # miss rate for UpgradeReq accesses
+system.l2c.UpgradeReq_misses                   125867                       # number of UpgradeReq misses
+system.l2c.UpgradeReq_mshr_miss_latency    1384631500                       # number of UpgradeReq MSHR miss cycles
+system.l2c.UpgradeReq_mshr_miss_rate                1                       # mshr miss rate for UpgradeReq accesses
+system.l2c.UpgradeReq_mshr_misses              125867                       # number of UpgradeReq MSHR misses
+system.l2c.WriteReq_avg_mshr_uncacheable_latency          inf                       # average WriteReq mshr uncacheable latency
+system.l2c.WriteReq_mshr_uncacheable_latency   1051110500                       # number of WriteReq MSHR uncacheable cycles
+system.l2c.Writeback_accesses                  429991                       # number of Writeback accesses(hits+misses)
+system.l2c.Writeback_miss_rate                      1                       # miss rate for Writeback accesses
+system.l2c.Writeback_misses                    429991                       # number of Writeback misses
+system.l2c.Writeback_mshr_miss_rate                 1                       # mshr miss rate for Writeback accesses
+system.l2c.Writeback_mshr_misses               429991                       # number of Writeback MSHR misses
 system.l2c.avg_blocked_cycles_no_mshrs   <err: div-0>                       # average number of cycles each access was blocked
 system.l2c.avg_blocked_cycles_no_targets <err: div-0>                       # average number of cycles each access was blocked
-system.l2c.avg_refs                          2.216875                       # Average number of references to valid blocks.
+system.l2c.avg_refs                          1.660842                       # Average number of references to valid blocks.
 system.l2c.blocked_no_mshrs                         0                       # number of cycles access was blocked
 system.l2c.blocked_no_targets                       0                       # number of cycles access was blocked
 system.l2c.blocked_cycles_no_mshrs                  0                       # number of cycles access was blocked
 system.l2c.blocked_cycles_no_targets                0                       # number of cycles access was blocked
 system.l2c.cache_copies                             0                       # number of cache copies performed
-system.l2c.demand_accesses                    2668854                       # number of demand (read+write) accesses
-system.l2c.demand_avg_miss_latency       13000.065889                       # average overall miss latency
-system.l2c.demand_avg_mshr_miss_latency  11000.065889                       # average overall mshr miss latency
-system.l2c.demand_hits                        1727874                       # number of demand (read+write) hits
-system.l2c.demand_miss_latency            12232802000                       # number of demand (read+write) miss cycles
-system.l2c.demand_miss_rate                  0.352578                       # miss rate for demand accesses
-system.l2c.demand_misses                       940980                       # number of demand (read+write) misses
+system.l2c.demand_accesses                    2974527                       # number of demand (read+write) accesses
+system.l2c.demand_avg_miss_latency       12000.338488                       # average overall miss latency
+system.l2c.demand_avg_mshr_miss_latency  11000.338488                       # average overall mshr miss latency
+system.l2c.demand_hits                        1568273                       # number of demand (read+write) hits
+system.l2c.demand_miss_latency            16875524000                       # number of demand (read+write) miss cycles
+system.l2c.demand_miss_rate                  0.472766                       # miss rate for demand accesses
+system.l2c.demand_misses                      1406254                       # number of demand (read+write) misses
 system.l2c.demand_mshr_hits                         0                       # number of demand (read+write) MSHR hits
-system.l2c.demand_mshr_miss_latency       10350842000                       # number of demand (read+write) MSHR miss cycles
-system.l2c.demand_mshr_miss_rate             0.352578                       # mshr miss rate for demand accesses
-system.l2c.demand_mshr_misses                  940980                       # number of demand (read+write) MSHR misses
+system.l2c.demand_mshr_miss_latency       15469270000                       # number of demand (read+write) MSHR miss cycles
+system.l2c.demand_mshr_miss_rate             0.472766                       # mshr miss rate for demand accesses
+system.l2c.demand_mshr_misses                 1406254                       # number of demand (read+write) MSHR misses
 system.l2c.fast_writes                              0                       # number of fast writes performed
 system.l2c.mshr_cap_events                          0                       # number of times MSHR cap was activated
 system.l2c.no_allocate_misses                       0                       # Number of misses that were no-allocate
-system.l2c.overall_accesses                   3098843                       # number of overall (read+write) accesses
-system.l2c.overall_avg_miss_latency      13000.065889                       # average overall miss latency
-system.l2c.overall_avg_mshr_miss_latency 11000.065889                       # average overall mshr miss latency
-system.l2c.overall_avg_mshr_uncacheable_latency            0                       # average overall mshr uncacheable latency
-system.l2c.overall_hits                       2157863                       # number of overall hits
-system.l2c.overall_miss_latency           12232802000                       # number of overall miss cycles
-system.l2c.overall_miss_rate                 0.303655                       # miss rate for overall accesses
-system.l2c.overall_misses                      940980                       # number of overall misses
+system.l2c.overall_accesses                   2974527                       # number of overall (read+write) accesses
+system.l2c.overall_avg_miss_latency      12000.338488                       # average overall miss latency
+system.l2c.overall_avg_mshr_miss_latency 11000.338488                       # average overall mshr miss latency
+system.l2c.overall_avg_mshr_uncacheable_latency          inf                       # average overall mshr uncacheable latency
+system.l2c.overall_hits                       1568273                       # number of overall hits
+system.l2c.overall_miss_latency           16875524000                       # number of overall miss cycles
+system.l2c.overall_miss_rate                 0.472766                       # miss rate for overall accesses
+system.l2c.overall_misses                     1406254                       # number of overall misses
 system.l2c.overall_mshr_hits                        0                       # number of overall MSHR hits
-system.l2c.overall_mshr_miss_latency      10350842000                       # number of overall MSHR miss cycles
-system.l2c.overall_mshr_miss_rate            0.303655                       # mshr miss rate for overall accesses
-system.l2c.overall_mshr_misses                 940980                       # number of overall MSHR misses
-system.l2c.overall_mshr_uncacheable_latency            0                       # number of overall MSHR uncacheable cycles
-system.l2c.overall_mshr_uncacheable_misses        16165                       # number of overall MSHR uncacheable misses
+system.l2c.overall_mshr_miss_latency      15469270000                       # number of overall MSHR miss cycles
+system.l2c.overall_mshr_miss_rate            0.472766                       # mshr miss rate for overall accesses
+system.l2c.overall_mshr_misses                1406254                       # number of overall MSHR misses
+system.l2c.overall_mshr_uncacheable_latency   1801212500                       # number of overall MSHR uncacheable cycles
+system.l2c.overall_mshr_uncacheable_misses            0                       # number of overall MSHR uncacheable misses
 system.l2c.prefetcher.num_hwpf_already_in_cache            0                       # number of hwpf that were already in the cache
 system.l2c.prefetcher.num_hwpf_already_in_mshr            0                       # number of hwpf that were already in mshr
 system.l2c.prefetcher.num_hwpf_already_in_prefetcher            0                       # number of hwpf that were already in the prefetch queue
@@ -389,13 +365,13 @@ system.l2c.prefetcher.num_hwpf_issued               0                       # nu
 system.l2c.prefetcher.num_hwpf_removed_MSHR_hit            0                       # number of hwpf removed because MSHR allocated
 system.l2c.prefetcher.num_hwpf_span_page            0                       # number of hwpf spanning a virtual page
 system.l2c.prefetcher.num_hwpf_squashed_from_miss            0                       # number of hwpf that got squashed due to a miss aborting calculation time
-system.l2c.replacements                        992369                       # number of replacements
-system.l2c.sampled_refs                       1057905                       # Sample count of references to valid blocks.
+system.l2c.replacements                        947259                       # number of replacements
+system.l2c.sampled_refs                        965538                       # Sample count of references to valid blocks.
 system.l2c.soft_prefetch_mshr_full                  0                       # number of mshr full events for SW prefetching instrutions
-system.l2c.tagsinuse                     65468.856552                       # Cycle average of tags in use
-system.l2c.total_refs                         2345243                       # Total number of references to valid blocks.
-system.l2c.warmup_cycle                    3045832000                       # Cycle when the warmup percentage was hit.
-system.l2c.writebacks                           74072                       # number of writebacks
+system.l2c.tagsinuse                     15874.904757                       # Cycle average of tags in use
+system.l2c.total_refs                         1603606                       # Total number of references to valid blocks.
+system.l2c.warmup_cycle                    4106790000                       # Cycle when the warmup percentage was hit.
+system.l2c.writebacks                               0                       # number of writebacks
 system.tsunami.ethernet.coalescedRxDesc  <err: div-0>                       # average number of RxDesc's coalesced into each post
 system.tsunami.ethernet.coalescedRxIdle  <err: div-0>                       # average number of RxIdle's coalesced into each post
 system.tsunami.ethernet.coalescedRxOk    <err: div-0>                       # average number of RxOk's coalesced into each post
diff --git a/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-timing/stderr b/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-timing/stderr
index f34493a86..32120d9d6 100644
--- a/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-timing/stderr
+++ b/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-timing/stderr
@@ -1,3 +1,3 @@
-Listening for system connection on port 3456
-0: system.remote_gdb.listener: listening for remote gdb #0 on port 7000
+Listening for system connection on port 3457
+0: system.remote_gdb.listener: listening for remote gdb on port 7001
 warn: Entering event queue @ 0.  Starting simulation...
diff --git a/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-timing/stdout b/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-timing/stdout
index db9ad862d..69f3594a5 100644
--- a/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-timing/stdout
+++ b/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-timing/stdout
@@ -5,10 +5,9 @@ The Regents of The University of Michigan
 All Rights Reserved
 
 
-M5 compiled Jun 10 2007 14:10:03
-M5 started Mon Jun 11 01:14:34 2007
-M5 executing on iceaxe
-command line: /Users/nate/build/outgoing/build/ALPHA_FS/m5.debug -d /Users/nate/build/outgoing/build/ALPHA_FS/tests/debug/quick/10.linux-boot/alpha/linux/tsunami-simple-timing tests/run.py quick/10.linux-boot/alpha/linux/tsunami-simple-timing
+M5 compiled Aug  3 2007 04:02:11
+M5 started Fri Aug  3 04:23:34 2007
+M5 executing on zizzer.eecs.umich.edu
+command line: build/ALPHA_FS/m5.fast -d build/ALPHA_FS/tests/fast/quick/10.linux-boot/alpha/linux/tsunami-simple-timing tests/run.py quick/10.linux-boot/alpha/linux/tsunami-simple-timing
 Global frequency set at 1000000000000 ticks per second
-      0: system.tsunami.io.rtc: Real-time clock set to Thu Jan  1 00:00:00 2009
-Exiting @ tick 1907146437000 because m5_exit instruction encountered
+Exiting @ tick 1910309711000 because m5_exit instruction encountered
diff --git a/tests/quick/50.memtest/ref/alpha/linux/memtest/config.ini b/tests/quick/50.memtest/ref/alpha/linux/memtest/config.ini
index e30600052..8bac0dec4 100644
--- a/tests/quick/50.memtest/ref/alpha/linux/memtest/config.ini
+++ b/tests/quick/50.memtest/ref/alpha/linux/memtest/config.ini
@@ -27,12 +27,9 @@ test=system.cpu0.l1c.cpu_side
 
 [system.cpu0.l1c]
 type=BaseCache
-children=protocol
-adaptive_compression=false
+addr_range=0:18446744073709551615
 assoc=4
 block_size=64
-compressed_bus=false
-compression_latency=0
 hash_delay=1
 latency=1000
 lifo=false
@@ -50,12 +47,10 @@ prefetch_serial_squash=false
 prefetch_use_cpu_id=true
 prefetcher_size=100
 prioritizeRequests=false
-protocol=system.cpu0.l1c.protocol
 repl=Null
 size=32768
 split=false
 split_size=0
-store_compressed=false
 subblock_size=0
 tgts_per_mshr=8
 trace_addr=0
@@ -64,11 +59,6 @@ write_buffers=8
 cpu_side=system.cpu0.test
 mem_side=system.toL2Bus.port[1]
 
-[system.cpu0.l1c.protocol]
-type=CoherenceProtocol
-do_upgrades=true
-protocol=moesi
-
 [system.cpu1]
 type=MemTest
 children=l1c
@@ -87,12 +77,9 @@ test=system.cpu1.l1c.cpu_side
 
 [system.cpu1.l1c]
 type=BaseCache
-children=protocol
-adaptive_compression=false
+addr_range=0:18446744073709551615
 assoc=4
 block_size=64
-compressed_bus=false
-compression_latency=0
 hash_delay=1
 latency=1000
 lifo=false
@@ -110,12 +97,10 @@ prefetch_serial_squash=false
 prefetch_use_cpu_id=true
 prefetcher_size=100
 prioritizeRequests=false
-protocol=system.cpu1.l1c.protocol
 repl=Null
 size=32768
 split=false
 split_size=0
-store_compressed=false
 subblock_size=0
 tgts_per_mshr=8
 trace_addr=0
@@ -124,11 +109,6 @@ write_buffers=8
 cpu_side=system.cpu1.test
 mem_side=system.toL2Bus.port[2]
 
-[system.cpu1.l1c.protocol]
-type=CoherenceProtocol
-do_upgrades=true
-protocol=moesi
-
 [system.cpu2]
 type=MemTest
 children=l1c
@@ -147,12 +127,9 @@ test=system.cpu2.l1c.cpu_side
 
 [system.cpu2.l1c]
 type=BaseCache
-children=protocol
-adaptive_compression=false
+addr_range=0:18446744073709551615
 assoc=4
 block_size=64
-compressed_bus=false
-compression_latency=0
 hash_delay=1
 latency=1000
 lifo=false
@@ -170,12 +147,10 @@ prefetch_serial_squash=false
 prefetch_use_cpu_id=true
 prefetcher_size=100
 prioritizeRequests=false
-protocol=system.cpu2.l1c.protocol
 repl=Null
 size=32768
 split=false
 split_size=0
-store_compressed=false
 subblock_size=0
 tgts_per_mshr=8
 trace_addr=0
@@ -184,11 +159,6 @@ write_buffers=8
 cpu_side=system.cpu2.test
 mem_side=system.toL2Bus.port[3]
 
-[system.cpu2.l1c.protocol]
-type=CoherenceProtocol
-do_upgrades=true
-protocol=moesi
-
 [system.cpu3]
 type=MemTest
 children=l1c
@@ -207,12 +177,9 @@ test=system.cpu3.l1c.cpu_side
 
 [system.cpu3.l1c]
 type=BaseCache
-children=protocol
-adaptive_compression=false
+addr_range=0:18446744073709551615
 assoc=4
 block_size=64
-compressed_bus=false
-compression_latency=0
 hash_delay=1
 latency=1000
 lifo=false
@@ -230,12 +197,10 @@ prefetch_serial_squash=false
 prefetch_use_cpu_id=true
 prefetcher_size=100
 prioritizeRequests=false
-protocol=system.cpu3.l1c.protocol
 repl=Null
 size=32768
 split=false
 split_size=0
-store_compressed=false
 subblock_size=0
 tgts_per_mshr=8
 trace_addr=0
@@ -244,11 +209,6 @@ write_buffers=8
 cpu_side=system.cpu3.test
 mem_side=system.toL2Bus.port[4]
 
-[system.cpu3.l1c.protocol]
-type=CoherenceProtocol
-do_upgrades=true
-protocol=moesi
-
 [system.cpu4]
 type=MemTest
 children=l1c
@@ -267,12 +227,9 @@ test=system.cpu4.l1c.cpu_side
 
 [system.cpu4.l1c]
 type=BaseCache
-children=protocol
-adaptive_compression=false
+addr_range=0:18446744073709551615
 assoc=4
 block_size=64
-compressed_bus=false
-compression_latency=0
 hash_delay=1
 latency=1000
 lifo=false
@@ -290,12 +247,10 @@ prefetch_serial_squash=false
 prefetch_use_cpu_id=true
 prefetcher_size=100
 prioritizeRequests=false
-protocol=system.cpu4.l1c.protocol
 repl=Null
 size=32768
 split=false
 split_size=0
-store_compressed=false
 subblock_size=0
 tgts_per_mshr=8
 trace_addr=0
@@ -304,11 +259,6 @@ write_buffers=8
 cpu_side=system.cpu4.test
 mem_side=system.toL2Bus.port[5]
 
-[system.cpu4.l1c.protocol]
-type=CoherenceProtocol
-do_upgrades=true
-protocol=moesi
-
 [system.cpu5]
 type=MemTest
 children=l1c
@@ -327,12 +277,9 @@ test=system.cpu5.l1c.cpu_side
 
 [system.cpu5.l1c]
 type=BaseCache
-children=protocol
-adaptive_compression=false
+addr_range=0:18446744073709551615
 assoc=4
 block_size=64
-compressed_bus=false
-compression_latency=0
 hash_delay=1
 latency=1000
 lifo=false
@@ -350,12 +297,10 @@ prefetch_serial_squash=false
 prefetch_use_cpu_id=true
 prefetcher_size=100
 prioritizeRequests=false
-protocol=system.cpu5.l1c.protocol
 repl=Null
 size=32768
 split=false
 split_size=0
-store_compressed=false
 subblock_size=0
 tgts_per_mshr=8
 trace_addr=0
@@ -364,11 +309,6 @@ write_buffers=8
 cpu_side=system.cpu5.test
 mem_side=system.toL2Bus.port[6]
 
-[system.cpu5.l1c.protocol]
-type=CoherenceProtocol
-do_upgrades=true
-protocol=moesi
-
 [system.cpu6]
 type=MemTest
 children=l1c
@@ -387,12 +327,9 @@ test=system.cpu6.l1c.cpu_side
 
 [system.cpu6.l1c]
 type=BaseCache
-children=protocol
-adaptive_compression=false
+addr_range=0:18446744073709551615
 assoc=4
 block_size=64
-compressed_bus=false
-compression_latency=0
 hash_delay=1
 latency=1000
 lifo=false
@@ -410,12 +347,10 @@ prefetch_serial_squash=false
 prefetch_use_cpu_id=true
 prefetcher_size=100
 prioritizeRequests=false
-protocol=system.cpu6.l1c.protocol
 repl=Null
 size=32768
 split=false
 split_size=0
-store_compressed=false
 subblock_size=0
 tgts_per_mshr=8
 trace_addr=0
@@ -424,11 +359,6 @@ write_buffers=8
 cpu_side=system.cpu6.test
 mem_side=system.toL2Bus.port[7]
 
-[system.cpu6.l1c.protocol]
-type=CoherenceProtocol
-do_upgrades=true
-protocol=moesi
-
 [system.cpu7]
 type=MemTest
 children=l1c
@@ -447,12 +377,9 @@ test=system.cpu7.l1c.cpu_side
 
 [system.cpu7.l1c]
 type=BaseCache
-children=protocol
-adaptive_compression=false
+addr_range=0:18446744073709551615
 assoc=4
 block_size=64
-compressed_bus=false
-compression_latency=0
 hash_delay=1
 latency=1000
 lifo=false
@@ -470,12 +397,10 @@ prefetch_serial_squash=false
 prefetch_use_cpu_id=true
 prefetcher_size=100
 prioritizeRequests=false
-protocol=system.cpu7.l1c.protocol
 repl=Null
 size=32768
 split=false
 split_size=0
-store_compressed=false
 subblock_size=0
 tgts_per_mshr=8
 trace_addr=0
@@ -484,11 +409,6 @@ write_buffers=8
 cpu_side=system.cpu7.test
 mem_side=system.toL2Bus.port[8]
 
-[system.cpu7.l1c.protocol]
-type=CoherenceProtocol
-do_upgrades=true
-protocol=moesi
-
 [system.funcmem]
 type=PhysicalMemory
 file=
@@ -499,11 +419,9 @@ port=system.cpu0.functional system.cpu1.functional system.cpu2.functional system
 
 [system.l2c]
 type=BaseCache
-adaptive_compression=false
+addr_range=0:18446744073709551615
 assoc=8
 block_size=64
-compressed_bus=false
-compression_latency=0
 hash_delay=1
 latency=10000
 lifo=false
@@ -521,12 +439,10 @@ prefetch_serial_squash=false
 prefetch_use_cpu_id=true
 prefetcher_size=100
 prioritizeRequests=false
-protocol=Null
 repl=Null
 size=65536
 split=false
 split_size=0
-store_compressed=false
 subblock_size=0
 tgts_per_mshr=16
 trace_addr=0
diff --git a/tests/quick/50.memtest/ref/alpha/linux/memtest/m5stats.txt b/tests/quick/50.memtest/ref/alpha/linux/memtest/m5stats.txt
index 752268088..c54bfdce4 100644
--- a/tests/quick/50.memtest/ref/alpha/linux/memtest/m5stats.txt
+++ b/tests/quick/50.memtest/ref/alpha/linux/memtest/m5stats.txt
@@ -1,72 +1,71 @@
 
 ---------- Begin Simulation Statistics ----------
-host_seconds                                 37943.64                       # Real time elapsed on the host
-host_tick_rate                                   2223                       # Simulator tick rate (ticks/s)
+host_mem_usage                                 318912                       # Number of bytes of host memory used
+host_seconds                                   272.84                       # Real time elapsed on the host
+host_tick_rate                                 598087                       # Simulator tick rate (ticks/s)
 sim_freq                                 1000000000000                       # Frequency of simulated ticks
-sim_seconds                                  0.000084                       # Number of seconds simulated
-sim_ticks                                    84350509                       # Number of ticks simulated
-system.cpu0.l1c.ReadReq_accesses                44421                       # number of ReadReq accesses(hits+misses)
-system.cpu0.l1c.ReadReq_avg_miss_latency 14010.391786                       # average ReadReq miss latency
-system.cpu0.l1c.ReadReq_avg_mshr_miss_latency 12986.475734                       # average ReadReq mshr miss latency
-system.cpu0.l1c.ReadReq_hits                     7291                       # number of ReadReq hits
-system.cpu0.l1c.ReadReq_miss_latency        520205847                       # number of ReadReq miss cycles
-system.cpu0.l1c.ReadReq_miss_rate            0.835866                       # miss rate for ReadReq accesses
-system.cpu0.l1c.ReadReq_misses                  37130                       # number of ReadReq misses
-system.cpu0.l1c.ReadReq_mshr_miss_latency    482187844                       # number of ReadReq MSHR miss cycles
-system.cpu0.l1c.ReadReq_mshr_miss_rate       0.835866                       # mshr miss rate for ReadReq accesses
-system.cpu0.l1c.ReadReq_mshr_misses             37130                       # number of ReadReq MSHR misses
-system.cpu0.l1c.ReadReq_mshr_uncacheable         9916                       # number of ReadReq MSHR uncacheable
-system.cpu0.l1c.ReadResp_avg_mshr_uncacheable_latency          inf                       # average ReadResp mshr uncacheable latency
-system.cpu0.l1c.ReadResp_mshr_uncacheable_latency    255520881                       # number of ReadResp MSHR uncacheable cycles
-system.cpu0.l1c.WriteReq_accesses               23898                       # number of WriteReq accesses(hits+misses)
-system.cpu0.l1c.WriteReq_avg_miss_latency 12904.605270                       # average WriteReq miss latency
-system.cpu0.l1c.WriteReq_avg_mshr_miss_latency 11399.917485                       # average WriteReq mshr miss latency
-system.cpu0.l1c.WriteReq_hits                    1090                       # number of WriteReq hits
-system.cpu0.l1c.WriteReq_miss_latency       294328237                       # number of WriteReq miss cycles
-system.cpu0.l1c.WriteReq_miss_rate           0.954389                       # miss rate for WriteReq accesses
-system.cpu0.l1c.WriteReq_misses                 22808                       # number of WriteReq misses
-system.cpu0.l1c.WriteReq_mshr_miss_latency    260009318                       # number of WriteReq MSHR miss cycles
-system.cpu0.l1c.WriteReq_mshr_miss_rate      0.954389                       # mshr miss rate for WriteReq accesses
-system.cpu0.l1c.WriteReq_mshr_misses            22808                       # number of WriteReq MSHR misses
-system.cpu0.l1c.WriteReq_mshr_uncacheable         5184                       # number of WriteReq MSHR uncacheable
-system.cpu0.l1c.WriteResp_avg_mshr_uncacheable_latency          inf                       # average WriteResp mshr uncacheable latency
-system.cpu0.l1c.WriteResp_mshr_uncacheable_latency    154702333                       # number of WriteResp MSHR uncacheable cycles
-system.cpu0.l1c.avg_blocked_cycles_no_mshrs  1194.948852                       # average number of cycles each access was blocked
+sim_seconds                                  0.000163                       # Number of seconds simulated
+sim_ticks                                   163182312                       # Number of ticks simulated
+system.cpu0.l1c.ReadReq_accesses                44955                       # number of ReadReq accesses(hits+misses)
+system.cpu0.l1c.ReadReq_avg_miss_latency 22713.586650                       # average ReadReq miss latency
+system.cpu0.l1c.ReadReq_avg_mshr_miss_latency 22705.587882                       # average ReadReq mshr miss latency
+system.cpu0.l1c.ReadReq_avg_mshr_uncacheable_latency          inf                       # average ReadReq mshr uncacheable latency
+system.cpu0.l1c.ReadReq_hits                     7621                       # number of ReadReq hits
+system.cpu0.l1c.ReadReq_miss_latency        847989044                       # number of ReadReq miss cycles
+system.cpu0.l1c.ReadReq_miss_rate            0.830475                       # miss rate for ReadReq accesses
+system.cpu0.l1c.ReadReq_misses                  37334                       # number of ReadReq misses
+system.cpu0.l1c.ReadReq_mshr_miss_latency    847690418                       # number of ReadReq MSHR miss cycles
+system.cpu0.l1c.ReadReq_mshr_miss_rate       0.830475                       # mshr miss rate for ReadReq accesses
+system.cpu0.l1c.ReadReq_mshr_misses             37334                       # number of ReadReq MSHR misses
+system.cpu0.l1c.ReadReq_mshr_uncacheable_latency    517943783                       # number of ReadReq MSHR uncacheable cycles
+system.cpu0.l1c.WriteReq_accesses               24357                       # number of WriteReq accesses(hits+misses)
+system.cpu0.l1c.WriteReq_avg_miss_latency 24775.291654                       # average WriteReq miss latency
+system.cpu0.l1c.WriteReq_avg_mshr_miss_latency 24768.103842                       # average WriteReq mshr miss latency
+system.cpu0.l1c.WriteReq_avg_mshr_uncacheable_latency          inf                       # average WriteReq mshr uncacheable latency
+system.cpu0.l1c.WriteReq_hits                     956                       # number of WriteReq hits
+system.cpu0.l1c.WriteReq_miss_latency       579766600                       # number of WriteReq miss cycles
+system.cpu0.l1c.WriteReq_miss_rate           0.960751                       # miss rate for WriteReq accesses
+system.cpu0.l1c.WriteReq_misses                 23401                       # number of WriteReq misses
+system.cpu0.l1c.WriteReq_mshr_miss_latency    579598398                       # number of WriteReq MSHR miss cycles
+system.cpu0.l1c.WriteReq_mshr_miss_rate      0.960751                       # mshr miss rate for WriteReq accesses
+system.cpu0.l1c.WriteReq_mshr_misses            23401                       # number of WriteReq MSHR misses
+system.cpu0.l1c.WriteReq_mshr_uncacheable_latency    315492846                       # number of WriteReq MSHR uncacheable cycles
+system.cpu0.l1c.avg_blocked_cycles_no_mshrs  2283.512556                       # average number of cycles each access was blocked
 system.cpu0.l1c.avg_blocked_cycles_no_targets <err: div-0>                       # average number of cycles each access was blocked
-system.cpu0.l1c.avg_refs                     0.407238                       # Average number of references to valid blocks.
-system.cpu0.l1c.blocked_no_mshrs                69093                       # number of cycles access was blocked
+system.cpu0.l1c.avg_refs                     0.411295                       # Average number of references to valid blocks.
+system.cpu0.l1c.blocked_no_mshrs                69290                       # number of cycles access was blocked
 system.cpu0.l1c.blocked_no_targets                  0                       # number of cycles access was blocked
-system.cpu0.l1c.blocked_cycles_no_mshrs      82562601                       # number of cycles access was blocked
+system.cpu0.l1c.blocked_cycles_no_mshrs     158224585                       # number of cycles access was blocked
 system.cpu0.l1c.blocked_cycles_no_targets            0                       # number of cycles access was blocked
 system.cpu0.l1c.cache_copies                        0                       # number of cache copies performed
-system.cpu0.l1c.demand_accesses                 68319                       # number of demand (read+write) accesses
-system.cpu0.l1c.demand_avg_miss_latency  13589.610664                       # average overall miss latency
-system.cpu0.l1c.demand_avg_mshr_miss_latency 12382.748206                       # average overall mshr miss latency
-system.cpu0.l1c.demand_hits                      8381                       # number of demand (read+write) hits
-system.cpu0.l1c.demand_miss_latency         814534084                       # number of demand (read+write) miss cycles
-system.cpu0.l1c.demand_miss_rate             0.877325                       # miss rate for demand accesses
-system.cpu0.l1c.demand_misses                   59938                       # number of demand (read+write) misses
+system.cpu0.l1c.demand_accesses                 69312                       # number of demand (read+write) accesses
+system.cpu0.l1c.demand_avg_miss_latency  23507.954952                       # average overall miss latency
+system.cpu0.l1c.demand_avg_mshr_miss_latency 23500.268642                       # average overall mshr miss latency
+system.cpu0.l1c.demand_hits                      8577                       # number of demand (read+write) hits
+system.cpu0.l1c.demand_miss_latency        1427755644                       # number of demand (read+write) miss cycles
+system.cpu0.l1c.demand_miss_rate             0.876255                       # miss rate for demand accesses
+system.cpu0.l1c.demand_misses                   60735                       # number of demand (read+write) misses
 system.cpu0.l1c.demand_mshr_hits                    0                       # number of demand (read+write) MSHR hits
-system.cpu0.l1c.demand_mshr_miss_latency    742197162                       # number of demand (read+write) MSHR miss cycles
-system.cpu0.l1c.demand_mshr_miss_rate        0.877325                       # mshr miss rate for demand accesses
-system.cpu0.l1c.demand_mshr_misses              59938                       # number of demand (read+write) MSHR misses
+system.cpu0.l1c.demand_mshr_miss_latency   1427288816                       # number of demand (read+write) MSHR miss cycles
+system.cpu0.l1c.demand_mshr_miss_rate        0.876255                       # mshr miss rate for demand accesses
+system.cpu0.l1c.demand_mshr_misses              60735                       # number of demand (read+write) MSHR misses
 system.cpu0.l1c.fast_writes                         0                       # number of fast writes performed
 system.cpu0.l1c.mshr_cap_events                     0                       # number of times MSHR cap was activated
 system.cpu0.l1c.no_allocate_misses                  0                       # Number of misses that were no-allocate
-system.cpu0.l1c.overall_accesses                68319                       # number of overall (read+write) accesses
-system.cpu0.l1c.overall_avg_miss_latency 13589.610664                       # average overall miss latency
-system.cpu0.l1c.overall_avg_mshr_miss_latency 12382.748206                       # average overall mshr miss latency
-system.cpu0.l1c.overall_avg_mshr_uncacheable_latency            0                       # average overall mshr uncacheable latency
-system.cpu0.l1c.overall_hits                     8381                       # number of overall hits
-system.cpu0.l1c.overall_miss_latency        814534084                       # number of overall miss cycles
-system.cpu0.l1c.overall_miss_rate            0.877325                       # miss rate for overall accesses
-system.cpu0.l1c.overall_misses                  59938                       # number of overall misses
+system.cpu0.l1c.overall_accesses                69312                       # number of overall (read+write) accesses
+system.cpu0.l1c.overall_avg_miss_latency 23507.954952                       # average overall miss latency
+system.cpu0.l1c.overall_avg_mshr_miss_latency 23500.268642                       # average overall mshr miss latency
+system.cpu0.l1c.overall_avg_mshr_uncacheable_latency          inf                       # average overall mshr uncacheable latency
+system.cpu0.l1c.overall_hits                     8577                       # number of overall hits
+system.cpu0.l1c.overall_miss_latency       1427755644                       # number of overall miss cycles
+system.cpu0.l1c.overall_miss_rate            0.876255                       # miss rate for overall accesses
+system.cpu0.l1c.overall_misses                  60735                       # number of overall misses
 system.cpu0.l1c.overall_mshr_hits                   0                       # number of overall MSHR hits
-system.cpu0.l1c.overall_mshr_miss_latency    742197162                       # number of overall MSHR miss cycles
-system.cpu0.l1c.overall_mshr_miss_rate       0.877325                       # mshr miss rate for overall accesses
-system.cpu0.l1c.overall_mshr_misses             59938                       # number of overall MSHR misses
-system.cpu0.l1c.overall_mshr_uncacheable_latency            0                       # number of overall MSHR uncacheable cycles
-system.cpu0.l1c.overall_mshr_uncacheable_misses        15100                       # number of overall MSHR uncacheable misses
+system.cpu0.l1c.overall_mshr_miss_latency   1427288816                       # number of overall MSHR miss cycles
+system.cpu0.l1c.overall_mshr_miss_rate       0.876255                       # mshr miss rate for overall accesses
+system.cpu0.l1c.overall_mshr_misses             60735                       # number of overall MSHR misses
+system.cpu0.l1c.overall_mshr_uncacheable_latency    833436629                       # number of overall MSHR uncacheable cycles
+system.cpu0.l1c.overall_mshr_uncacheable_misses            0                       # number of overall MSHR uncacheable misses
 system.cpu0.l1c.prefetcher.num_hwpf_already_in_cache            0                       # number of hwpf that were already in the cache
 system.cpu0.l1c.prefetcher.num_hwpf_already_in_mshr            0                       # number of hwpf that were already in mshr
 system.cpu0.l1c.prefetcher.num_hwpf_already_in_prefetcher            0                       # number of hwpf that were already in the prefetch queue
@@ -76,104 +75,76 @@ system.cpu0.l1c.prefetcher.num_hwpf_issued            0                       #
 system.cpu0.l1c.prefetcher.num_hwpf_removed_MSHR_hit            0                       # number of hwpf removed because MSHR allocated
 system.cpu0.l1c.prefetcher.num_hwpf_span_page            0                       # number of hwpf spanning a virtual page
 system.cpu0.l1c.prefetcher.num_hwpf_squashed_from_miss            0                       # number of hwpf that got squashed due to a miss aborting calculation time
-system.cpu0.l1c.protocol.hwpf_invalid               0                       # hard prefetch misses to invalid blocks
-system.cpu0.l1c.protocol.read_invalid         1761660                       # read misses to invalid blocks
-system.cpu0.l1c.protocol.snoop_inv_exclusive            0                       # Invalidate snoops on exclusive blocks
-system.cpu0.l1c.protocol.snoop_inv_invalid            0                       # Invalidate snoops on invalid blocks
-system.cpu0.l1c.protocol.snoop_inv_modified            0                       # Invalidate snoops on modified blocks
-system.cpu0.l1c.protocol.snoop_inv_owned            0                       # Invalidate snoops on owned blocks
-system.cpu0.l1c.protocol.snoop_inv_shared            0                       # Invalidate snoops on shared blocks
-system.cpu0.l1c.protocol.snoop_read_exclusive         2836                       # read snoops on exclusive blocks
-system.cpu0.l1c.protocol.snoop_read_modified        12378                       # read snoops on modified blocks
-system.cpu0.l1c.protocol.snoop_read_owned         7300                       # read snoops on owned blocks
-system.cpu0.l1c.protocol.snoop_read_shared      1749577                       # read snoops on shared blocks
-system.cpu0.l1c.protocol.snoop_readex_exclusive         1616                       # readEx snoops on exclusive blocks
-system.cpu0.l1c.protocol.snoop_readex_modified         6692                       # readEx snoops on modified blocks
-system.cpu0.l1c.protocol.snoop_readex_owned         4009                       # readEx snoops on owned blocks
-system.cpu0.l1c.protocol.snoop_readex_shared        12550                       # readEx snoops on shared blocks
-system.cpu0.l1c.protocol.snoop_upgrade_owned          790                       # upgrade snoops on owned blocks
-system.cpu0.l1c.protocol.snoop_upgrade_shared         3004                       # upgradee snoops on shared blocks
-system.cpu0.l1c.protocol.snoop_writeinv_exclusive            0                       # WriteInvalidate snoops on exclusive blocks
-system.cpu0.l1c.protocol.snoop_writeinv_invalid            0                       # WriteInvalidate snoops on invalid blocks
-system.cpu0.l1c.protocol.snoop_writeinv_modified            0                       # WriteInvalidate snoops on modified blocks
-system.cpu0.l1c.protocol.snoop_writeinv_owned            0                       # WriteInvalidate snoops on owned blocks
-system.cpu0.l1c.protocol.snoop_writeinv_shared            0                       # WriteInvalidate snoops on shared blocks
-system.cpu0.l1c.protocol.swpf_invalid               0                       # soft prefetch misses to invalid blocks
-system.cpu0.l1c.protocol.write_invalid         940728                       # write misses to invalid blocks
-system.cpu0.l1c.protocol.write_owned             1344                       # write misses to owned blocks
-system.cpu0.l1c.protocol.write_shared            4484                       # write misses to shared blocks
-system.cpu0.l1c.replacements                    27160                       # number of replacements
-system.cpu0.l1c.sampled_refs                    27495                       # Sample count of references to valid blocks.
+system.cpu0.l1c.replacements                    28052                       # number of replacements
+system.cpu0.l1c.sampled_refs                    28403                       # Sample count of references to valid blocks.
 system.cpu0.l1c.soft_prefetch_mshr_full             0                       # number of mshr full events for SW prefetching instrutions
-system.cpu0.l1c.tagsinuse                  342.709273                       # Cycle average of tags in use
-system.cpu0.l1c.total_refs                      11197                       # Total number of references to valid blocks.
+system.cpu0.l1c.tagsinuse                  348.576200                       # Cycle average of tags in use
+system.cpu0.l1c.total_refs                      11682                       # Total number of references to valid blocks.
 system.cpu0.l1c.warmup_cycle                        0                       # Cycle when the warmup percentage was hit.
-system.cpu0.l1c.writebacks                      10716                       # number of writebacks
+system.cpu0.l1c.writebacks                      11146                       # number of writebacks
 system.cpu0.num_copies                              0                       # number of copy accesses completed
-system.cpu0.num_reads                           98012                       # number of read accesses completed
-system.cpu0.num_writes                          53207                       # number of write accesses completed
-system.cpu1.l1c.ReadReq_accesses                44893                       # number of ReadReq accesses(hits+misses)
-system.cpu1.l1c.ReadReq_avg_miss_latency 13909.754864                       # average ReadReq miss latency
-system.cpu1.l1c.ReadReq_avg_mshr_miss_latency 12900.185775                       # average ReadReq mshr miss latency
-system.cpu1.l1c.ReadReq_hits                     7579                       # number of ReadReq hits
-system.cpu1.l1c.ReadReq_miss_latency        519028593                       # number of ReadReq miss cycles
-system.cpu1.l1c.ReadReq_miss_rate            0.831176                       # miss rate for ReadReq accesses
-system.cpu1.l1c.ReadReq_misses                  37314                       # number of ReadReq misses
-system.cpu1.l1c.ReadReq_mshr_miss_latency    481357532                       # number of ReadReq MSHR miss cycles
-system.cpu1.l1c.ReadReq_mshr_miss_rate       0.831176                       # mshr miss rate for ReadReq accesses
-system.cpu1.l1c.ReadReq_mshr_misses             37314                       # number of ReadReq MSHR misses
-system.cpu1.l1c.ReadReq_mshr_uncacheable         9811                       # number of ReadReq MSHR uncacheable
-system.cpu1.l1c.ReadResp_avg_mshr_uncacheable_latency          inf                       # average ReadResp mshr uncacheable latency
-system.cpu1.l1c.ReadResp_mshr_uncacheable_latency    251708747                       # number of ReadResp MSHR uncacheable cycles
-system.cpu1.l1c.WriteReq_accesses               24614                       # number of WriteReq accesses(hits+misses)
-system.cpu1.l1c.WriteReq_avg_miss_latency 12788.679753                       # average WriteReq miss latency
-system.cpu1.l1c.WriteReq_avg_mshr_miss_latency 11344.205121                       # average WriteReq mshr miss latency
-system.cpu1.l1c.WriteReq_hits                    1257                       # number of WriteReq hits
-system.cpu1.l1c.WriteReq_miss_latency       298705193                       # number of WriteReq miss cycles
-system.cpu1.l1c.WriteReq_miss_rate           0.948932                       # miss rate for WriteReq accesses
-system.cpu1.l1c.WriteReq_misses                 23357                       # number of WriteReq misses
-system.cpu1.l1c.WriteReq_mshr_miss_latency    264966599                       # number of WriteReq MSHR miss cycles
-system.cpu1.l1c.WriteReq_mshr_miss_rate      0.948932                       # mshr miss rate for WriteReq accesses
-system.cpu1.l1c.WriteReq_mshr_misses            23357                       # number of WriteReq MSHR misses
-system.cpu1.l1c.WriteReq_mshr_uncacheable         5453                       # number of WriteReq MSHR uncacheable
-system.cpu1.l1c.WriteResp_avg_mshr_uncacheable_latency          inf                       # average WriteResp mshr uncacheable latency
-system.cpu1.l1c.WriteResp_mshr_uncacheable_latency    163813954                       # number of WriteResp MSHR uncacheable cycles
-system.cpu1.l1c.avg_blocked_cycles_no_mshrs  1183.149435                       # average number of cycles each access was blocked
+system.cpu0.num_reads                           99892                       # number of read accesses completed
+system.cpu0.num_writes                          54159                       # number of write accesses completed
+system.cpu1.l1c.ReadReq_accesses                44788                       # number of ReadReq accesses(hits+misses)
+system.cpu1.l1c.ReadReq_avg_miss_latency 22745.661074                       # average ReadReq miss latency
+system.cpu1.l1c.ReadReq_avg_mshr_miss_latency 22737.662205                       # average ReadReq mshr miss latency
+system.cpu1.l1c.ReadReq_avg_mshr_uncacheable_latency          inf                       # average ReadReq mshr uncacheable latency
+system.cpu1.l1c.ReadReq_hits                     7659                       # number of ReadReq hits
+system.cpu1.l1c.ReadReq_miss_latency        844523650                       # number of ReadReq miss cycles
+system.cpu1.l1c.ReadReq_miss_rate            0.828994                       # miss rate for ReadReq accesses
+system.cpu1.l1c.ReadReq_misses                  37129                       # number of ReadReq misses
+system.cpu1.l1c.ReadReq_mshr_miss_latency    844226660                       # number of ReadReq MSHR miss cycles
+system.cpu1.l1c.ReadReq_mshr_miss_rate       0.828994                       # mshr miss rate for ReadReq accesses
+system.cpu1.l1c.ReadReq_mshr_misses             37129                       # number of ReadReq MSHR misses
+system.cpu1.l1c.ReadReq_mshr_uncacheable_latency    524670355                       # number of ReadReq MSHR uncacheable cycles
+system.cpu1.l1c.WriteReq_accesses               24323                       # number of WriteReq accesses(hits+misses)
+system.cpu1.l1c.WriteReq_avg_miss_latency 24767.283276                       # average WriteReq miss latency
+system.cpu1.l1c.WriteReq_avg_mshr_miss_latency 24760.081804                       # average WriteReq mshr miss latency
+system.cpu1.l1c.WriteReq_avg_mshr_uncacheable_latency          inf                       # average WriteReq mshr uncacheable latency
+system.cpu1.l1c.WriteReq_hits                     950                       # number of WriteReq hits
+system.cpu1.l1c.WriteReq_miss_latency       578885712                       # number of WriteReq miss cycles
+system.cpu1.l1c.WriteReq_miss_rate           0.960942                       # miss rate for WriteReq accesses
+system.cpu1.l1c.WriteReq_misses                 23373                       # number of WriteReq misses
+system.cpu1.l1c.WriteReq_mshr_miss_latency    578717392                       # number of WriteReq MSHR miss cycles
+system.cpu1.l1c.WriteReq_mshr_miss_rate      0.960942                       # mshr miss rate for WriteReq accesses
+system.cpu1.l1c.WriteReq_mshr_misses            23373                       # number of WriteReq MSHR misses
+system.cpu1.l1c.WriteReq_mshr_uncacheable_latency    319087206                       # number of WriteReq MSHR uncacheable cycles
+system.cpu1.l1c.avg_blocked_cycles_no_mshrs  2291.446711                       # average number of cycles each access was blocked
 system.cpu1.l1c.avg_blocked_cycles_no_targets <err: div-0>                       # average number of cycles each access was blocked
-system.cpu1.l1c.avg_refs                     0.414323                       # Average number of references to valid blocks.
-system.cpu1.l1c.blocked_no_mshrs                69763                       # number of cycles access was blocked
+system.cpu1.l1c.avg_refs                     0.414757                       # Average number of references to valid blocks.
+system.cpu1.l1c.blocked_no_mshrs                69358                       # number of cycles access was blocked
 system.cpu1.l1c.blocked_no_targets                  0                       # number of cycles access was blocked
-system.cpu1.l1c.blocked_cycles_no_mshrs      82540054                       # number of cycles access was blocked
+system.cpu1.l1c.blocked_cycles_no_mshrs     158930161                       # number of cycles access was blocked
 system.cpu1.l1c.blocked_cycles_no_targets            0                       # number of cycles access was blocked
 system.cpu1.l1c.cache_copies                        0                       # number of cache copies performed
-system.cpu1.l1c.demand_accesses                 69507                       # number of demand (read+write) accesses
-system.cpu1.l1c.demand_avg_miss_latency  13478.165615                       # average overall miss latency
-system.cpu1.l1c.demand_avg_mshr_miss_latency 12301.167461                       # average overall mshr miss latency
-system.cpu1.l1c.demand_hits                      8836                       # number of demand (read+write) hits
-system.cpu1.l1c.demand_miss_latency         817733786                       # number of demand (read+write) miss cycles
-system.cpu1.l1c.demand_miss_rate             0.872876                       # miss rate for demand accesses
-system.cpu1.l1c.demand_misses                   60671                       # number of demand (read+write) misses
+system.cpu1.l1c.demand_accesses                 69111                       # number of demand (read+write) accesses
+system.cpu1.l1c.demand_avg_miss_latency  23526.649731                       # average overall miss latency
+system.cpu1.l1c.demand_avg_mshr_miss_latency 23518.958910                       # average overall mshr miss latency
+system.cpu1.l1c.demand_hits                      8609                       # number of demand (read+write) hits
+system.cpu1.l1c.demand_miss_latency        1423409362                       # number of demand (read+write) miss cycles
+system.cpu1.l1c.demand_miss_rate             0.875432                       # miss rate for demand accesses
+system.cpu1.l1c.demand_misses                   60502                       # number of demand (read+write) misses
 system.cpu1.l1c.demand_mshr_hits                    0                       # number of demand (read+write) MSHR hits
-system.cpu1.l1c.demand_mshr_miss_latency    746324131                       # number of demand (read+write) MSHR miss cycles
-system.cpu1.l1c.demand_mshr_miss_rate        0.872876                       # mshr miss rate for demand accesses
-system.cpu1.l1c.demand_mshr_misses              60671                       # number of demand (read+write) MSHR misses
+system.cpu1.l1c.demand_mshr_miss_latency   1422944052                       # number of demand (read+write) MSHR miss cycles
+system.cpu1.l1c.demand_mshr_miss_rate        0.875432                       # mshr miss rate for demand accesses
+system.cpu1.l1c.demand_mshr_misses              60502                       # number of demand (read+write) MSHR misses
 system.cpu1.l1c.fast_writes                         0                       # number of fast writes performed
 system.cpu1.l1c.mshr_cap_events                     0                       # number of times MSHR cap was activated
 system.cpu1.l1c.no_allocate_misses                  0                       # Number of misses that were no-allocate
-system.cpu1.l1c.overall_accesses                69507                       # number of overall (read+write) accesses
-system.cpu1.l1c.overall_avg_miss_latency 13478.165615                       # average overall miss latency
-system.cpu1.l1c.overall_avg_mshr_miss_latency 12301.167461                       # average overall mshr miss latency
-system.cpu1.l1c.overall_avg_mshr_uncacheable_latency            0                       # average overall mshr uncacheable latency
-system.cpu1.l1c.overall_hits                     8836                       # number of overall hits
-system.cpu1.l1c.overall_miss_latency        817733786                       # number of overall miss cycles
-system.cpu1.l1c.overall_miss_rate            0.872876                       # miss rate for overall accesses
-system.cpu1.l1c.overall_misses                  60671                       # number of overall misses
+system.cpu1.l1c.overall_accesses                69111                       # number of overall (read+write) accesses
+system.cpu1.l1c.overall_avg_miss_latency 23526.649731                       # average overall miss latency
+system.cpu1.l1c.overall_avg_mshr_miss_latency 23518.958910                       # average overall mshr miss latency
+system.cpu1.l1c.overall_avg_mshr_uncacheable_latency          inf                       # average overall mshr uncacheable latency
+system.cpu1.l1c.overall_hits                     8609                       # number of overall hits
+system.cpu1.l1c.overall_miss_latency       1423409362                       # number of overall miss cycles
+system.cpu1.l1c.overall_miss_rate            0.875432                       # miss rate for overall accesses
+system.cpu1.l1c.overall_misses                  60502                       # number of overall misses
 system.cpu1.l1c.overall_mshr_hits                   0                       # number of overall MSHR hits
-system.cpu1.l1c.overall_mshr_miss_latency    746324131                       # number of overall MSHR miss cycles
-system.cpu1.l1c.overall_mshr_miss_rate       0.872876                       # mshr miss rate for overall accesses
-system.cpu1.l1c.overall_mshr_misses             60671                       # number of overall MSHR misses
-system.cpu1.l1c.overall_mshr_uncacheable_latency            0                       # number of overall MSHR uncacheable cycles
-system.cpu1.l1c.overall_mshr_uncacheable_misses        15264                       # number of overall MSHR uncacheable misses
+system.cpu1.l1c.overall_mshr_miss_latency   1422944052                       # number of overall MSHR miss cycles
+system.cpu1.l1c.overall_mshr_miss_rate       0.875432                       # mshr miss rate for overall accesses
+system.cpu1.l1c.overall_mshr_misses             60502                       # number of overall MSHR misses
+system.cpu1.l1c.overall_mshr_uncacheable_latency    843757561                       # number of overall MSHR uncacheable cycles
+system.cpu1.l1c.overall_mshr_uncacheable_misses            0                       # number of overall MSHR uncacheable misses
 system.cpu1.l1c.prefetcher.num_hwpf_already_in_cache            0                       # number of hwpf that were already in the cache
 system.cpu1.l1c.prefetcher.num_hwpf_already_in_mshr            0                       # number of hwpf that were already in mshr
 system.cpu1.l1c.prefetcher.num_hwpf_already_in_prefetcher            0                       # number of hwpf that were already in the prefetch queue
@@ -183,104 +154,76 @@ system.cpu1.l1c.prefetcher.num_hwpf_issued            0                       #
 system.cpu1.l1c.prefetcher.num_hwpf_removed_MSHR_hit            0                       # number of hwpf removed because MSHR allocated
 system.cpu1.l1c.prefetcher.num_hwpf_span_page            0                       # number of hwpf spanning a virtual page
 system.cpu1.l1c.prefetcher.num_hwpf_squashed_from_miss            0                       # number of hwpf that got squashed due to a miss aborting calculation time
-system.cpu1.l1c.protocol.hwpf_invalid               0                       # hard prefetch misses to invalid blocks
-system.cpu1.l1c.protocol.read_invalid         1717891                       # read misses to invalid blocks
-system.cpu1.l1c.protocol.snoop_inv_exclusive            0                       # Invalidate snoops on exclusive blocks
-system.cpu1.l1c.protocol.snoop_inv_invalid            0                       # Invalidate snoops on invalid blocks
-system.cpu1.l1c.protocol.snoop_inv_modified            0                       # Invalidate snoops on modified blocks
-system.cpu1.l1c.protocol.snoop_inv_owned            0                       # Invalidate snoops on owned blocks
-system.cpu1.l1c.protocol.snoop_inv_shared            0                       # Invalidate snoops on shared blocks
-system.cpu1.l1c.protocol.snoop_read_exclusive         2925                       # read snoops on exclusive blocks
-system.cpu1.l1c.protocol.snoop_read_modified        12701                       # read snoops on modified blocks
-system.cpu1.l1c.protocol.snoop_read_owned         7436                       # read snoops on owned blocks
-system.cpu1.l1c.protocol.snoop_read_shared      1669937                       # read snoops on shared blocks
-system.cpu1.l1c.protocol.snoop_readex_exclusive         1611                       # readEx snoops on exclusive blocks
-system.cpu1.l1c.protocol.snoop_readex_modified         6726                       # readEx snoops on modified blocks
-system.cpu1.l1c.protocol.snoop_readex_owned         3965                       # readEx snoops on owned blocks
-system.cpu1.l1c.protocol.snoop_readex_shared        12596                       # readEx snoops on shared blocks
-system.cpu1.l1c.protocol.snoop_upgrade_owned          860                       # upgrade snoops on owned blocks
-system.cpu1.l1c.protocol.snoop_upgrade_shared         2979                       # upgradee snoops on shared blocks
-system.cpu1.l1c.protocol.snoop_writeinv_exclusive            0                       # WriteInvalidate snoops on exclusive blocks
-system.cpu1.l1c.protocol.snoop_writeinv_invalid            0                       # WriteInvalidate snoops on invalid blocks
-system.cpu1.l1c.protocol.snoop_writeinv_modified            0                       # WriteInvalidate snoops on modified blocks
-system.cpu1.l1c.protocol.snoop_writeinv_owned            0                       # WriteInvalidate snoops on owned blocks
-system.cpu1.l1c.protocol.snoop_writeinv_shared            0                       # WriteInvalidate snoops on shared blocks
-system.cpu1.l1c.protocol.swpf_invalid               0                       # soft prefetch misses to invalid blocks
-system.cpu1.l1c.protocol.write_invalid         914774                       # write misses to invalid blocks
-system.cpu1.l1c.protocol.write_owned             1422                       # write misses to owned blocks
-system.cpu1.l1c.protocol.write_shared            4382                       # write misses to shared blocks
-system.cpu1.l1c.replacements                    27806                       # number of replacements
-system.cpu1.l1c.sampled_refs                    28164                       # Sample count of references to valid blocks.
+system.cpu1.l1c.replacements                    27765                       # number of replacements
+system.cpu1.l1c.sampled_refs                    28108                       # Sample count of references to valid blocks.
 system.cpu1.l1c.soft_prefetch_mshr_full             0                       # number of mshr full events for SW prefetching instrutions
-system.cpu1.l1c.tagsinuse                  345.545872                       # Cycle average of tags in use
-system.cpu1.l1c.total_refs                      11669                       # Total number of references to valid blocks.
+system.cpu1.l1c.tagsinuse                  346.327274                       # Cycle average of tags in use
+system.cpu1.l1c.total_refs                      11658                       # Total number of references to valid blocks.
 system.cpu1.l1c.warmup_cycle                        0                       # Cycle when the warmup percentage was hit.
-system.cpu1.l1c.writebacks                      11204                       # number of writebacks
+system.cpu1.l1c.writebacks                      10962                       # number of writebacks
 system.cpu1.num_copies                              0                       # number of copy accesses completed
-system.cpu1.num_reads                          100000                       # number of read accesses completed
-system.cpu1.num_writes                          54335                       # number of write accesses completed
-system.cpu2.l1c.ReadReq_accesses                44489                       # number of ReadReq accesses(hits+misses)
-system.cpu2.l1c.ReadReq_avg_miss_latency 14018.031231                       # average ReadReq miss latency
-system.cpu2.l1c.ReadReq_avg_mshr_miss_latency 12993.788573                       # average ReadReq mshr miss latency
-system.cpu2.l1c.ReadReq_hits                     7507                       # number of ReadReq hits
-system.cpu2.l1c.ReadReq_miss_latency        518414831                       # number of ReadReq miss cycles
-system.cpu2.l1c.ReadReq_miss_rate            0.831262                       # miss rate for ReadReq accesses
-system.cpu2.l1c.ReadReq_misses                  36982                       # number of ReadReq misses
-system.cpu2.l1c.ReadReq_mshr_miss_latency    480536289                       # number of ReadReq MSHR miss cycles
-system.cpu2.l1c.ReadReq_mshr_miss_rate       0.831262                       # mshr miss rate for ReadReq accesses
-system.cpu2.l1c.ReadReq_mshr_misses             36982                       # number of ReadReq MSHR misses
-system.cpu2.l1c.ReadReq_mshr_uncacheable         9861                       # number of ReadReq MSHR uncacheable
-system.cpu2.l1c.ReadResp_avg_mshr_uncacheable_latency          inf                       # average ReadResp mshr uncacheable latency
-system.cpu2.l1c.ReadResp_mshr_uncacheable_latency    253484666                       # number of ReadResp MSHR uncacheable cycles
-system.cpu2.l1c.WriteReq_accesses               24340                       # number of WriteReq accesses(hits+misses)
-system.cpu2.l1c.WriteReq_avg_miss_latency 12765.385606                       # average WriteReq miss latency
-system.cpu2.l1c.WriteReq_avg_mshr_miss_latency 11318.971789                       # average WriteReq mshr miss latency
-system.cpu2.l1c.WriteReq_hits                    1122                       # number of WriteReq hits
-system.cpu2.l1c.WriteReq_miss_latency       296386723                       # number of WriteReq miss cycles
-system.cpu2.l1c.WriteReq_miss_rate           0.953903                       # miss rate for WriteReq accesses
-system.cpu2.l1c.WriteReq_misses                 23218                       # number of WriteReq misses
-system.cpu2.l1c.WriteReq_mshr_miss_latency    262803887                       # number of WriteReq MSHR miss cycles
-system.cpu2.l1c.WriteReq_mshr_miss_rate      0.953903                       # mshr miss rate for WriteReq accesses
-system.cpu2.l1c.WriteReq_mshr_misses            23218                       # number of WriteReq MSHR misses
-system.cpu2.l1c.WriteReq_mshr_uncacheable         5480                       # number of WriteReq MSHR uncacheable
-system.cpu2.l1c.WriteResp_avg_mshr_uncacheable_latency          inf                       # average WriteResp mshr uncacheable latency
-system.cpu2.l1c.WriteResp_mshr_uncacheable_latency    165110755                       # number of WriteResp MSHR uncacheable cycles
-system.cpu2.l1c.avg_blocked_cycles_no_mshrs  1190.317505                       # average number of cycles each access was blocked
+system.cpu1.num_reads                           99692                       # number of read accesses completed
+system.cpu1.num_writes                          53844                       # number of write accesses completed
+system.cpu2.l1c.ReadReq_accesses                45045                       # number of ReadReq accesses(hits+misses)
+system.cpu2.l1c.ReadReq_avg_miss_latency 22675.185062                       # average ReadReq miss latency
+system.cpu2.l1c.ReadReq_avg_mshr_miss_latency 22667.185702                       # average ReadReq mshr miss latency
+system.cpu2.l1c.ReadReq_avg_mshr_uncacheable_latency          inf                       # average ReadReq mshr uncacheable latency
+system.cpu2.l1c.ReadReq_hits                     7544                       # number of ReadReq hits
+system.cpu2.l1c.ReadReq_miss_latency        850342115                       # number of ReadReq miss cycles
+system.cpu2.l1c.ReadReq_miss_rate            0.832523                       # miss rate for ReadReq accesses
+system.cpu2.l1c.ReadReq_misses                  37501                       # number of ReadReq misses
+system.cpu2.l1c.ReadReq_mshr_miss_latency    850042131                       # number of ReadReq MSHR miss cycles
+system.cpu2.l1c.ReadReq_mshr_miss_rate       0.832523                       # mshr miss rate for ReadReq accesses
+system.cpu2.l1c.ReadReq_mshr_misses             37501                       # number of ReadReq MSHR misses
+system.cpu2.l1c.ReadReq_mshr_uncacheable_latency    526690736                       # number of ReadReq MSHR uncacheable cycles
+system.cpu2.l1c.WriteReq_accesses               23975                       # number of WriteReq accesses(hits+misses)
+system.cpu2.l1c.WriteReq_avg_miss_latency 24810.638326                       # average WriteReq miss latency
+system.cpu2.l1c.WriteReq_avg_mshr_miss_latency 24803.479873                       # average WriteReq mshr miss latency
+system.cpu2.l1c.WriteReq_avg_mshr_uncacheable_latency          inf                       # average WriteReq mshr uncacheable latency
+system.cpu2.l1c.WriteReq_hits                     946                       # number of WriteReq hits
+system.cpu2.l1c.WriteReq_miss_latency       571364190                       # number of WriteReq miss cycles
+system.cpu2.l1c.WriteReq_miss_rate           0.960542                       # miss rate for WriteReq accesses
+system.cpu2.l1c.WriteReq_misses                 23029                       # number of WriteReq misses
+system.cpu2.l1c.WriteReq_mshr_miss_latency    571199338                       # number of WriteReq MSHR miss cycles
+system.cpu2.l1c.WriteReq_mshr_miss_rate      0.960542                       # mshr miss rate for WriteReq accesses
+system.cpu2.l1c.WriteReq_mshr_misses            23029                       # number of WriteReq MSHR misses
+system.cpu2.l1c.WriteReq_mshr_uncacheable_latency    314108208                       # number of WriteReq MSHR uncacheable cycles
+system.cpu2.l1c.avg_blocked_cycles_no_mshrs  2295.331392                       # average number of cycles each access was blocked
 system.cpu2.l1c.avg_blocked_cycles_no_targets <err: div-0>                       # average number of cycles each access was blocked
-system.cpu2.l1c.avg_refs                     0.414721                       # Average number of references to valid blocks.
-system.cpu2.l1c.blocked_no_mshrs                69202                       # number of cycles access was blocked
+system.cpu2.l1c.avg_refs                     0.417132                       # Average number of references to valid blocks.
+system.cpu2.l1c.blocked_no_mshrs                69383                       # number of cycles access was blocked
 system.cpu2.l1c.blocked_no_targets                  0                       # number of cycles access was blocked
-system.cpu2.l1c.blocked_cycles_no_mshrs      82372352                       # number of cycles access was blocked
+system.cpu2.l1c.blocked_cycles_no_mshrs     159256978                       # number of cycles access was blocked
 system.cpu2.l1c.blocked_cycles_no_targets            0                       # number of cycles access was blocked
 system.cpu2.l1c.cache_copies                        0                       # number of cache copies performed
-system.cpu2.l1c.demand_accesses                 68829                       # number of demand (read+write) accesses
-system.cpu2.l1c.demand_avg_miss_latency  13534.909535                       # average overall miss latency
-system.cpu2.l1c.demand_avg_mshr_miss_latency 12347.843455                       # average overall mshr miss latency
-system.cpu2.l1c.demand_hits                      8629                       # number of demand (read+write) hits
-system.cpu2.l1c.demand_miss_latency         814801554                       # number of demand (read+write) miss cycles
-system.cpu2.l1c.demand_miss_rate             0.874631                       # miss rate for demand accesses
-system.cpu2.l1c.demand_misses                   60200                       # number of demand (read+write) misses
+system.cpu2.l1c.demand_accesses                 69020                       # number of demand (read+write) accesses
+system.cpu2.l1c.demand_avg_miss_latency  23487.631009                       # average overall miss latency
+system.cpu2.l1c.demand_avg_mshr_miss_latency 23479.951578                       # average overall mshr miss latency
+system.cpu2.l1c.demand_hits                      8490                       # number of demand (read+write) hits
+system.cpu2.l1c.demand_miss_latency        1421706305                       # number of demand (read+write) miss cycles
+system.cpu2.l1c.demand_miss_rate             0.876992                       # miss rate for demand accesses
+system.cpu2.l1c.demand_misses                   60530                       # number of demand (read+write) misses
 system.cpu2.l1c.demand_mshr_hits                    0                       # number of demand (read+write) MSHR hits
-system.cpu2.l1c.demand_mshr_miss_latency    743340176                       # number of demand (read+write) MSHR miss cycles
-system.cpu2.l1c.demand_mshr_miss_rate        0.874631                       # mshr miss rate for demand accesses
-system.cpu2.l1c.demand_mshr_misses              60200                       # number of demand (read+write) MSHR misses
+system.cpu2.l1c.demand_mshr_miss_latency   1421241469                       # number of demand (read+write) MSHR miss cycles
+system.cpu2.l1c.demand_mshr_miss_rate        0.876992                       # mshr miss rate for demand accesses
+system.cpu2.l1c.demand_mshr_misses              60530                       # number of demand (read+write) MSHR misses
 system.cpu2.l1c.fast_writes                         0                       # number of fast writes performed
 system.cpu2.l1c.mshr_cap_events                     0                       # number of times MSHR cap was activated
 system.cpu2.l1c.no_allocate_misses                  0                       # Number of misses that were no-allocate
-system.cpu2.l1c.overall_accesses                68829                       # number of overall (read+write) accesses
-system.cpu2.l1c.overall_avg_miss_latency 13534.909535                       # average overall miss latency
-system.cpu2.l1c.overall_avg_mshr_miss_latency 12347.843455                       # average overall mshr miss latency
-system.cpu2.l1c.overall_avg_mshr_uncacheable_latency            0                       # average overall mshr uncacheable latency
-system.cpu2.l1c.overall_hits                     8629                       # number of overall hits
-system.cpu2.l1c.overall_miss_latency        814801554                       # number of overall miss cycles
-system.cpu2.l1c.overall_miss_rate            0.874631                       # miss rate for overall accesses
-system.cpu2.l1c.overall_misses                  60200                       # number of overall misses
+system.cpu2.l1c.overall_accesses                69020                       # number of overall (read+write) accesses
+system.cpu2.l1c.overall_avg_miss_latency 23487.631009                       # average overall miss latency
+system.cpu2.l1c.overall_avg_mshr_miss_latency 23479.951578                       # average overall mshr miss latency
+system.cpu2.l1c.overall_avg_mshr_uncacheable_latency          inf                       # average overall mshr uncacheable latency
+system.cpu2.l1c.overall_hits                     8490                       # number of overall hits
+system.cpu2.l1c.overall_miss_latency       1421706305                       # number of overall miss cycles
+system.cpu2.l1c.overall_miss_rate            0.876992                       # miss rate for overall accesses
+system.cpu2.l1c.overall_misses                  60530                       # number of overall misses
 system.cpu2.l1c.overall_mshr_hits                   0                       # number of overall MSHR hits
-system.cpu2.l1c.overall_mshr_miss_latency    743340176                       # number of overall MSHR miss cycles
-system.cpu2.l1c.overall_mshr_miss_rate       0.874631                       # mshr miss rate for overall accesses
-system.cpu2.l1c.overall_mshr_misses             60200                       # number of overall MSHR misses
-system.cpu2.l1c.overall_mshr_uncacheable_latency            0                       # number of overall MSHR uncacheable cycles
-system.cpu2.l1c.overall_mshr_uncacheable_misses        15341                       # number of overall MSHR uncacheable misses
+system.cpu2.l1c.overall_mshr_miss_latency   1421241469                       # number of overall MSHR miss cycles
+system.cpu2.l1c.overall_mshr_miss_rate       0.876992                       # mshr miss rate for overall accesses
+system.cpu2.l1c.overall_mshr_misses             60530                       # number of overall MSHR misses
+system.cpu2.l1c.overall_mshr_uncacheable_latency    840798944                       # number of overall MSHR uncacheable cycles
+system.cpu2.l1c.overall_mshr_uncacheable_misses            0                       # number of overall MSHR uncacheable misses
 system.cpu2.l1c.prefetcher.num_hwpf_already_in_cache            0                       # number of hwpf that were already in the cache
 system.cpu2.l1c.prefetcher.num_hwpf_already_in_mshr            0                       # number of hwpf that were already in mshr
 system.cpu2.l1c.prefetcher.num_hwpf_already_in_prefetcher            0                       # number of hwpf that were already in the prefetch queue
@@ -290,104 +233,76 @@ system.cpu2.l1c.prefetcher.num_hwpf_issued            0                       #
 system.cpu2.l1c.prefetcher.num_hwpf_removed_MSHR_hit            0                       # number of hwpf removed because MSHR allocated
 system.cpu2.l1c.prefetcher.num_hwpf_span_page            0                       # number of hwpf spanning a virtual page
 system.cpu2.l1c.prefetcher.num_hwpf_squashed_from_miss            0                       # number of hwpf that got squashed due to a miss aborting calculation time
-system.cpu2.l1c.protocol.hwpf_invalid               0                       # hard prefetch misses to invalid blocks
-system.cpu2.l1c.protocol.read_invalid         1818161                       # read misses to invalid blocks
-system.cpu2.l1c.protocol.snoop_inv_exclusive            0                       # Invalidate snoops on exclusive blocks
-system.cpu2.l1c.protocol.snoop_inv_invalid            0                       # Invalidate snoops on invalid blocks
-system.cpu2.l1c.protocol.snoop_inv_modified            0                       # Invalidate snoops on modified blocks
-system.cpu2.l1c.protocol.snoop_inv_owned            0                       # Invalidate snoops on owned blocks
-system.cpu2.l1c.protocol.snoop_inv_shared            0                       # Invalidate snoops on shared blocks
-system.cpu2.l1c.protocol.snoop_read_exclusive         2846                       # read snoops on exclusive blocks
-system.cpu2.l1c.protocol.snoop_read_modified        12505                       # read snoops on modified blocks
-system.cpu2.l1c.protocol.snoop_read_owned         7354                       # read snoops on owned blocks
-system.cpu2.l1c.protocol.snoop_read_shared      1719896                       # read snoops on shared blocks
-system.cpu2.l1c.protocol.snoop_readex_exclusive         1512                       # readEx snoops on exclusive blocks
-system.cpu2.l1c.protocol.snoop_readex_modified         6836                       # readEx snoops on modified blocks
-system.cpu2.l1c.protocol.snoop_readex_owned         4066                       # readEx snoops on owned blocks
-system.cpu2.l1c.protocol.snoop_readex_shared        12494                       # readEx snoops on shared blocks
-system.cpu2.l1c.protocol.snoop_upgrade_owned          828                       # upgrade snoops on owned blocks
-system.cpu2.l1c.protocol.snoop_upgrade_shared         2975                       # upgradee snoops on shared blocks
-system.cpu2.l1c.protocol.snoop_writeinv_exclusive            0                       # WriteInvalidate snoops on exclusive blocks
-system.cpu2.l1c.protocol.snoop_writeinv_invalid            0                       # WriteInvalidate snoops on invalid blocks
-system.cpu2.l1c.protocol.snoop_writeinv_modified            0                       # WriteInvalidate snoops on modified blocks
-system.cpu2.l1c.protocol.snoop_writeinv_owned            0                       # WriteInvalidate snoops on owned blocks
-system.cpu2.l1c.protocol.snoop_writeinv_shared            0                       # WriteInvalidate snoops on shared blocks
-system.cpu2.l1c.protocol.swpf_invalid               0                       # soft prefetch misses to invalid blocks
-system.cpu2.l1c.protocol.write_invalid        1061132                       # write misses to invalid blocks
-system.cpu2.l1c.protocol.write_owned             1410                       # write misses to owned blocks
-system.cpu2.l1c.protocol.write_shared            4436                       # write misses to shared blocks
-system.cpu2.l1c.replacements                    27337                       # number of replacements
-system.cpu2.l1c.sampled_refs                    27674                       # Sample count of references to valid blocks.
+system.cpu2.l1c.replacements                    27570                       # number of replacements
+system.cpu2.l1c.sampled_refs                    27912                       # Sample count of references to valid blocks.
 system.cpu2.l1c.soft_prefetch_mshr_full             0                       # number of mshr full events for SW prefetching instrutions
-system.cpu2.l1c.tagsinuse                  343.290844                       # Cycle average of tags in use
-system.cpu2.l1c.total_refs                      11477                       # Total number of references to valid blocks.
+system.cpu2.l1c.tagsinuse                  346.579014                       # Cycle average of tags in use
+system.cpu2.l1c.total_refs                      11643                       # Total number of references to valid blocks.
 system.cpu2.l1c.warmup_cycle                        0                       # Cycle when the warmup percentage was hit.
-system.cpu2.l1c.writebacks                      10872                       # number of writebacks
+system.cpu2.l1c.writebacks                      10678                       # number of writebacks
 system.cpu2.num_copies                              0                       # number of copy accesses completed
-system.cpu2.num_reads                           98887                       # number of read accesses completed
-system.cpu2.num_writes                          53640                       # number of write accesses completed
-system.cpu3.l1c.ReadReq_accesses                44566                       # number of ReadReq accesses(hits+misses)
-system.cpu3.l1c.ReadReq_avg_miss_latency 14066.553951                       # average ReadReq miss latency
-system.cpu3.l1c.ReadReq_avg_mshr_miss_latency 13052.525235                       # average ReadReq mshr miss latency
-system.cpu3.l1c.ReadReq_hits                     7375                       # number of ReadReq hits
-system.cpu3.l1c.ReadReq_miss_latency        523149208                       # number of ReadReq miss cycles
-system.cpu3.l1c.ReadReq_miss_rate            0.834515                       # miss rate for ReadReq accesses
-system.cpu3.l1c.ReadReq_misses                  37191                       # number of ReadReq misses
-system.cpu3.l1c.ReadReq_mshr_miss_latency    485436466                       # number of ReadReq MSHR miss cycles
-system.cpu3.l1c.ReadReq_mshr_miss_rate       0.834515                       # mshr miss rate for ReadReq accesses
-system.cpu3.l1c.ReadReq_mshr_misses             37191                       # number of ReadReq MSHR misses
-system.cpu3.l1c.ReadReq_mshr_uncacheable         9820                       # number of ReadReq MSHR uncacheable
-system.cpu3.l1c.ReadResp_avg_mshr_uncacheable_latency          inf                       # average ReadResp mshr uncacheable latency
-system.cpu3.l1c.ReadResp_mshr_uncacheable_latency    252799971                       # number of ReadResp MSHR uncacheable cycles
-system.cpu3.l1c.WriteReq_accesses               24030                       # number of WriteReq accesses(hits+misses)
-system.cpu3.l1c.WriteReq_avg_miss_latency 12807.474484                       # average WriteReq miss latency
-system.cpu3.l1c.WriteReq_avg_mshr_miss_latency 11345.837164                       # average WriteReq mshr miss latency
-system.cpu3.l1c.WriteReq_hits                    1142                       # number of WriteReq hits
-system.cpu3.l1c.WriteReq_miss_latency       293137476                       # number of WriteReq miss cycles
-system.cpu3.l1c.WriteReq_miss_rate           0.952476                       # miss rate for WriteReq accesses
-system.cpu3.l1c.WriteReq_misses                 22888                       # number of WriteReq misses
-system.cpu3.l1c.WriteReq_mshr_miss_latency    259683521                       # number of WriteReq MSHR miss cycles
-system.cpu3.l1c.WriteReq_mshr_miss_rate      0.952476                       # mshr miss rate for WriteReq accesses
-system.cpu3.l1c.WriteReq_mshr_misses            22888                       # number of WriteReq MSHR misses
-system.cpu3.l1c.WriteReq_mshr_uncacheable         5294                       # number of WriteReq MSHR uncacheable
-system.cpu3.l1c.WriteResp_avg_mshr_uncacheable_latency          inf                       # average WriteResp mshr uncacheable latency
-system.cpu3.l1c.WriteResp_mshr_uncacheable_latency    159218905                       # number of WriteResp MSHR uncacheable cycles
-system.cpu3.l1c.avg_blocked_cycles_no_mshrs  1193.729049                       # average number of cycles each access was blocked
+system.cpu2.num_reads                           99982                       # number of read accesses completed
+system.cpu2.num_writes                          53451                       # number of write accesses completed
+system.cpu3.l1c.ReadReq_accesses                45026                       # number of ReadReq accesses(hits+misses)
+system.cpu3.l1c.ReadReq_avg_miss_latency 22627.689991                       # average ReadReq miss latency
+system.cpu3.l1c.ReadReq_avg_mshr_miss_latency 22619.691218                       # average ReadReq mshr miss latency
+system.cpu3.l1c.ReadReq_avg_mshr_uncacheable_latency          inf                       # average ReadReq mshr uncacheable latency
+system.cpu3.l1c.ReadReq_hits                     7540                       # number of ReadReq hits
+system.cpu3.l1c.ReadReq_miss_latency        848221587                       # number of ReadReq miss cycles
+system.cpu3.l1c.ReadReq_miss_rate            0.832541                       # miss rate for ReadReq accesses
+system.cpu3.l1c.ReadReq_misses                  37486                       # number of ReadReq misses
+system.cpu3.l1c.ReadReq_mshr_miss_latency    847921745                       # number of ReadReq MSHR miss cycles
+system.cpu3.l1c.ReadReq_mshr_miss_rate       0.832541                       # mshr miss rate for ReadReq accesses
+system.cpu3.l1c.ReadReq_mshr_misses             37486                       # number of ReadReq MSHR misses
+system.cpu3.l1c.ReadReq_mshr_uncacheable_latency    521058272                       # number of ReadReq MSHR uncacheable cycles
+system.cpu3.l1c.WriteReq_accesses               24496                       # number of WriteReq accesses(hits+misses)
+system.cpu3.l1c.WriteReq_avg_miss_latency 24499.134103                       # average WriteReq miss latency
+system.cpu3.l1c.WriteReq_avg_mshr_miss_latency 24491.950730                       # average WriteReq mshr miss latency
+system.cpu3.l1c.WriteReq_avg_mshr_uncacheable_latency          inf                       # average WriteReq mshr uncacheable latency
+system.cpu3.l1c.WriteReq_hits                     932                       # number of WriteReq hits
+system.cpu3.l1c.WriteReq_miss_latency       577297596                       # number of WriteReq miss cycles
+system.cpu3.l1c.WriteReq_miss_rate           0.961953                       # miss rate for WriteReq accesses
+system.cpu3.l1c.WriteReq_misses                 23564                       # number of WriteReq misses
+system.cpu3.l1c.WriteReq_mshr_miss_latency    577128327                       # number of WriteReq MSHR miss cycles
+system.cpu3.l1c.WriteReq_mshr_miss_rate      0.961953                       # mshr miss rate for WriteReq accesses
+system.cpu3.l1c.WriteReq_mshr_misses            23564                       # number of WriteReq MSHR misses
+system.cpu3.l1c.WriteReq_mshr_uncacheable_latency    316556554                       # number of WriteReq MSHR uncacheable cycles
+system.cpu3.l1c.avg_blocked_cycles_no_mshrs  2277.071019                       # average number of cycles each access was blocked
 system.cpu3.l1c.avg_blocked_cycles_no_targets <err: div-0>                       # average number of cycles each access was blocked
-system.cpu3.l1c.avg_refs                     0.411345                       # Average number of references to valid blocks.
-system.cpu3.l1c.blocked_no_mshrs                69160                       # number of cycles access was blocked
+system.cpu3.l1c.avg_refs                     0.408241                       # Average number of references to valid blocks.
+system.cpu3.l1c.blocked_no_mshrs                69700                       # number of cycles access was blocked
 system.cpu3.l1c.blocked_no_targets                  0                       # number of cycles access was blocked
-system.cpu3.l1c.blocked_cycles_no_mshrs      82558301                       # number of cycles access was blocked
+system.cpu3.l1c.blocked_cycles_no_mshrs     158711850                       # number of cycles access was blocked
 system.cpu3.l1c.blocked_cycles_no_targets            0                       # number of cycles access was blocked
 system.cpu3.l1c.cache_copies                        0                       # number of cache copies performed
-system.cpu3.l1c.demand_accesses                 68596                       # number of demand (read+write) accesses
-system.cpu3.l1c.demand_avg_miss_latency  13586.888663                       # average overall miss latency
-system.cpu3.l1c.demand_avg_mshr_miss_latency 12402.336707                       # average overall mshr miss latency
-system.cpu3.l1c.demand_hits                      8517                       # number of demand (read+write) hits
-system.cpu3.l1c.demand_miss_latency         816286684                       # number of demand (read+write) miss cycles
-system.cpu3.l1c.demand_miss_rate             0.875838                       # miss rate for demand accesses
-system.cpu3.l1c.demand_misses                   60079                       # number of demand (read+write) misses
+system.cpu3.l1c.demand_accesses                 69522                       # number of demand (read+write) accesses
+system.cpu3.l1c.demand_avg_miss_latency  23350.027568                       # average overall miss latency
+system.cpu3.l1c.demand_avg_mshr_miss_latency 23342.343522                       # average overall mshr miss latency
+system.cpu3.l1c.demand_hits                      8472                       # number of demand (read+write) hits
+system.cpu3.l1c.demand_miss_latency        1425519183                       # number of demand (read+write) miss cycles
+system.cpu3.l1c.demand_miss_rate             0.878139                       # miss rate for demand accesses
+system.cpu3.l1c.demand_misses                   61050                       # number of demand (read+write) misses
 system.cpu3.l1c.demand_mshr_hits                    0                       # number of demand (read+write) MSHR hits
-system.cpu3.l1c.demand_mshr_miss_latency    745119987                       # number of demand (read+write) MSHR miss cycles
-system.cpu3.l1c.demand_mshr_miss_rate        0.875838                       # mshr miss rate for demand accesses
-system.cpu3.l1c.demand_mshr_misses              60079                       # number of demand (read+write) MSHR misses
+system.cpu3.l1c.demand_mshr_miss_latency   1425050072                       # number of demand (read+write) MSHR miss cycles
+system.cpu3.l1c.demand_mshr_miss_rate        0.878139                       # mshr miss rate for demand accesses
+system.cpu3.l1c.demand_mshr_misses              61050                       # number of demand (read+write) MSHR misses
 system.cpu3.l1c.fast_writes                         0                       # number of fast writes performed
 system.cpu3.l1c.mshr_cap_events                     0                       # number of times MSHR cap was activated
 system.cpu3.l1c.no_allocate_misses                  0                       # Number of misses that were no-allocate
-system.cpu3.l1c.overall_accesses                68596                       # number of overall (read+write) accesses
-system.cpu3.l1c.overall_avg_miss_latency 13586.888663                       # average overall miss latency
-system.cpu3.l1c.overall_avg_mshr_miss_latency 12402.336707                       # average overall mshr miss latency
-system.cpu3.l1c.overall_avg_mshr_uncacheable_latency            0                       # average overall mshr uncacheable latency
-system.cpu3.l1c.overall_hits                     8517                       # number of overall hits
-system.cpu3.l1c.overall_miss_latency        816286684                       # number of overall miss cycles
-system.cpu3.l1c.overall_miss_rate            0.875838                       # miss rate for overall accesses
-system.cpu3.l1c.overall_misses                  60079                       # number of overall misses
+system.cpu3.l1c.overall_accesses                69522                       # number of overall (read+write) accesses
+system.cpu3.l1c.overall_avg_miss_latency 23350.027568                       # average overall miss latency
+system.cpu3.l1c.overall_avg_mshr_miss_latency 23342.343522                       # average overall mshr miss latency
+system.cpu3.l1c.overall_avg_mshr_uncacheable_latency          inf                       # average overall mshr uncacheable latency
+system.cpu3.l1c.overall_hits                     8472                       # number of overall hits
+system.cpu3.l1c.overall_miss_latency       1425519183                       # number of overall miss cycles
+system.cpu3.l1c.overall_miss_rate            0.878139                       # miss rate for overall accesses
+system.cpu3.l1c.overall_misses                  61050                       # number of overall misses
 system.cpu3.l1c.overall_mshr_hits                   0                       # number of overall MSHR hits
-system.cpu3.l1c.overall_mshr_miss_latency    745119987                       # number of overall MSHR miss cycles
-system.cpu3.l1c.overall_mshr_miss_rate       0.875838                       # mshr miss rate for overall accesses
-system.cpu3.l1c.overall_mshr_misses             60079                       # number of overall MSHR misses
-system.cpu3.l1c.overall_mshr_uncacheable_latency            0                       # number of overall MSHR uncacheable cycles
-system.cpu3.l1c.overall_mshr_uncacheable_misses        15114                       # number of overall MSHR uncacheable misses
+system.cpu3.l1c.overall_mshr_miss_latency   1425050072                       # number of overall MSHR miss cycles
+system.cpu3.l1c.overall_mshr_miss_rate       0.878139                       # mshr miss rate for overall accesses
+system.cpu3.l1c.overall_mshr_misses             61050                       # number of overall MSHR misses
+system.cpu3.l1c.overall_mshr_uncacheable_latency    837614826                       # number of overall MSHR uncacheable cycles
+system.cpu3.l1c.overall_mshr_uncacheable_misses            0                       # number of overall MSHR uncacheable misses
 system.cpu3.l1c.prefetcher.num_hwpf_already_in_cache            0                       # number of hwpf that were already in the cache
 system.cpu3.l1c.prefetcher.num_hwpf_already_in_mshr            0                       # number of hwpf that were already in mshr
 system.cpu3.l1c.prefetcher.num_hwpf_already_in_prefetcher            0                       # number of hwpf that were already in the prefetch queue
@@ -397,104 +312,76 @@ system.cpu3.l1c.prefetcher.num_hwpf_issued            0                       #
 system.cpu3.l1c.prefetcher.num_hwpf_removed_MSHR_hit            0                       # number of hwpf removed because MSHR allocated
 system.cpu3.l1c.prefetcher.num_hwpf_span_page            0                       # number of hwpf spanning a virtual page
 system.cpu3.l1c.prefetcher.num_hwpf_squashed_from_miss            0                       # number of hwpf that got squashed due to a miss aborting calculation time
-system.cpu3.l1c.protocol.hwpf_invalid               0                       # hard prefetch misses to invalid blocks
-system.cpu3.l1c.protocol.read_invalid         1894373                       # read misses to invalid blocks
-system.cpu3.l1c.protocol.snoop_inv_exclusive            0                       # Invalidate snoops on exclusive blocks
-system.cpu3.l1c.protocol.snoop_inv_invalid            0                       # Invalidate snoops on invalid blocks
-system.cpu3.l1c.protocol.snoop_inv_modified            0                       # Invalidate snoops on modified blocks
-system.cpu3.l1c.protocol.snoop_inv_owned            0                       # Invalidate snoops on owned blocks
-system.cpu3.l1c.protocol.snoop_inv_shared            0                       # Invalidate snoops on shared blocks
-system.cpu3.l1c.protocol.snoop_read_exclusive         2902                       # read snoops on exclusive blocks
-system.cpu3.l1c.protocol.snoop_read_modified        12291                       # read snoops on modified blocks
-system.cpu3.l1c.protocol.snoop_read_owned         7221                       # read snoops on owned blocks
-system.cpu3.l1c.protocol.snoop_read_shared      1743434                       # read snoops on shared blocks
-system.cpu3.l1c.protocol.snoop_readex_exclusive         1553                       # readEx snoops on exclusive blocks
-system.cpu3.l1c.protocol.snoop_readex_modified         6822                       # readEx snoops on modified blocks
-system.cpu3.l1c.protocol.snoop_readex_owned         3914                       # readEx snoops on owned blocks
-system.cpu3.l1c.protocol.snoop_readex_shared        12477                       # readEx snoops on shared blocks
-system.cpu3.l1c.protocol.snoop_upgrade_owned          867                       # upgrade snoops on owned blocks
-system.cpu3.l1c.protocol.snoop_upgrade_shared         3008                       # upgradee snoops on shared blocks
-system.cpu3.l1c.protocol.snoop_writeinv_exclusive            0                       # WriteInvalidate snoops on exclusive blocks
-system.cpu3.l1c.protocol.snoop_writeinv_invalid            0                       # WriteInvalidate snoops on invalid blocks
-system.cpu3.l1c.protocol.snoop_writeinv_modified            0                       # WriteInvalidate snoops on modified blocks
-system.cpu3.l1c.protocol.snoop_writeinv_owned            0                       # WriteInvalidate snoops on owned blocks
-system.cpu3.l1c.protocol.snoop_writeinv_shared            0                       # WriteInvalidate snoops on shared blocks
-system.cpu3.l1c.protocol.swpf_invalid               0                       # soft prefetch misses to invalid blocks
-system.cpu3.l1c.protocol.write_invalid        1046634                       # write misses to invalid blocks
-system.cpu3.l1c.protocol.write_owned             1364                       # write misses to owned blocks
-system.cpu3.l1c.protocol.write_shared            4484                       # write misses to shared blocks
-system.cpu3.l1c.replacements                    27286                       # number of replacements
-system.cpu3.l1c.sampled_refs                    27624                       # Sample count of references to valid blocks.
+system.cpu3.l1c.replacements                    28153                       # number of replacements
+system.cpu3.l1c.sampled_refs                    28515                       # Sample count of references to valid blocks.
 system.cpu3.l1c.soft_prefetch_mshr_full             0                       # number of mshr full events for SW prefetching instrutions
-system.cpu3.l1c.tagsinuse                  342.290575                       # Cycle average of tags in use
-system.cpu3.l1c.total_refs                      11363                       # Total number of references to valid blocks.
+system.cpu3.l1c.tagsinuse                  348.493440                       # Cycle average of tags in use
+system.cpu3.l1c.total_refs                      11641                       # Total number of references to valid blocks.
 system.cpu3.l1c.warmup_cycle                        0                       # Cycle when the warmup percentage was hit.
-system.cpu3.l1c.writebacks                      10681                       # number of writebacks
+system.cpu3.l1c.writebacks                      11085                       # number of writebacks
 system.cpu3.num_copies                              0                       # number of copy accesses completed
-system.cpu3.num_reads                           99322                       # number of read accesses completed
-system.cpu3.num_writes                          53280                       # number of write accesses completed
-system.cpu4.l1c.ReadReq_accesses                44971                       # number of ReadReq accesses(hits+misses)
-system.cpu4.l1c.ReadReq_avg_miss_latency 13943.186039                       # average ReadReq miss latency
-system.cpu4.l1c.ReadReq_avg_mshr_miss_latency 12937.718615                       # average ReadReq mshr miss latency
-system.cpu4.l1c.ReadReq_hits                     7581                       # number of ReadReq hits
-system.cpu4.l1c.ReadReq_miss_latency        521335726                       # number of ReadReq miss cycles
-system.cpu4.l1c.ReadReq_miss_rate            0.831425                       # miss rate for ReadReq accesses
-system.cpu4.l1c.ReadReq_misses                  37390                       # number of ReadReq misses
-system.cpu4.l1c.ReadReq_mshr_miss_latency    483741299                       # number of ReadReq MSHR miss cycles
-system.cpu4.l1c.ReadReq_mshr_miss_rate       0.831425                       # mshr miss rate for ReadReq accesses
-system.cpu4.l1c.ReadReq_mshr_misses             37390                       # number of ReadReq MSHR misses
-system.cpu4.l1c.ReadReq_mshr_uncacheable         9931                       # number of ReadReq MSHR uncacheable
-system.cpu4.l1c.ReadResp_avg_mshr_uncacheable_latency          inf                       # average ReadResp mshr uncacheable latency
-system.cpu4.l1c.ReadResp_mshr_uncacheable_latency    254015216                       # number of ReadResp MSHR uncacheable cycles
-system.cpu4.l1c.WriteReq_accesses               24134                       # number of WriteReq accesses(hits+misses)
-system.cpu4.l1c.WriteReq_avg_miss_latency 12764.573629                       # average WriteReq miss latency
-system.cpu4.l1c.WriteReq_avg_mshr_miss_latency 11273.971841                       # average WriteReq mshr miss latency
-system.cpu4.l1c.WriteReq_hits                    1086                       # number of WriteReq hits
-system.cpu4.l1c.WriteReq_miss_latency       294197893                       # number of WriteReq miss cycles
-system.cpu4.l1c.WriteReq_miss_rate           0.955001                       # miss rate for WriteReq accesses
-system.cpu4.l1c.WriteReq_misses                 23048                       # number of WriteReq misses
-system.cpu4.l1c.WriteReq_mshr_miss_latency    259842503                       # number of WriteReq MSHR miss cycles
-system.cpu4.l1c.WriteReq_mshr_miss_rate      0.955001                       # mshr miss rate for WriteReq accesses
-system.cpu4.l1c.WriteReq_mshr_misses            23048                       # number of WriteReq MSHR misses
-system.cpu4.l1c.WriteReq_mshr_uncacheable         5390                       # number of WriteReq MSHR uncacheable
-system.cpu4.l1c.WriteResp_avg_mshr_uncacheable_latency          inf                       # average WriteResp mshr uncacheable latency
-system.cpu4.l1c.WriteResp_mshr_uncacheable_latency    161643344                       # number of WriteResp MSHR uncacheable cycles
-system.cpu4.l1c.avg_blocked_cycles_no_mshrs  1186.636056                       # average number of cycles each access was blocked
+system.cpu3.num_reads                           99697                       # number of read accesses completed
+system.cpu3.num_writes                          54254                       # number of write accesses completed
+system.cpu4.l1c.ReadReq_accesses                44695                       # number of ReadReq accesses(hits+misses)
+system.cpu4.l1c.ReadReq_avg_miss_latency 22595.724111                       # average ReadReq miss latency
+system.cpu4.l1c.ReadReq_avg_mshr_miss_latency 22587.725051                       # average ReadReq mshr miss latency
+system.cpu4.l1c.ReadReq_avg_mshr_uncacheable_latency          inf                       # average ReadReq mshr uncacheable latency
+system.cpu4.l1c.ReadReq_hits                     7459                       # number of ReadReq hits
+system.cpu4.l1c.ReadReq_miss_latency        841374383                       # number of ReadReq miss cycles
+system.cpu4.l1c.ReadReq_miss_rate            0.833113                       # miss rate for ReadReq accesses
+system.cpu4.l1c.ReadReq_misses                  37236                       # number of ReadReq misses
+system.cpu4.l1c.ReadReq_mshr_miss_latency    841076530                       # number of ReadReq MSHR miss cycles
+system.cpu4.l1c.ReadReq_mshr_miss_rate       0.833113                       # mshr miss rate for ReadReq accesses
+system.cpu4.l1c.ReadReq_mshr_misses             37236                       # number of ReadReq MSHR misses
+system.cpu4.l1c.ReadReq_mshr_uncacheable_latency    521925270                       # number of ReadReq MSHR uncacheable cycles
+system.cpu4.l1c.WriteReq_accesses               24320                       # number of WriteReq accesses(hits+misses)
+system.cpu4.l1c.WriteReq_avg_miss_latency 24976.967619                       # average WriteReq miss latency
+system.cpu4.l1c.WriteReq_avg_mshr_miss_latency 24969.752460                       # average WriteReq mshr miss latency
+system.cpu4.l1c.WriteReq_avg_mshr_uncacheable_latency          inf                       # average WriteReq mshr uncacheable latency
+system.cpu4.l1c.WriteReq_hits                     942                       # number of WriteReq hits
+system.cpu4.l1c.WriteReq_miss_latency       583911549                       # number of WriteReq miss cycles
+system.cpu4.l1c.WriteReq_miss_rate           0.961266                       # miss rate for WriteReq accesses
+system.cpu4.l1c.WriteReq_misses                 23378                       # number of WriteReq misses
+system.cpu4.l1c.WriteReq_mshr_miss_latency    583742873                       # number of WriteReq MSHR miss cycles
+system.cpu4.l1c.WriteReq_mshr_miss_rate      0.961266                       # mshr miss rate for WriteReq accesses
+system.cpu4.l1c.WriteReq_mshr_misses            23378                       # number of WriteReq MSHR misses
+system.cpu4.l1c.WriteReq_mshr_uncacheable_latency    314744590                       # number of WriteReq MSHR uncacheable cycles
+system.cpu4.l1c.avg_blocked_cycles_no_mshrs  2286.910395                       # average number of cycles each access was blocked
 system.cpu4.l1c.avg_blocked_cycles_no_targets <err: div-0>                       # average number of cycles each access was blocked
-system.cpu4.l1c.avg_refs                     0.410931                       # Average number of references to valid blocks.
-system.cpu4.l1c.blocked_no_mshrs                69637                       # number of cycles access was blocked
+system.cpu4.l1c.avg_refs                     0.401516                       # Average number of references to valid blocks.
+system.cpu4.l1c.blocked_no_mshrs                69382                       # number of cycles access was blocked
 system.cpu4.l1c.blocked_no_targets                  0                       # number of cycles access was blocked
-system.cpu4.l1c.blocked_cycles_no_mshrs      82633775                       # number of cycles access was blocked
+system.cpu4.l1c.blocked_cycles_no_mshrs     158670417                       # number of cycles access was blocked
 system.cpu4.l1c.blocked_cycles_no_targets            0                       # number of cycles access was blocked
 system.cpu4.l1c.cache_copies                        0                       # number of cache copies performed
-system.cpu4.l1c.demand_accesses                 69105                       # number of demand (read+write) accesses
-system.cpu4.l1c.demand_avg_miss_latency  13493.722807                       # average overall miss latency
-system.cpu4.l1c.demand_avg_mshr_miss_latency 12303.249644                       # average overall mshr miss latency
-system.cpu4.l1c.demand_hits                      8667                       # number of demand (read+write) hits
-system.cpu4.l1c.demand_miss_latency         815533619                       # number of demand (read+write) miss cycles
-system.cpu4.l1c.demand_miss_rate             0.874582                       # miss rate for demand accesses
-system.cpu4.l1c.demand_misses                   60438                       # number of demand (read+write) misses
+system.cpu4.l1c.demand_accesses                 69015                       # number of demand (read+write) accesses
+system.cpu4.l1c.demand_avg_miss_latency  23514.137526                       # average overall miss latency
+system.cpu4.l1c.demand_avg_mshr_miss_latency 23506.440806                       # average overall mshr miss latency
+system.cpu4.l1c.demand_hits                      8401                       # number of demand (read+write) hits
+system.cpu4.l1c.demand_miss_latency        1425285932                       # number of demand (read+write) miss cycles
+system.cpu4.l1c.demand_miss_rate             0.878273                       # miss rate for demand accesses
+system.cpu4.l1c.demand_misses                   60614                       # number of demand (read+write) misses
 system.cpu4.l1c.demand_mshr_hits                    0                       # number of demand (read+write) MSHR hits
-system.cpu4.l1c.demand_mshr_miss_latency    743583802                       # number of demand (read+write) MSHR miss cycles
-system.cpu4.l1c.demand_mshr_miss_rate        0.874582                       # mshr miss rate for demand accesses
-system.cpu4.l1c.demand_mshr_misses              60438                       # number of demand (read+write) MSHR misses
+system.cpu4.l1c.demand_mshr_miss_latency   1424819403                       # number of demand (read+write) MSHR miss cycles
+system.cpu4.l1c.demand_mshr_miss_rate        0.878273                       # mshr miss rate for demand accesses
+system.cpu4.l1c.demand_mshr_misses              60614                       # number of demand (read+write) MSHR misses
 system.cpu4.l1c.fast_writes                         0                       # number of fast writes performed
 system.cpu4.l1c.mshr_cap_events                     0                       # number of times MSHR cap was activated
 system.cpu4.l1c.no_allocate_misses                  0                       # Number of misses that were no-allocate
-system.cpu4.l1c.overall_accesses                69105                       # number of overall (read+write) accesses
-system.cpu4.l1c.overall_avg_miss_latency 13493.722807                       # average overall miss latency
-system.cpu4.l1c.overall_avg_mshr_miss_latency 12303.249644                       # average overall mshr miss latency
-system.cpu4.l1c.overall_avg_mshr_uncacheable_latency            0                       # average overall mshr uncacheable latency
-system.cpu4.l1c.overall_hits                     8667                       # number of overall hits
-system.cpu4.l1c.overall_miss_latency        815533619                       # number of overall miss cycles
-system.cpu4.l1c.overall_miss_rate            0.874582                       # miss rate for overall accesses
-system.cpu4.l1c.overall_misses                  60438                       # number of overall misses
+system.cpu4.l1c.overall_accesses                69015                       # number of overall (read+write) accesses
+system.cpu4.l1c.overall_avg_miss_latency 23514.137526                       # average overall miss latency
+system.cpu4.l1c.overall_avg_mshr_miss_latency 23506.440806                       # average overall mshr miss latency
+system.cpu4.l1c.overall_avg_mshr_uncacheable_latency          inf                       # average overall mshr uncacheable latency
+system.cpu4.l1c.overall_hits                     8401                       # number of overall hits
+system.cpu4.l1c.overall_miss_latency       1425285932                       # number of overall miss cycles
+system.cpu4.l1c.overall_miss_rate            0.878273                       # miss rate for overall accesses
+system.cpu4.l1c.overall_misses                  60614                       # number of overall misses
 system.cpu4.l1c.overall_mshr_hits                   0                       # number of overall MSHR hits
-system.cpu4.l1c.overall_mshr_miss_latency    743583802                       # number of overall MSHR miss cycles
-system.cpu4.l1c.overall_mshr_miss_rate       0.874582                       # mshr miss rate for overall accesses
-system.cpu4.l1c.overall_mshr_misses             60438                       # number of overall MSHR misses
-system.cpu4.l1c.overall_mshr_uncacheable_latency            0                       # number of overall MSHR uncacheable cycles
-system.cpu4.l1c.overall_mshr_uncacheable_misses        15321                       # number of overall MSHR uncacheable misses
+system.cpu4.l1c.overall_mshr_miss_latency   1424819403                       # number of overall MSHR miss cycles
+system.cpu4.l1c.overall_mshr_miss_rate       0.878273                       # mshr miss rate for overall accesses
+system.cpu4.l1c.overall_mshr_misses             60614                       # number of overall MSHR misses
+system.cpu4.l1c.overall_mshr_uncacheable_latency    836669860                       # number of overall MSHR uncacheable cycles
+system.cpu4.l1c.overall_mshr_uncacheable_misses            0                       # number of overall MSHR uncacheable misses
 system.cpu4.l1c.prefetcher.num_hwpf_already_in_cache            0                       # number of hwpf that were already in the cache
 system.cpu4.l1c.prefetcher.num_hwpf_already_in_mshr            0                       # number of hwpf that were already in mshr
 system.cpu4.l1c.prefetcher.num_hwpf_already_in_prefetcher            0                       # number of hwpf that were already in the prefetch queue
@@ -504,104 +391,76 @@ system.cpu4.l1c.prefetcher.num_hwpf_issued            0                       #
 system.cpu4.l1c.prefetcher.num_hwpf_removed_MSHR_hit            0                       # number of hwpf removed because MSHR allocated
 system.cpu4.l1c.prefetcher.num_hwpf_span_page            0                       # number of hwpf spanning a virtual page
 system.cpu4.l1c.prefetcher.num_hwpf_squashed_from_miss            0                       # number of hwpf that got squashed due to a miss aborting calculation time
-system.cpu4.l1c.protocol.hwpf_invalid               0                       # hard prefetch misses to invalid blocks
-system.cpu4.l1c.protocol.read_invalid         1830675                       # read misses to invalid blocks
-system.cpu4.l1c.protocol.snoop_inv_exclusive            0                       # Invalidate snoops on exclusive blocks
-system.cpu4.l1c.protocol.snoop_inv_invalid            0                       # Invalidate snoops on invalid blocks
-system.cpu4.l1c.protocol.snoop_inv_modified            0                       # Invalidate snoops on modified blocks
-system.cpu4.l1c.protocol.snoop_inv_owned            0                       # Invalidate snoops on owned blocks
-system.cpu4.l1c.protocol.snoop_inv_shared            0                       # Invalidate snoops on shared blocks
-system.cpu4.l1c.protocol.snoop_read_exclusive         2847                       # read snoops on exclusive blocks
-system.cpu4.l1c.protocol.snoop_read_modified        12499                       # read snoops on modified blocks
-system.cpu4.l1c.protocol.snoop_read_owned         7458                       # read snoops on owned blocks
-system.cpu4.l1c.protocol.snoop_read_shared      1765770                       # read snoops on shared blocks
-system.cpu4.l1c.protocol.snoop_readex_exclusive         1560                       # readEx snoops on exclusive blocks
-system.cpu4.l1c.protocol.snoop_readex_modified         6711                       # readEx snoops on modified blocks
-system.cpu4.l1c.protocol.snoop_readex_owned         3919                       # readEx snoops on owned blocks
-system.cpu4.l1c.protocol.snoop_readex_shared        12526                       # readEx snoops on shared blocks
-system.cpu4.l1c.protocol.snoop_upgrade_owned          902                       # upgrade snoops on owned blocks
-system.cpu4.l1c.protocol.snoop_upgrade_shared         3023                       # upgradee snoops on shared blocks
-system.cpu4.l1c.protocol.snoop_writeinv_exclusive            0                       # WriteInvalidate snoops on exclusive blocks
-system.cpu4.l1c.protocol.snoop_writeinv_invalid            0                       # WriteInvalidate snoops on invalid blocks
-system.cpu4.l1c.protocol.snoop_writeinv_modified            0                       # WriteInvalidate snoops on modified blocks
-system.cpu4.l1c.protocol.snoop_writeinv_owned            0                       # WriteInvalidate snoops on owned blocks
-system.cpu4.l1c.protocol.snoop_writeinv_shared            0                       # WriteInvalidate snoops on shared blocks
-system.cpu4.l1c.protocol.swpf_invalid               0                       # soft prefetch misses to invalid blocks
-system.cpu4.l1c.protocol.write_invalid         854606                       # write misses to invalid blocks
-system.cpu4.l1c.protocol.write_owned             1318                       # write misses to owned blocks
-system.cpu4.l1c.protocol.write_shared            4519                       # write misses to shared blocks
-system.cpu4.l1c.replacements                    27664                       # number of replacements
-system.cpu4.l1c.sampled_refs                    28012                       # Sample count of references to valid blocks.
+system.cpu4.l1c.replacements                    28031                       # number of replacements
+system.cpu4.l1c.sampled_refs                    28370                       # Sample count of references to valid blocks.
 system.cpu4.l1c.soft_prefetch_mshr_full             0                       # number of mshr full events for SW prefetching instrutions
-system.cpu4.l1c.tagsinuse                  344.185288                       # Cycle average of tags in use
-system.cpu4.l1c.total_refs                      11511                       # Total number of references to valid blocks.
+system.cpu4.l1c.tagsinuse                  347.544315                       # Cycle average of tags in use
+system.cpu4.l1c.total_refs                      11391                       # Total number of references to valid blocks.
 system.cpu4.l1c.warmup_cycle                        0                       # Cycle when the warmup percentage was hit.
-system.cpu4.l1c.writebacks                      10935                       # number of writebacks
+system.cpu4.l1c.writebacks                      11138                       # number of writebacks
 system.cpu4.num_copies                              0                       # number of copy accesses completed
-system.cpu4.num_reads                           99841                       # number of read accesses completed
-system.cpu4.num_writes                          54005                       # number of write accesses completed
-system.cpu5.l1c.ReadReq_accesses                45075                       # number of ReadReq accesses(hits+misses)
-system.cpu5.l1c.ReadReq_avg_miss_latency 13980.675167                       # average ReadReq miss latency
-system.cpu5.l1c.ReadReq_avg_mshr_miss_latency 12974.186518                       # average ReadReq mshr miss latency
-system.cpu5.l1c.ReadReq_hits                     7588                       # number of ReadReq hits
-system.cpu5.l1c.ReadReq_miss_latency        524093570                       # number of ReadReq miss cycles
-system.cpu5.l1c.ReadReq_miss_rate            0.831658                       # miss rate for ReadReq accesses
-system.cpu5.l1c.ReadReq_misses                  37487                       # number of ReadReq misses
-system.cpu5.l1c.ReadReq_mshr_miss_latency    486363330                       # number of ReadReq MSHR miss cycles
-system.cpu5.l1c.ReadReq_mshr_miss_rate       0.831658                       # mshr miss rate for ReadReq accesses
-system.cpu5.l1c.ReadReq_mshr_misses             37487                       # number of ReadReq MSHR misses
-system.cpu5.l1c.ReadReq_mshr_uncacheable         9769                       # number of ReadReq MSHR uncacheable
-system.cpu5.l1c.ReadResp_avg_mshr_uncacheable_latency          inf                       # average ReadResp mshr uncacheable latency
-system.cpu5.l1c.ReadResp_mshr_uncacheable_latency    252483534                       # number of ReadResp MSHR uncacheable cycles
-system.cpu5.l1c.WriteReq_accesses               24120                       # number of WriteReq accesses(hits+misses)
-system.cpu5.l1c.WriteReq_avg_miss_latency 12733.111936                       # average WriteReq miss latency
-system.cpu5.l1c.WriteReq_avg_mshr_miss_latency 11249.826210                       # average WriteReq mshr miss latency
-system.cpu5.l1c.WriteReq_hits                    1098                       # number of WriteReq hits
-system.cpu5.l1c.WriteReq_miss_latency       293141703                       # number of WriteReq miss cycles
-system.cpu5.l1c.WriteReq_miss_rate           0.954478                       # miss rate for WriteReq accesses
-system.cpu5.l1c.WriteReq_misses                 23022                       # number of WriteReq misses
-system.cpu5.l1c.WriteReq_mshr_miss_latency    258993499                       # number of WriteReq MSHR miss cycles
-system.cpu5.l1c.WriteReq_mshr_miss_rate      0.954478                       # mshr miss rate for WriteReq accesses
-system.cpu5.l1c.WriteReq_mshr_misses            23022                       # number of WriteReq MSHR misses
-system.cpu5.l1c.WriteReq_mshr_uncacheable         5232                       # number of WriteReq MSHR uncacheable
-system.cpu5.l1c.WriteResp_avg_mshr_uncacheable_latency          inf                       # average WriteResp mshr uncacheable latency
-system.cpu5.l1c.WriteResp_mshr_uncacheable_latency    155064988                       # number of WriteResp MSHR uncacheable cycles
-system.cpu5.l1c.avg_blocked_cycles_no_mshrs  1188.349008                       # average number of cycles each access was blocked
+system.cpu4.num_reads                           99375                       # number of read accesses completed
+system.cpu4.num_writes                          53856                       # number of write accesses completed
+system.cpu5.l1c.ReadReq_accesses                44846                       # number of ReadReq accesses(hits+misses)
+system.cpu5.l1c.ReadReq_avg_miss_latency 22795.859807                       # average ReadReq miss latency
+system.cpu5.l1c.ReadReq_avg_mshr_miss_latency 22787.860584                       # average ReadReq mshr miss latency
+system.cpu5.l1c.ReadReq_avg_mshr_uncacheable_latency          inf                       # average ReadReq mshr uncacheable latency
+system.cpu5.l1c.ReadReq_hits                     7526                       # number of ReadReq hits
+system.cpu5.l1c.ReadReq_miss_latency        850741488                       # number of ReadReq miss cycles
+system.cpu5.l1c.ReadReq_miss_rate            0.832181                       # miss rate for ReadReq accesses
+system.cpu5.l1c.ReadReq_misses                  37320                       # number of ReadReq misses
+system.cpu5.l1c.ReadReq_mshr_miss_latency    850442957                       # number of ReadReq MSHR miss cycles
+system.cpu5.l1c.ReadReq_mshr_miss_rate       0.832181                       # mshr miss rate for ReadReq accesses
+system.cpu5.l1c.ReadReq_mshr_misses             37320                       # number of ReadReq MSHR misses
+system.cpu5.l1c.ReadReq_mshr_uncacheable_latency    518680326                       # number of ReadReq MSHR uncacheable cycles
+system.cpu5.l1c.WriteReq_accesses               24378                       # number of WriteReq accesses(hits+misses)
+system.cpu5.l1c.WriteReq_avg_miss_latency 24686.676265                       # average WriteReq miss latency
+system.cpu5.l1c.WriteReq_avg_mshr_miss_latency 24679.493004                       # average WriteReq mshr miss latency
+system.cpu5.l1c.WriteReq_avg_mshr_uncacheable_latency          inf                       # average WriteReq mshr uncacheable latency
+system.cpu5.l1c.WriteReq_hits                     936                       # number of WriteReq hits
+system.cpu5.l1c.WriteReq_miss_latency       578705065                       # number of WriteReq miss cycles
+system.cpu5.l1c.WriteReq_miss_rate           0.961605                       # miss rate for WriteReq accesses
+system.cpu5.l1c.WriteReq_misses                 23442                       # number of WriteReq misses
+system.cpu5.l1c.WriteReq_mshr_miss_latency    578536675                       # number of WriteReq MSHR miss cycles
+system.cpu5.l1c.WriteReq_mshr_miss_rate      0.961605                       # mshr miss rate for WriteReq accesses
+system.cpu5.l1c.WriteReq_mshr_misses            23442                       # number of WriteReq MSHR misses
+system.cpu5.l1c.WriteReq_mshr_uncacheable_latency    315478251                       # number of WriteReq MSHR uncacheable cycles
+system.cpu5.l1c.avg_blocked_cycles_no_mshrs  2288.071694                       # average number of cycles each access was blocked
 system.cpu5.l1c.avg_blocked_cycles_no_targets <err: div-0>                       # average number of cycles each access was blocked
-system.cpu5.l1c.avg_refs                     0.414917                       # Average number of references to valid blocks.
-system.cpu5.l1c.blocked_no_mshrs                69537                       # number of cycles access was blocked
+system.cpu5.l1c.avg_refs                     0.412333                       # Average number of references to valid blocks.
+system.cpu5.l1c.blocked_no_mshrs                69434                       # number of cycles access was blocked
 system.cpu5.l1c.blocked_no_targets                  0                       # number of cycles access was blocked
-system.cpu5.l1c.blocked_cycles_no_mshrs      82634225                       # number of cycles access was blocked
+system.cpu5.l1c.blocked_cycles_no_mshrs     158869970                       # number of cycles access was blocked
 system.cpu5.l1c.blocked_cycles_no_targets            0                       # number of cycles access was blocked
 system.cpu5.l1c.cache_copies                        0                       # number of cache copies performed
-system.cpu5.l1c.demand_accesses                 69195                       # number of demand (read+write) accesses
-system.cpu5.l1c.demand_avg_miss_latency  13506.011883                       # average overall miss latency
-system.cpu5.l1c.demand_avg_mshr_miss_latency 12318.115140                       # average overall mshr miss latency
-system.cpu5.l1c.demand_hits                      8686                       # number of demand (read+write) hits
-system.cpu5.l1c.demand_miss_latency         817235273                       # number of demand (read+write) miss cycles
-system.cpu5.l1c.demand_miss_rate             0.874471                       # miss rate for demand accesses
-system.cpu5.l1c.demand_misses                   60509                       # number of demand (read+write) misses
+system.cpu5.l1c.demand_accesses                 69224                       # number of demand (read+write) accesses
+system.cpu5.l1c.demand_avg_miss_latency  23525.337431                       # average overall miss latency
+system.cpu5.l1c.demand_avg_mshr_miss_latency 23517.653007                       # average overall mshr miss latency
+system.cpu5.l1c.demand_hits                      8462                       # number of demand (read+write) hits
+system.cpu5.l1c.demand_miss_latency        1429446553                       # number of demand (read+write) miss cycles
+system.cpu5.l1c.demand_miss_rate             0.877759                       # miss rate for demand accesses
+system.cpu5.l1c.demand_misses                   60762                       # number of demand (read+write) misses
 system.cpu5.l1c.demand_mshr_hits                    0                       # number of demand (read+write) MSHR hits
-system.cpu5.l1c.demand_mshr_miss_latency    745356829                       # number of demand (read+write) MSHR miss cycles
-system.cpu5.l1c.demand_mshr_miss_rate        0.874471                       # mshr miss rate for demand accesses
-system.cpu5.l1c.demand_mshr_misses              60509                       # number of demand (read+write) MSHR misses
+system.cpu5.l1c.demand_mshr_miss_latency   1428979632                       # number of demand (read+write) MSHR miss cycles
+system.cpu5.l1c.demand_mshr_miss_rate        0.877759                       # mshr miss rate for demand accesses
+system.cpu5.l1c.demand_mshr_misses              60762                       # number of demand (read+write) MSHR misses
 system.cpu5.l1c.fast_writes                         0                       # number of fast writes performed
 system.cpu5.l1c.mshr_cap_events                     0                       # number of times MSHR cap was activated
 system.cpu5.l1c.no_allocate_misses                  0                       # Number of misses that were no-allocate
-system.cpu5.l1c.overall_accesses                69195                       # number of overall (read+write) accesses
-system.cpu5.l1c.overall_avg_miss_latency 13506.011883                       # average overall miss latency
-system.cpu5.l1c.overall_avg_mshr_miss_latency 12318.115140                       # average overall mshr miss latency
-system.cpu5.l1c.overall_avg_mshr_uncacheable_latency            0                       # average overall mshr uncacheable latency
-system.cpu5.l1c.overall_hits                     8686                       # number of overall hits
-system.cpu5.l1c.overall_miss_latency        817235273                       # number of overall miss cycles
-system.cpu5.l1c.overall_miss_rate            0.874471                       # miss rate for overall accesses
-system.cpu5.l1c.overall_misses                  60509                       # number of overall misses
+system.cpu5.l1c.overall_accesses                69224                       # number of overall (read+write) accesses
+system.cpu5.l1c.overall_avg_miss_latency 23525.337431                       # average overall miss latency
+system.cpu5.l1c.overall_avg_mshr_miss_latency 23517.653007                       # average overall mshr miss latency
+system.cpu5.l1c.overall_avg_mshr_uncacheable_latency          inf                       # average overall mshr uncacheable latency
+system.cpu5.l1c.overall_hits                     8462                       # number of overall hits
+system.cpu5.l1c.overall_miss_latency       1429446553                       # number of overall miss cycles
+system.cpu5.l1c.overall_miss_rate            0.877759                       # miss rate for overall accesses
+system.cpu5.l1c.overall_misses                  60762                       # number of overall misses
 system.cpu5.l1c.overall_mshr_hits                   0                       # number of overall MSHR hits
-system.cpu5.l1c.overall_mshr_miss_latency    745356829                       # number of overall MSHR miss cycles
-system.cpu5.l1c.overall_mshr_miss_rate       0.874471                       # mshr miss rate for overall accesses
-system.cpu5.l1c.overall_mshr_misses             60509                       # number of overall MSHR misses
-system.cpu5.l1c.overall_mshr_uncacheable_latency            0                       # number of overall MSHR uncacheable cycles
-system.cpu5.l1c.overall_mshr_uncacheable_misses        15001                       # number of overall MSHR uncacheable misses
+system.cpu5.l1c.overall_mshr_miss_latency   1428979632                       # number of overall MSHR miss cycles
+system.cpu5.l1c.overall_mshr_miss_rate       0.877759                       # mshr miss rate for overall accesses
+system.cpu5.l1c.overall_mshr_misses             60762                       # number of overall MSHR misses
+system.cpu5.l1c.overall_mshr_uncacheable_latency    834158577                       # number of overall MSHR uncacheable cycles
+system.cpu5.l1c.overall_mshr_uncacheable_misses            0                       # number of overall MSHR uncacheable misses
 system.cpu5.l1c.prefetcher.num_hwpf_already_in_cache            0                       # number of hwpf that were already in the cache
 system.cpu5.l1c.prefetcher.num_hwpf_already_in_mshr            0                       # number of hwpf that were already in mshr
 system.cpu5.l1c.prefetcher.num_hwpf_already_in_prefetcher            0                       # number of hwpf that were already in the prefetch queue
@@ -611,104 +470,76 @@ system.cpu5.l1c.prefetcher.num_hwpf_issued            0                       #
 system.cpu5.l1c.prefetcher.num_hwpf_removed_MSHR_hit            0                       # number of hwpf removed because MSHR allocated
 system.cpu5.l1c.prefetcher.num_hwpf_span_page            0                       # number of hwpf spanning a virtual page
 system.cpu5.l1c.prefetcher.num_hwpf_squashed_from_miss            0                       # number of hwpf that got squashed due to a miss aborting calculation time
-system.cpu5.l1c.protocol.hwpf_invalid               0                       # hard prefetch misses to invalid blocks
-system.cpu5.l1c.protocol.read_invalid         1718821                       # read misses to invalid blocks
-system.cpu5.l1c.protocol.snoop_inv_exclusive            0                       # Invalidate snoops on exclusive blocks
-system.cpu5.l1c.protocol.snoop_inv_invalid            0                       # Invalidate snoops on invalid blocks
-system.cpu5.l1c.protocol.snoop_inv_modified            0                       # Invalidate snoops on modified blocks
-system.cpu5.l1c.protocol.snoop_inv_owned            0                       # Invalidate snoops on owned blocks
-system.cpu5.l1c.protocol.snoop_inv_shared            0                       # Invalidate snoops on shared blocks
-system.cpu5.l1c.protocol.snoop_read_exclusive         2926                       # read snoops on exclusive blocks
-system.cpu5.l1c.protocol.snoop_read_modified        12465                       # read snoops on modified blocks
-system.cpu5.l1c.protocol.snoop_read_owned         7201                       # read snoops on owned blocks
-system.cpu5.l1c.protocol.snoop_read_shared      1810557                       # read snoops on shared blocks
-system.cpu5.l1c.protocol.snoop_readex_exclusive         1622                       # readEx snoops on exclusive blocks
-system.cpu5.l1c.protocol.snoop_readex_modified         6690                       # readEx snoops on modified blocks
-system.cpu5.l1c.protocol.snoop_readex_owned         3947                       # readEx snoops on owned blocks
-system.cpu5.l1c.protocol.snoop_readex_shared        12574                       # readEx snoops on shared blocks
-system.cpu5.l1c.protocol.snoop_upgrade_owned          818                       # upgrade snoops on owned blocks
-system.cpu5.l1c.protocol.snoop_upgrade_shared         3092                       # upgradee snoops on shared blocks
-system.cpu5.l1c.protocol.snoop_writeinv_exclusive            0                       # WriteInvalidate snoops on exclusive blocks
-system.cpu5.l1c.protocol.snoop_writeinv_invalid            0                       # WriteInvalidate snoops on invalid blocks
-system.cpu5.l1c.protocol.snoop_writeinv_modified            0                       # WriteInvalidate snoops on modified blocks
-system.cpu5.l1c.protocol.snoop_writeinv_owned            0                       # WriteInvalidate snoops on owned blocks
-system.cpu5.l1c.protocol.snoop_writeinv_shared            0                       # WriteInvalidate snoops on shared blocks
-system.cpu5.l1c.protocol.swpf_invalid               0                       # soft prefetch misses to invalid blocks
-system.cpu5.l1c.protocol.write_invalid         914561                       # write misses to invalid blocks
-system.cpu5.l1c.protocol.write_owned             1422                       # write misses to owned blocks
-system.cpu5.l1c.protocol.write_shared            4534                       # write misses to shared blocks
-system.cpu5.l1c.replacements                    27551                       # number of replacements
-system.cpu5.l1c.sampled_refs                    27914                       # Sample count of references to valid blocks.
+system.cpu5.l1c.replacements                    27718                       # number of replacements
+system.cpu5.l1c.sampled_refs                    28055                       # Sample count of references to valid blocks.
 system.cpu5.l1c.soft_prefetch_mshr_full             0                       # number of mshr full events for SW prefetching instrutions
-system.cpu5.l1c.tagsinuse                  344.440637                       # Cycle average of tags in use
-system.cpu5.l1c.total_refs                      11582                       # Total number of references to valid blocks.
+system.cpu5.l1c.tagsinuse                  345.552063                       # Cycle average of tags in use
+system.cpu5.l1c.total_refs                      11568                       # Total number of references to valid blocks.
 system.cpu5.l1c.warmup_cycle                        0                       # Cycle when the warmup percentage was hit.
-system.cpu5.l1c.writebacks                      10931                       # number of writebacks
+system.cpu5.l1c.writebacks                      10910                       # number of writebacks
 system.cpu5.num_copies                              0                       # number of copy accesses completed
-system.cpu5.num_reads                           99674                       # number of read accesses completed
-system.cpu5.num_writes                          53393                       # number of write accesses completed
-system.cpu6.l1c.ReadReq_accesses                44595                       # number of ReadReq accesses(hits+misses)
-system.cpu6.l1c.ReadReq_avg_miss_latency 14001.082353                       # average ReadReq miss latency
-system.cpu6.l1c.ReadReq_avg_mshr_miss_latency 12995.526917                       # average ReadReq mshr miss latency
-system.cpu6.l1c.ReadReq_hits                     7462                       # number of ReadReq hits
-system.cpu6.l1c.ReadReq_miss_latency        519902191                       # number of ReadReq miss cycles
-system.cpu6.l1c.ReadReq_miss_rate            0.832672                       # miss rate for ReadReq accesses
-system.cpu6.l1c.ReadReq_misses                  37133                       # number of ReadReq misses
-system.cpu6.l1c.ReadReq_mshr_miss_latency    482562901                       # number of ReadReq MSHR miss cycles
-system.cpu6.l1c.ReadReq_mshr_miss_rate       0.832672                       # mshr miss rate for ReadReq accesses
-system.cpu6.l1c.ReadReq_mshr_misses             37133                       # number of ReadReq MSHR misses
-system.cpu6.l1c.ReadReq_mshr_uncacheable         9820                       # number of ReadReq MSHR uncacheable
-system.cpu6.l1c.ReadResp_avg_mshr_uncacheable_latency          inf                       # average ReadResp mshr uncacheable latency
-system.cpu6.l1c.ReadResp_mshr_uncacheable_latency    251671127                       # number of ReadResp MSHR uncacheable cycles
-system.cpu6.l1c.WriteReq_accesses               24364                       # number of WriteReq accesses(hits+misses)
-system.cpu6.l1c.WriteReq_avg_miss_latency 12854.640783                       # average WriteReq miss latency
-system.cpu6.l1c.WriteReq_avg_mshr_miss_latency 11385.598176                       # average WriteReq mshr miss latency
-system.cpu6.l1c.WriteReq_hits                    1222                       # number of WriteReq hits
-system.cpu6.l1c.WriteReq_miss_latency       297482097                       # number of WriteReq miss cycles
-system.cpu6.l1c.WriteReq_miss_rate           0.949844                       # miss rate for WriteReq accesses
-system.cpu6.l1c.WriteReq_misses                 23142                       # number of WriteReq misses
-system.cpu6.l1c.WriteReq_mshr_miss_latency    263485513                       # number of WriteReq MSHR miss cycles
-system.cpu6.l1c.WriteReq_mshr_miss_rate      0.949844                       # mshr miss rate for WriteReq accesses
-system.cpu6.l1c.WriteReq_mshr_misses            23142                       # number of WriteReq MSHR misses
-system.cpu6.l1c.WriteReq_mshr_uncacheable         5447                       # number of WriteReq MSHR uncacheable
-system.cpu6.l1c.WriteResp_avg_mshr_uncacheable_latency          inf                       # average WriteResp mshr uncacheable latency
-system.cpu6.l1c.WriteResp_mshr_uncacheable_latency    163399316                       # number of WriteResp MSHR uncacheable cycles
-system.cpu6.l1c.avg_blocked_cycles_no_mshrs  1189.328084                       # average number of cycles each access was blocked
+system.cpu5.num_reads                           99402                       # number of read accesses completed
+system.cpu5.num_writes                          54123                       # number of write accesses completed
+system.cpu6.l1c.ReadReq_accesses                45284                       # number of ReadReq accesses(hits+misses)
+system.cpu6.l1c.ReadReq_avg_miss_latency 22614.833240                       # average ReadReq miss latency
+system.cpu6.l1c.ReadReq_avg_mshr_miss_latency 22606.834542                       # average ReadReq mshr miss latency
+system.cpu6.l1c.ReadReq_avg_mshr_uncacheable_latency          inf                       # average ReadReq mshr uncacheable latency
+system.cpu6.l1c.ReadReq_hits                     7625                       # number of ReadReq hits
+system.cpu6.l1c.ReadReq_miss_latency        851652005                       # number of ReadReq miss cycles
+system.cpu6.l1c.ReadReq_miss_rate            0.831618                       # miss rate for ReadReq accesses
+system.cpu6.l1c.ReadReq_misses                  37659                       # number of ReadReq misses
+system.cpu6.l1c.ReadReq_mshr_miss_latency    851350782                       # number of ReadReq MSHR miss cycles
+system.cpu6.l1c.ReadReq_mshr_miss_rate       0.831618                       # mshr miss rate for ReadReq accesses
+system.cpu6.l1c.ReadReq_mshr_misses             37659                       # number of ReadReq MSHR misses
+system.cpu6.l1c.ReadReq_mshr_uncacheable_latency    513879090                       # number of ReadReq MSHR uncacheable cycles
+system.cpu6.l1c.WriteReq_accesses               24033                       # number of WriteReq accesses(hits+misses)
+system.cpu6.l1c.WriteReq_avg_miss_latency 25148.091805                       # average WriteReq miss latency
+system.cpu6.l1c.WriteReq_avg_mshr_miss_latency 25140.890430                       # average WriteReq mshr miss latency
+system.cpu6.l1c.WriteReq_avg_mshr_uncacheable_latency          inf                       # average WriteReq mshr uncacheable latency
+system.cpu6.l1c.WriteReq_hits                     897                       # number of WriteReq hits
+system.cpu6.l1c.WriteReq_miss_latency       581826252                       # number of WriteReq miss cycles
+system.cpu6.l1c.WriteReq_miss_rate           0.962676                       # miss rate for WriteReq accesses
+system.cpu6.l1c.WriteReq_misses                 23136                       # number of WriteReq misses
+system.cpu6.l1c.WriteReq_mshr_miss_latency    581659641                       # number of WriteReq MSHR miss cycles
+system.cpu6.l1c.WriteReq_mshr_miss_rate      0.962676                       # mshr miss rate for WriteReq accesses
+system.cpu6.l1c.WriteReq_mshr_misses            23136                       # number of WriteReq MSHR misses
+system.cpu6.l1c.WriteReq_mshr_uncacheable_latency    312525316                       # number of WriteReq MSHR uncacheable cycles
+system.cpu6.l1c.avg_blocked_cycles_no_mshrs  2288.777328                       # average number of cycles each access was blocked
 system.cpu6.l1c.avg_blocked_cycles_no_targets <err: div-0>                       # average number of cycles each access was blocked
-system.cpu6.l1c.avg_refs                     0.411043                       # Average number of references to valid blocks.
-system.cpu6.l1c.blocked_no_mshrs                69345                       # number of cycles access was blocked
+system.cpu6.l1c.avg_refs                     0.407927                       # Average number of references to valid blocks.
+system.cpu6.l1c.blocked_no_mshrs                69380                       # number of cycles access was blocked
 system.cpu6.l1c.blocked_no_targets                  0                       # number of cycles access was blocked
-system.cpu6.l1c.blocked_cycles_no_mshrs      82473956                       # number of cycles access was blocked
+system.cpu6.l1c.blocked_cycles_no_mshrs     158795371                       # number of cycles access was blocked
 system.cpu6.l1c.blocked_cycles_no_targets            0                       # number of cycles access was blocked
 system.cpu6.l1c.cache_copies                        0                       # number of cache copies performed
-system.cpu6.l1c.demand_accesses                 68959                       # number of demand (read+write) accesses
-system.cpu6.l1c.demand_avg_miss_latency  13560.917263                       # average overall miss latency
-system.cpu6.l1c.demand_avg_mshr_miss_latency 12377.410436                       # average overall mshr miss latency
-system.cpu6.l1c.demand_hits                      8684                       # number of demand (read+write) hits
-system.cpu6.l1c.demand_miss_latency         817384288                       # number of demand (read+write) miss cycles
-system.cpu6.l1c.demand_miss_rate             0.874070                       # miss rate for demand accesses
-system.cpu6.l1c.demand_misses                   60275                       # number of demand (read+write) misses
+system.cpu6.l1c.demand_accesses                 69317                       # number of demand (read+write) accesses
+system.cpu6.l1c.demand_avg_miss_latency  23578.884069                       # average overall miss latency
+system.cpu6.l1c.demand_avg_mshr_miss_latency 23571.188798                       # average overall mshr miss latency
+system.cpu6.l1c.demand_hits                      8522                       # number of demand (read+write) hits
+system.cpu6.l1c.demand_miss_latency        1433478257                       # number of demand (read+write) miss cycles
+system.cpu6.l1c.demand_miss_rate             0.877058                       # miss rate for demand accesses
+system.cpu6.l1c.demand_misses                   60795                       # number of demand (read+write) misses
 system.cpu6.l1c.demand_mshr_hits                    0                       # number of demand (read+write) MSHR hits
-system.cpu6.l1c.demand_mshr_miss_latency    746048414                       # number of demand (read+write) MSHR miss cycles
-system.cpu6.l1c.demand_mshr_miss_rate        0.874070                       # mshr miss rate for demand accesses
-system.cpu6.l1c.demand_mshr_misses              60275                       # number of demand (read+write) MSHR misses
+system.cpu6.l1c.demand_mshr_miss_latency   1433010423                       # number of demand (read+write) MSHR miss cycles
+system.cpu6.l1c.demand_mshr_miss_rate        0.877058                       # mshr miss rate for demand accesses
+system.cpu6.l1c.demand_mshr_misses              60795                       # number of demand (read+write) MSHR misses
 system.cpu6.l1c.fast_writes                         0                       # number of fast writes performed
 system.cpu6.l1c.mshr_cap_events                     0                       # number of times MSHR cap was activated
 system.cpu6.l1c.no_allocate_misses                  0                       # Number of misses that were no-allocate
-system.cpu6.l1c.overall_accesses                68959                       # number of overall (read+write) accesses
-system.cpu6.l1c.overall_avg_miss_latency 13560.917263                       # average overall miss latency
-system.cpu6.l1c.overall_avg_mshr_miss_latency 12377.410436                       # average overall mshr miss latency
-system.cpu6.l1c.overall_avg_mshr_uncacheable_latency            0                       # average overall mshr uncacheable latency
-system.cpu6.l1c.overall_hits                     8684                       # number of overall hits
-system.cpu6.l1c.overall_miss_latency        817384288                       # number of overall miss cycles
-system.cpu6.l1c.overall_miss_rate            0.874070                       # miss rate for overall accesses
-system.cpu6.l1c.overall_misses                  60275                       # number of overall misses
+system.cpu6.l1c.overall_accesses                69317                       # number of overall (read+write) accesses
+system.cpu6.l1c.overall_avg_miss_latency 23578.884069                       # average overall miss latency
+system.cpu6.l1c.overall_avg_mshr_miss_latency 23571.188798                       # average overall mshr miss latency
+system.cpu6.l1c.overall_avg_mshr_uncacheable_latency          inf                       # average overall mshr uncacheable latency
+system.cpu6.l1c.overall_hits                     8522                       # number of overall hits
+system.cpu6.l1c.overall_miss_latency       1433478257                       # number of overall miss cycles
+system.cpu6.l1c.overall_miss_rate            0.877058                       # miss rate for overall accesses
+system.cpu6.l1c.overall_misses                  60795                       # number of overall misses
 system.cpu6.l1c.overall_mshr_hits                   0                       # number of overall MSHR hits
-system.cpu6.l1c.overall_mshr_miss_latency    746048414                       # number of overall MSHR miss cycles
-system.cpu6.l1c.overall_mshr_miss_rate       0.874070                       # mshr miss rate for overall accesses
-system.cpu6.l1c.overall_mshr_misses             60275                       # number of overall MSHR misses
-system.cpu6.l1c.overall_mshr_uncacheable_latency            0                       # number of overall MSHR uncacheable cycles
-system.cpu6.l1c.overall_mshr_uncacheable_misses        15267                       # number of overall MSHR uncacheable misses
+system.cpu6.l1c.overall_mshr_miss_latency   1433010423                       # number of overall MSHR miss cycles
+system.cpu6.l1c.overall_mshr_miss_rate       0.877058                       # mshr miss rate for overall accesses
+system.cpu6.l1c.overall_mshr_misses             60795                       # number of overall MSHR misses
+system.cpu6.l1c.overall_mshr_uncacheable_latency    826404406                       # number of overall MSHR uncacheable cycles
+system.cpu6.l1c.overall_mshr_uncacheable_misses            0                       # number of overall MSHR uncacheable misses
 system.cpu6.l1c.prefetcher.num_hwpf_already_in_cache            0                       # number of hwpf that were already in the cache
 system.cpu6.l1c.prefetcher.num_hwpf_already_in_mshr            0                       # number of hwpf that were already in mshr
 system.cpu6.l1c.prefetcher.num_hwpf_already_in_prefetcher            0                       # number of hwpf that were already in the prefetch queue
@@ -718,104 +549,76 @@ system.cpu6.l1c.prefetcher.num_hwpf_issued            0                       #
 system.cpu6.l1c.prefetcher.num_hwpf_removed_MSHR_hit            0                       # number of hwpf removed because MSHR allocated
 system.cpu6.l1c.prefetcher.num_hwpf_span_page            0                       # number of hwpf spanning a virtual page
 system.cpu6.l1c.prefetcher.num_hwpf_squashed_from_miss            0                       # number of hwpf that got squashed due to a miss aborting calculation time
-system.cpu6.l1c.protocol.hwpf_invalid               0                       # hard prefetch misses to invalid blocks
-system.cpu6.l1c.protocol.read_invalid         1894590                       # read misses to invalid blocks
-system.cpu6.l1c.protocol.snoop_inv_exclusive            0                       # Invalidate snoops on exclusive blocks
-system.cpu6.l1c.protocol.snoop_inv_invalid            0                       # Invalidate snoops on invalid blocks
-system.cpu6.l1c.protocol.snoop_inv_modified            0                       # Invalidate snoops on modified blocks
-system.cpu6.l1c.protocol.snoop_inv_owned            0                       # Invalidate snoops on owned blocks
-system.cpu6.l1c.protocol.snoop_inv_shared            0                       # Invalidate snoops on shared blocks
-system.cpu6.l1c.protocol.snoop_read_exclusive         2887                       # read snoops on exclusive blocks
-system.cpu6.l1c.protocol.snoop_read_modified        12551                       # read snoops on modified blocks
-system.cpu6.l1c.protocol.snoop_read_owned         7188                       # read snoops on owned blocks
-system.cpu6.l1c.protocol.snoop_read_shared      1703425                       # read snoops on shared blocks
-system.cpu6.l1c.protocol.snoop_readex_exclusive         1550                       # readEx snoops on exclusive blocks
-system.cpu6.l1c.protocol.snoop_readex_modified         6733                       # readEx snoops on modified blocks
-system.cpu6.l1c.protocol.snoop_readex_owned         3926                       # readEx snoops on owned blocks
-system.cpu6.l1c.protocol.snoop_readex_shared        12456                       # readEx snoops on shared blocks
-system.cpu6.l1c.protocol.snoop_upgrade_owned          800                       # upgrade snoops on owned blocks
-system.cpu6.l1c.protocol.snoop_upgrade_shared         3156                       # upgradee snoops on shared blocks
-system.cpu6.l1c.protocol.snoop_writeinv_exclusive            0                       # WriteInvalidate snoops on exclusive blocks
-system.cpu6.l1c.protocol.snoop_writeinv_invalid            0                       # WriteInvalidate snoops on invalid blocks
-system.cpu6.l1c.protocol.snoop_writeinv_modified            0                       # WriteInvalidate snoops on modified blocks
-system.cpu6.l1c.protocol.snoop_writeinv_owned            0                       # WriteInvalidate snoops on owned blocks
-system.cpu6.l1c.protocol.snoop_writeinv_shared            0                       # WriteInvalidate snoops on shared blocks
-system.cpu6.l1c.protocol.swpf_invalid               0                       # soft prefetch misses to invalid blocks
-system.cpu6.l1c.protocol.write_invalid         987928                       # write misses to invalid blocks
-system.cpu6.l1c.protocol.write_owned             1405                       # write misses to owned blocks
-system.cpu6.l1c.protocol.write_shared            4406                       # write misses to shared blocks
-system.cpu6.l1c.replacements                    27613                       # number of replacements
-system.cpu6.l1c.sampled_refs                    27946                       # Sample count of references to valid blocks.
+system.cpu6.l1c.replacements                    27931                       # number of replacements
+system.cpu6.l1c.sampled_refs                    28282                       # Sample count of references to valid blocks.
 system.cpu6.l1c.soft_prefetch_mshr_full             0                       # number of mshr full events for SW prefetching instrutions
-system.cpu6.l1c.tagsinuse                  344.860122                       # Cycle average of tags in use
-system.cpu6.l1c.total_refs                      11487                       # Total number of references to valid blocks.
+system.cpu6.l1c.tagsinuse                  346.778818                       # Cycle average of tags in use
+system.cpu6.l1c.total_refs                      11537                       # Total number of references to valid blocks.
 system.cpu6.l1c.warmup_cycle                        0                       # Cycle when the warmup percentage was hit.
-system.cpu6.l1c.writebacks                      11073                       # number of writebacks
+system.cpu6.l1c.writebacks                      10819                       # number of writebacks
 system.cpu6.num_copies                              0                       # number of copy accesses completed
-system.cpu6.num_reads                           98723                       # number of read accesses completed
-system.cpu6.num_writes                          53876                       # number of write accesses completed
-system.cpu7.l1c.ReadReq_accesses                44990                       # number of ReadReq accesses(hits+misses)
-system.cpu7.l1c.ReadReq_avg_miss_latency 13952.283047                       # average ReadReq miss latency
-system.cpu7.l1c.ReadReq_avg_mshr_miss_latency 12937.789329                       # average ReadReq mshr miss latency
-system.cpu7.l1c.ReadReq_hits                     7505                       # number of ReadReq hits
-system.cpu7.l1c.ReadReq_miss_latency        523001330                       # number of ReadReq miss cycles
-system.cpu7.l1c.ReadReq_miss_rate            0.833185                       # miss rate for ReadReq accesses
-system.cpu7.l1c.ReadReq_misses                  37485                       # number of ReadReq misses
-system.cpu7.l1c.ReadReq_mshr_miss_latency    484973033                       # number of ReadReq MSHR miss cycles
-system.cpu7.l1c.ReadReq_mshr_miss_rate       0.833185                       # mshr miss rate for ReadReq accesses
-system.cpu7.l1c.ReadReq_mshr_misses             37485                       # number of ReadReq MSHR misses
-system.cpu7.l1c.ReadReq_mshr_uncacheable        10001                       # number of ReadReq MSHR uncacheable
-system.cpu7.l1c.ReadResp_avg_mshr_uncacheable_latency          inf                       # average ReadResp mshr uncacheable latency
-system.cpu7.l1c.ReadResp_mshr_uncacheable_latency    257188342                       # number of ReadResp MSHR uncacheable cycles
-system.cpu7.l1c.WriteReq_accesses               24083                       # number of WriteReq accesses(hits+misses)
-system.cpu7.l1c.WriteReq_avg_miss_latency 12615.682417                       # average WriteReq miss latency
-system.cpu7.l1c.WriteReq_avg_mshr_miss_latency 11155.458639                       # average WriteReq mshr miss latency
-system.cpu7.l1c.WriteReq_hits                    1163                       # number of WriteReq hits
-system.cpu7.l1c.WriteReq_miss_latency       289151441                       # number of WriteReq miss cycles
-system.cpu7.l1c.WriteReq_miss_rate           0.951709                       # miss rate for WriteReq accesses
-system.cpu7.l1c.WriteReq_misses                 22920                       # number of WriteReq misses
-system.cpu7.l1c.WriteReq_mshr_miss_latency    255683112                       # number of WriteReq MSHR miss cycles
-system.cpu7.l1c.WriteReq_mshr_miss_rate      0.951709                       # mshr miss rate for WriteReq accesses
-system.cpu7.l1c.WriteReq_mshr_misses            22920                       # number of WriteReq MSHR misses
-system.cpu7.l1c.WriteReq_mshr_uncacheable         5323                       # number of WriteReq MSHR uncacheable
-system.cpu7.l1c.WriteResp_avg_mshr_uncacheable_latency          inf                       # average WriteResp mshr uncacheable latency
-system.cpu7.l1c.WriteResp_mshr_uncacheable_latency    159397105                       # number of WriteResp MSHR uncacheable cycles
-system.cpu7.l1c.avg_blocked_cycles_no_mshrs  1185.864523                       # average number of cycles each access was blocked
+system.cpu6.num_reads                          100000                       # number of read accesses completed
+system.cpu6.num_writes                          53600                       # number of write accesses completed
+system.cpu7.l1c.ReadReq_accesses                44617                       # number of ReadReq accesses(hits+misses)
+system.cpu7.l1c.ReadReq_avg_miss_latency 22791.302160                       # average ReadReq miss latency
+system.cpu7.l1c.ReadReq_avg_mshr_miss_latency 22783.302456                       # average ReadReq mshr miss latency
+system.cpu7.l1c.ReadReq_avg_mshr_uncacheable_latency          inf                       # average ReadReq mshr uncacheable latency
+system.cpu7.l1c.ReadReq_hits                     7491                       # number of ReadReq hits
+system.cpu7.l1c.ReadReq_miss_latency        846149884                       # number of ReadReq miss cycles
+system.cpu7.l1c.ReadReq_miss_rate            0.832104                       # miss rate for ReadReq accesses
+system.cpu7.l1c.ReadReq_misses                  37126                       # number of ReadReq misses
+system.cpu7.l1c.ReadReq_mshr_miss_latency    845852887                       # number of ReadReq MSHR miss cycles
+system.cpu7.l1c.ReadReq_mshr_miss_rate       0.832104                       # mshr miss rate for ReadReq accesses
+system.cpu7.l1c.ReadReq_mshr_misses             37126                       # number of ReadReq MSHR misses
+system.cpu7.l1c.ReadReq_mshr_uncacheable_latency    523016698                       # number of ReadReq MSHR uncacheable cycles
+system.cpu7.l1c.WriteReq_accesses               24432                       # number of WriteReq accesses(hits+misses)
+system.cpu7.l1c.WriteReq_avg_miss_latency 24654.748978                       # average WriteReq miss latency
+system.cpu7.l1c.WriteReq_avg_mshr_miss_latency 24647.585464                       # average WriteReq mshr miss latency
+system.cpu7.l1c.WriteReq_avg_mshr_uncacheable_latency          inf                       # average WriteReq mshr uncacheable latency
+system.cpu7.l1c.WriteReq_hits                     960                       # number of WriteReq hits
+system.cpu7.l1c.WriteReq_miss_latency       578696268                       # number of WriteReq miss cycles
+system.cpu7.l1c.WriteReq_miss_rate           0.960707                       # miss rate for WriteReq accesses
+system.cpu7.l1c.WriteReq_misses                 23472                       # number of WriteReq misses
+system.cpu7.l1c.WriteReq_mshr_miss_latency    578528126                       # number of WriteReq MSHR miss cycles
+system.cpu7.l1c.WriteReq_mshr_miss_rate      0.960707                       # mshr miss rate for WriteReq accesses
+system.cpu7.l1c.WriteReq_mshr_misses            23472                       # number of WriteReq MSHR misses
+system.cpu7.l1c.WriteReq_mshr_uncacheable_latency    310262407                       # number of WriteReq MSHR uncacheable cycles
+system.cpu7.l1c.avg_blocked_cycles_no_mshrs  2294.299163                       # average number of cycles each access was blocked
 system.cpu7.l1c.avg_blocked_cycles_no_targets <err: div-0>                       # average number of cycles each access was blocked
-system.cpu7.l1c.avg_refs                     0.413879                       # Average number of references to valid blocks.
-system.cpu7.l1c.blocked_no_mshrs                69665                       # number of cycles access was blocked
+system.cpu7.l1c.avg_refs                     0.417293                       # Average number of references to valid blocks.
+system.cpu7.l1c.blocked_no_mshrs                69407                       # number of cycles access was blocked
 system.cpu7.l1c.blocked_no_targets                  0                       # number of cycles access was blocked
-system.cpu7.l1c.blocked_cycles_no_mshrs      82613252                       # number of cycles access was blocked
+system.cpu7.l1c.blocked_cycles_no_mshrs     159240422                       # number of cycles access was blocked
 system.cpu7.l1c.blocked_cycles_no_targets            0                       # number of cycles access was blocked
 system.cpu7.l1c.cache_copies                        0                       # number of cache copies performed
-system.cpu7.l1c.demand_accesses                 69073                       # number of demand (read+write) accesses
-system.cpu7.l1c.demand_avg_miss_latency  13445.124923                       # average overall miss latency
-system.cpu7.l1c.demand_avg_mshr_miss_latency 12261.503932                       # average overall mshr miss latency
-system.cpu7.l1c.demand_hits                      8668                       # number of demand (read+write) hits
-system.cpu7.l1c.demand_miss_latency         812152771                       # number of demand (read+write) miss cycles
-system.cpu7.l1c.demand_miss_rate             0.874510                       # miss rate for demand accesses
-system.cpu7.l1c.demand_misses                   60405                       # number of demand (read+write) misses
+system.cpu7.l1c.demand_accesses                 69049                       # number of demand (read+write) accesses
+system.cpu7.l1c.demand_avg_miss_latency  23513.088749                       # average overall miss latency
+system.cpu7.l1c.demand_avg_mshr_miss_latency 23505.412934                       # average overall mshr miss latency
+system.cpu7.l1c.demand_hits                      8451                       # number of demand (read+write) hits
+system.cpu7.l1c.demand_miss_latency        1424846152                       # number of demand (read+write) miss cycles
+system.cpu7.l1c.demand_miss_rate             0.877609                       # miss rate for demand accesses
+system.cpu7.l1c.demand_misses                   60598                       # number of demand (read+write) misses
 system.cpu7.l1c.demand_mshr_hits                    0                       # number of demand (read+write) MSHR hits
-system.cpu7.l1c.demand_mshr_miss_latency    740656145                       # number of demand (read+write) MSHR miss cycles
-system.cpu7.l1c.demand_mshr_miss_rate        0.874510                       # mshr miss rate for demand accesses
-system.cpu7.l1c.demand_mshr_misses              60405                       # number of demand (read+write) MSHR misses
+system.cpu7.l1c.demand_mshr_miss_latency   1424381013                       # number of demand (read+write) MSHR miss cycles
+system.cpu7.l1c.demand_mshr_miss_rate        0.877609                       # mshr miss rate for demand accesses
+system.cpu7.l1c.demand_mshr_misses              60598                       # number of demand (read+write) MSHR misses
 system.cpu7.l1c.fast_writes                         0                       # number of fast writes performed
 system.cpu7.l1c.mshr_cap_events                     0                       # number of times MSHR cap was activated
 system.cpu7.l1c.no_allocate_misses                  0                       # Number of misses that were no-allocate
-system.cpu7.l1c.overall_accesses                69073                       # number of overall (read+write) accesses
-system.cpu7.l1c.overall_avg_miss_latency 13445.124923                       # average overall miss latency
-system.cpu7.l1c.overall_avg_mshr_miss_latency 12261.503932                       # average overall mshr miss latency
-system.cpu7.l1c.overall_avg_mshr_uncacheable_latency            0                       # average overall mshr uncacheable latency
-system.cpu7.l1c.overall_hits                     8668                       # number of overall hits
-system.cpu7.l1c.overall_miss_latency        812152771                       # number of overall miss cycles
-system.cpu7.l1c.overall_miss_rate            0.874510                       # miss rate for overall accesses
-system.cpu7.l1c.overall_misses                  60405                       # number of overall misses
+system.cpu7.l1c.overall_accesses                69049                       # number of overall (read+write) accesses
+system.cpu7.l1c.overall_avg_miss_latency 23513.088749                       # average overall miss latency
+system.cpu7.l1c.overall_avg_mshr_miss_latency 23505.412934                       # average overall mshr miss latency
+system.cpu7.l1c.overall_avg_mshr_uncacheable_latency          inf                       # average overall mshr uncacheable latency
+system.cpu7.l1c.overall_hits                     8451                       # number of overall hits
+system.cpu7.l1c.overall_miss_latency       1424846152                       # number of overall miss cycles
+system.cpu7.l1c.overall_miss_rate            0.877609                       # miss rate for overall accesses
+system.cpu7.l1c.overall_misses                  60598                       # number of overall misses
 system.cpu7.l1c.overall_mshr_hits                   0                       # number of overall MSHR hits
-system.cpu7.l1c.overall_mshr_miss_latency    740656145                       # number of overall MSHR miss cycles
-system.cpu7.l1c.overall_mshr_miss_rate       0.874510                       # mshr miss rate for overall accesses
-system.cpu7.l1c.overall_mshr_misses             60405                       # number of overall MSHR misses
-system.cpu7.l1c.overall_mshr_uncacheable_latency            0                       # number of overall MSHR uncacheable cycles
-system.cpu7.l1c.overall_mshr_uncacheable_misses        15324                       # number of overall MSHR uncacheable misses
+system.cpu7.l1c.overall_mshr_miss_latency   1424381013                       # number of overall MSHR miss cycles
+system.cpu7.l1c.overall_mshr_miss_rate       0.877609                       # mshr miss rate for overall accesses
+system.cpu7.l1c.overall_mshr_misses             60598                       # number of overall MSHR misses
+system.cpu7.l1c.overall_mshr_uncacheable_latency    833279105                       # number of overall MSHR uncacheable cycles
+system.cpu7.l1c.overall_mshr_uncacheable_misses            0                       # number of overall MSHR uncacheable misses
 system.cpu7.l1c.prefetcher.num_hwpf_already_in_cache            0                       # number of hwpf that were already in the cache
 system.cpu7.l1c.prefetcher.num_hwpf_already_in_mshr            0                       # number of hwpf that were already in mshr
 system.cpu7.l1c.prefetcher.num_hwpf_already_in_prefetcher            0                       # number of hwpf that were already in the prefetch queue
@@ -825,112 +628,92 @@ system.cpu7.l1c.prefetcher.num_hwpf_issued            0                       #
 system.cpu7.l1c.prefetcher.num_hwpf_removed_MSHR_hit            0                       # number of hwpf removed because MSHR allocated
 system.cpu7.l1c.prefetcher.num_hwpf_span_page            0                       # number of hwpf spanning a virtual page
 system.cpu7.l1c.prefetcher.num_hwpf_squashed_from_miss            0                       # number of hwpf that got squashed due to a miss aborting calculation time
-system.cpu7.l1c.protocol.hwpf_invalid               0                       # hard prefetch misses to invalid blocks
-system.cpu7.l1c.protocol.read_invalid         1929884                       # read misses to invalid blocks
-system.cpu7.l1c.protocol.snoop_inv_exclusive            0                       # Invalidate snoops on exclusive blocks
-system.cpu7.l1c.protocol.snoop_inv_invalid            0                       # Invalidate snoops on invalid blocks
-system.cpu7.l1c.protocol.snoop_inv_modified            0                       # Invalidate snoops on modified blocks
-system.cpu7.l1c.protocol.snoop_inv_owned            0                       # Invalidate snoops on owned blocks
-system.cpu7.l1c.protocol.snoop_inv_shared            0                       # Invalidate snoops on shared blocks
-system.cpu7.l1c.protocol.snoop_read_exclusive         2904                       # read snoops on exclusive blocks
-system.cpu7.l1c.protocol.snoop_read_modified        12387                       # read snoops on modified blocks
-system.cpu7.l1c.protocol.snoop_read_owned         7174                       # read snoops on owned blocks
-system.cpu7.l1c.protocol.snoop_read_shared      1782059                       # read snoops on shared blocks
-system.cpu7.l1c.protocol.snoop_readex_exclusive         1587                       # readEx snoops on exclusive blocks
-system.cpu7.l1c.protocol.snoop_readex_modified         6687                       # readEx snoops on modified blocks
-system.cpu7.l1c.protocol.snoop_readex_owned         3842                       # readEx snoops on owned blocks
-system.cpu7.l1c.protocol.snoop_readex_shared        12759                       # readEx snoops on shared blocks
-system.cpu7.l1c.protocol.snoop_upgrade_owned          792                       # upgrade snoops on owned blocks
-system.cpu7.l1c.protocol.snoop_upgrade_shared         3085                       # upgradee snoops on shared blocks
-system.cpu7.l1c.protocol.snoop_writeinv_exclusive            0                       # WriteInvalidate snoops on exclusive blocks
-system.cpu7.l1c.protocol.snoop_writeinv_invalid            0                       # WriteInvalidate snoops on invalid blocks
-system.cpu7.l1c.protocol.snoop_writeinv_modified            0                       # WriteInvalidate snoops on modified blocks
-system.cpu7.l1c.protocol.snoop_writeinv_owned            0                       # WriteInvalidate snoops on owned blocks
-system.cpu7.l1c.protocol.snoop_writeinv_shared            0                       # WriteInvalidate snoops on shared blocks
-system.cpu7.l1c.protocol.swpf_invalid               0                       # soft prefetch misses to invalid blocks
-system.cpu7.l1c.protocol.write_invalid         930930                       # write misses to invalid blocks
-system.cpu7.l1c.protocol.write_owned             1422                       # write misses to owned blocks
-system.cpu7.l1c.protocol.write_shared            4465                       # write misses to shared blocks
-system.cpu7.l1c.replacements                    27486                       # number of replacements
-system.cpu7.l1c.sampled_refs                    27827                       # Sample count of references to valid blocks.
+system.cpu7.l1c.replacements                    27613                       # number of replacements
+system.cpu7.l1c.sampled_refs                    27942                       # Sample count of references to valid blocks.
 system.cpu7.l1c.soft_prefetch_mshr_full             0                       # number of mshr full events for SW prefetching instrutions
-system.cpu7.l1c.tagsinuse                  344.310963                       # Cycle average of tags in use
-system.cpu7.l1c.total_refs                      11517                       # Total number of references to valid blocks.
+system.cpu7.l1c.tagsinuse                  345.414592                       # Cycle average of tags in use
+system.cpu7.l1c.total_refs                      11660                       # Total number of references to valid blocks.
 system.cpu7.l1c.warmup_cycle                        0                       # Cycle when the warmup percentage was hit.
-system.cpu7.l1c.writebacks                      10979                       # number of writebacks
+system.cpu7.l1c.writebacks                      10955                       # number of writebacks
 system.cpu7.num_copies                              0                       # number of copy accesses completed
-system.cpu7.num_reads                           99734                       # number of read accesses completed
-system.cpu7.num_writes                          53652                       # number of write accesses completed
-system.l2c.ReadExReq_accesses                   75160                       # number of ReadExReq accesses(hits+misses)
-system.l2c.ReadExReq_avg_miss_latency    10115.633652                       # average ReadExReq miss latency
-system.l2c.ReadExReq_avg_mshr_miss_latency  6085.503709                       # average ReadExReq mshr miss latency
-system.l2c.ReadExReq_hits                       39620                       # number of ReadExReq hits
-system.l2c.ReadExReq_miss_latency           359509620                       # number of ReadExReq miss cycles
-system.l2c.ReadExReq_miss_rate               0.472858                       # miss rate for ReadExReq accesses
-system.l2c.ReadExReq_misses                     35540                       # number of ReadExReq misses
-system.l2c.ReadExReq_mshr_hits                    220                       # number of ReadExReq MSHR hits
-system.l2c.ReadExReq_mshr_miss_latency      214939991                       # number of ReadExReq MSHR miss cycles
-system.l2c.ReadExReq_mshr_miss_rate          0.469931                       # mshr miss rate for ReadExReq accesses
-system.l2c.ReadExReq_mshr_misses                35320                       # number of ReadExReq MSHR misses
-system.l2c.ReadReq_accesses                    138762                       # number of ReadReq accesses(hits+misses)
-system.l2c.ReadReq_avg_miss_latency      10150.344064                       # average ReadReq miss latency
-system.l2c.ReadReq_avg_mshr_miss_latency  6129.500996                       # average ReadReq mshr miss latency
-system.l2c.ReadReq_hits                         72597                       # number of ReadReq hits
-system.l2c.ReadReq_miss_latency             671597515                       # number of ReadReq miss cycles
-system.l2c.ReadReq_miss_rate                 0.476824                       # miss rate for ReadReq accesses
-system.l2c.ReadReq_misses                       66165                       # number of ReadReq misses
-system.l2c.ReadReq_mshr_hits                      406                       # number of ReadReq MSHR hits
-system.l2c.ReadReq_mshr_miss_latency        403069856                       # number of ReadReq MSHR miss cycles
-system.l2c.ReadReq_mshr_miss_rate            0.473898                       # mshr miss rate for ReadReq accesses
-system.l2c.ReadReq_mshr_misses                  65759                       # number of ReadReq MSHR misses
-system.l2c.ReadReq_mshr_uncacheable             78927                       # number of ReadReq MSHR uncacheable
-system.l2c.ReadResp_avg_mshr_uncacheable_latency          inf                       # average ReadResp mshr uncacheable latency
-system.l2c.ReadResp_mshr_uncacheable_latency    484683934                       # number of ReadResp MSHR uncacheable cycles
-system.l2c.WriteReq_mshr_uncacheable            42802                       # number of WriteReq MSHR uncacheable
-system.l2c.WriteResp_avg_mshr_uncacheable_latency          inf                       # average WriteResp mshr uncacheable latency
-system.l2c.WriteResp_mshr_uncacheable_latency    248118294                       # number of WriteResp MSHR uncacheable cycles
-system.l2c.Writeback_accesses                   86706                       # number of Writeback accesses(hits+misses)
-system.l2c.Writeback_hits                       18948                       # number of Writeback hits
-system.l2c.Writeback_miss_rate               0.781468                       # miss rate for Writeback accesses
-system.l2c.Writeback_misses                     67758                       # number of Writeback misses
-system.l2c.Writeback_mshr_miss_rate          0.781468                       # mshr miss rate for Writeback accesses
-system.l2c.Writeback_mshr_misses                67758                       # number of Writeback MSHR misses
+system.cpu7.num_reads                           98933                       # number of read accesses completed
+system.cpu7.num_writes                          53679                       # number of write accesses completed
+system.l2c.ReadExReq_accesses                   74732                       # number of ReadExReq accesses(hits+misses)
+system.l2c.ReadExReq_avg_miss_latency    10058.723893                       # average ReadExReq miss latency
+system.l2c.ReadExReq_avg_mshr_miss_latency 10012.709549                       # average ReadExReq mshr miss latency
+system.l2c.ReadExReq_miss_latency           751708554                       # number of ReadExReq miss cycles
+system.l2c.ReadExReq_miss_rate                      1                       # miss rate for ReadExReq accesses
+system.l2c.ReadExReq_misses                     74732                       # number of ReadExReq misses
+system.l2c.ReadExReq_mshr_hits                    486                       # number of ReadExReq MSHR hits
+system.l2c.ReadExReq_mshr_miss_latency      748269810                       # number of ReadExReq MSHR miss cycles
+system.l2c.ReadExReq_mshr_miss_rate                 1                       # mshr miss rate for ReadExReq accesses
+system.l2c.ReadExReq_mshr_misses                74732                       # number of ReadExReq MSHR misses
+system.l2c.ReadReq_accesses                    138119                       # number of ReadReq accesses(hits+misses)
+system.l2c.ReadReq_avg_miss_latency      10093.112454                       # average ReadReq miss latency
+system.l2c.ReadReq_avg_mshr_miss_latency 10012.902949                       # average ReadReq mshr miss latency
+system.l2c.ReadReq_avg_mshr_uncacheable_latency          inf                       # average ReadReq mshr uncacheable latency
+system.l2c.ReadReq_hits                         62746                       # number of ReadReq hits
+system.l2c.ReadReq_miss_latency             760748165                       # number of ReadReq miss cycles
+system.l2c.ReadReq_miss_rate                 0.545711                       # miss rate for ReadReq accesses
+system.l2c.ReadReq_misses                       75373                       # number of ReadReq misses
+system.l2c.ReadReq_mshr_hits                      858                       # number of ReadReq MSHR hits
+system.l2c.ReadReq_mshr_miss_latency        754702534                       # number of ReadReq MSHR miss cycles
+system.l2c.ReadReq_mshr_miss_rate            0.545711                       # mshr miss rate for ReadReq accesses
+system.l2c.ReadReq_mshr_misses                  75373                       # number of ReadReq MSHR misses
+system.l2c.ReadReq_mshr_uncacheable_latency    792432163                       # number of ReadReq MSHR uncacheable cycles
+system.l2c.UpgradeReq_accesses                  18312                       # number of UpgradeReq accesses(hits+misses)
+system.l2c.UpgradeReq_avg_miss_latency    5090.815258                       # average UpgradeReq miss latency
+system.l2c.UpgradeReq_avg_mshr_miss_latency 10012.622433                       # average UpgradeReq mshr miss latency
+system.l2c.UpgradeReq_miss_latency           93223009                       # number of UpgradeReq miss cycles
+system.l2c.UpgradeReq_miss_rate                     1                       # miss rate for UpgradeReq accesses
+system.l2c.UpgradeReq_misses                    18312                       # number of UpgradeReq misses
+system.l2c.UpgradeReq_mshr_hits                    25                       # number of UpgradeReq MSHR hits
+system.l2c.UpgradeReq_mshr_miss_latency     183351142                       # number of UpgradeReq MSHR miss cycles
+system.l2c.UpgradeReq_mshr_miss_rate                1                       # mshr miss rate for UpgradeReq accesses
+system.l2c.UpgradeReq_mshr_misses               18312                       # number of UpgradeReq MSHR misses
+system.l2c.WriteReq_avg_mshr_uncacheable_latency          inf                       # average WriteReq mshr uncacheable latency
+system.l2c.WriteReq_mshr_uncacheable_latency    430029394                       # number of WriteReq MSHR uncacheable cycles
+system.l2c.Writeback_accesses                   86893                       # number of Writeback accesses(hits+misses)
+system.l2c.Writeback_miss_rate                      1                       # miss rate for Writeback accesses
+system.l2c.Writeback_misses                     86893                       # number of Writeback misses
+system.l2c.Writeback_mshr_miss_rate                 1                       # mshr miss rate for Writeback accesses
+system.l2c.Writeback_mshr_misses                86893                       # number of Writeback MSHR misses
 system.l2c.avg_blocked_cycles_no_mshrs           3278                       # average number of cycles each access was blocked
 system.l2c.avg_blocked_cycles_no_targets <err: div-0>                       # average number of cycles each access was blocked
-system.l2c.avg_refs                          1.297661                       # Average number of references to valid blocks.
+system.l2c.avg_refs                          3.318198                       # Average number of references to valid blocks.
 system.l2c.blocked_no_mshrs                         3                       # number of cycles access was blocked
 system.l2c.blocked_no_targets                       0                       # number of cycles access was blocked
 system.l2c.blocked_cycles_no_mshrs               9834                       # number of cycles access was blocked
 system.l2c.blocked_cycles_no_targets                0                       # number of cycles access was blocked
 system.l2c.cache_copies                             0                       # number of cache copies performed
-system.l2c.demand_accesses                     138762                       # number of demand (read+write) accesses
-system.l2c.demand_avg_miss_latency       10150.344064                       # average overall miss latency
-system.l2c.demand_avg_mshr_miss_latency   6129.500996                       # average overall mshr miss latency
-system.l2c.demand_hits                          72597                       # number of demand (read+write) hits
-system.l2c.demand_miss_latency              671597515                       # number of demand (read+write) miss cycles
-system.l2c.demand_miss_rate                  0.476824                       # miss rate for demand accesses
-system.l2c.demand_misses                        66165                       # number of demand (read+write) misses
-system.l2c.demand_mshr_hits                       406                       # number of demand (read+write) MSHR hits
-system.l2c.demand_mshr_miss_latency         403069856                       # number of demand (read+write) MSHR miss cycles
-system.l2c.demand_mshr_miss_rate             0.473898                       # mshr miss rate for demand accesses
-system.l2c.demand_mshr_misses                   65759                       # number of demand (read+write) MSHR misses
+system.l2c.demand_accesses                     212851                       # number of demand (read+write) accesses
+system.l2c.demand_avg_miss_latency       10075.991599                       # average overall miss latency
+system.l2c.demand_avg_mshr_miss_latency  10012.806662                       # average overall mshr miss latency
+system.l2c.demand_hits                          62746                       # number of demand (read+write) hits
+system.l2c.demand_miss_latency             1512456719                       # number of demand (read+write) miss cycles
+system.l2c.demand_miss_rate                  0.705212                       # miss rate for demand accesses
+system.l2c.demand_misses                       150105                       # number of demand (read+write) misses
+system.l2c.demand_mshr_hits                      1344                       # number of demand (read+write) MSHR hits
+system.l2c.demand_mshr_miss_latency        1502972344                       # number of demand (read+write) MSHR miss cycles
+system.l2c.demand_mshr_miss_rate             0.705212                       # mshr miss rate for demand accesses
+system.l2c.demand_mshr_misses                  150105                       # number of demand (read+write) MSHR misses
 system.l2c.fast_writes                              0                       # number of fast writes performed
 system.l2c.mshr_cap_events                          0                       # number of times MSHR cap was activated
 system.l2c.no_allocate_misses                       0                       # Number of misses that were no-allocate
-system.l2c.overall_accesses                    225468                       # number of overall (read+write) accesses
-system.l2c.overall_avg_miss_latency       5014.803394                       # average overall miss latency
-system.l2c.overall_avg_mshr_miss_latency  6129.500996                       # average overall mshr miss latency
-system.l2c.overall_avg_mshr_uncacheable_latency            0                       # average overall mshr uncacheable latency
-system.l2c.overall_hits                         91545                       # number of overall hits
-system.l2c.overall_miss_latency             671597515                       # number of overall miss cycles
-system.l2c.overall_miss_rate                 0.593978                       # miss rate for overall accesses
-system.l2c.overall_misses                      133923                       # number of overall misses
-system.l2c.overall_mshr_hits                      406                       # number of overall MSHR hits
-system.l2c.overall_mshr_miss_latency        403069856                       # number of overall MSHR miss cycles
-system.l2c.overall_mshr_miss_rate            0.291656                       # mshr miss rate for overall accesses
-system.l2c.overall_mshr_misses                  65759                       # number of overall MSHR misses
-system.l2c.overall_mshr_uncacheable_latency            0                       # number of overall MSHR uncacheable cycles
-system.l2c.overall_mshr_uncacheable_misses       121729                       # number of overall MSHR uncacheable misses
+system.l2c.overall_accesses                    212851                       # number of overall (read+write) accesses
+system.l2c.overall_avg_miss_latency      10075.991599                       # average overall miss latency
+system.l2c.overall_avg_mshr_miss_latency 10012.806662                       # average overall mshr miss latency
+system.l2c.overall_avg_mshr_uncacheable_latency          inf                       # average overall mshr uncacheable latency
+system.l2c.overall_hits                         62746                       # number of overall hits
+system.l2c.overall_miss_latency            1512456719                       # number of overall miss cycles
+system.l2c.overall_miss_rate                 0.705212                       # miss rate for overall accesses
+system.l2c.overall_misses                      150105                       # number of overall misses
+system.l2c.overall_mshr_hits                     1344                       # number of overall MSHR hits
+system.l2c.overall_mshr_miss_latency       1502972344                       # number of overall MSHR miss cycles
+system.l2c.overall_mshr_miss_rate            0.705212                       # mshr miss rate for overall accesses
+system.l2c.overall_mshr_misses                 150105                       # number of overall MSHR misses
+system.l2c.overall_mshr_uncacheable_latency   1222461557                       # number of overall MSHR uncacheable cycles
+system.l2c.overall_mshr_uncacheable_misses            0                       # number of overall MSHR uncacheable misses
 system.l2c.prefetcher.num_hwpf_already_in_cache            0                       # number of hwpf that were already in the cache
 system.l2c.prefetcher.num_hwpf_already_in_mshr            0                       # number of hwpf that were already in mshr
 system.l2c.prefetcher.num_hwpf_already_in_prefetcher            0                       # number of hwpf that were already in the prefetch queue
@@ -940,12 +723,12 @@ system.l2c.prefetcher.num_hwpf_issued               0                       # nu
 system.l2c.prefetcher.num_hwpf_removed_MSHR_hit            0                       # number of hwpf removed because MSHR allocated
 system.l2c.prefetcher.num_hwpf_span_page            0                       # number of hwpf spanning a virtual page
 system.l2c.prefetcher.num_hwpf_squashed_from_miss            0                       # number of hwpf that got squashed due to a miss aborting calculation time
-system.l2c.replacements                        100054                       # number of replacements
-system.l2c.sampled_refs                        101078                       # Sample count of references to valid blocks.
+system.l2c.replacements                         31000                       # number of replacements
+system.l2c.sampled_refs                         31427                       # Sample count of references to valid blocks.
 system.l2c.soft_prefetch_mshr_full                  0                       # number of mshr full events for SW prefetching instrutions
-system.l2c.tagsinuse                      1023.099242                       # Cycle average of tags in use
-system.l2c.total_refs                          131165                       # Total number of references to valid blocks.
-system.l2c.warmup_cycle                        296156                       # Cycle when the warmup percentage was hit.
-system.l2c.writebacks                           16243                       # number of writebacks
+system.l2c.tagsinuse                       461.978673                       # Cycle average of tags in use
+system.l2c.total_refs                          104281                       # Total number of references to valid blocks.
+system.l2c.warmup_cycle                             0                       # Cycle when the warmup percentage was hit.
+system.l2c.writebacks                               0                       # number of writebacks
 
 ---------- End Simulation Statistics   ----------
diff --git a/tests/quick/50.memtest/ref/alpha/linux/memtest/stderr b/tests/quick/50.memtest/ref/alpha/linux/memtest/stderr
index d45294bbb..87bef1427 100644
--- a/tests/quick/50.memtest/ref/alpha/linux/memtest/stderr
+++ b/tests/quick/50.memtest/ref/alpha/linux/memtest/stderr
@@ -1,74 +1,74 @@
 warn: Entering event queue @ 0.  Starting simulation...
-system.cpu7: completed 10000 read accesses @8253930
-system.cpu1: completed 10000 read accesses @8325085
-system.cpu6: completed 10000 read accesses @8427313
-system.cpu4: completed 10000 read accesses @8438233
-system.cpu2: completed 10000 read accesses @8458126
-system.cpu5: completed 10000 read accesses @8549800
-system.cpu3: completed 10000 read accesses @8559995
-system.cpu0: completed 10000 read accesses @8593654
-system.cpu7: completed 20000 read accesses @16744182
-system.cpu1: completed 20000 read accesses @16774744
-system.cpu4: completed 20000 read accesses @16786220
-system.cpu3: completed 20000 read accesses @16787358
-system.cpu5: completed 20000 read accesses @16795808
-system.cpu6: completed 20000 read accesses @16836913
-system.cpu2: completed 20000 read accesses @17031052
-system.cpu0: completed 20000 read accesses @17126654
-system.cpu5: completed 30000 read accesses @24892576
-system.cpu6: completed 30000 read accesses @24903300
-system.cpu3: completed 30000 read accesses @24935860
-system.cpu4: completed 30000 read accesses @25020642
-system.cpu1: completed 30000 read accesses @25031726
-system.cpu7: completed 30000 read accesses @25112091
-system.cpu2: completed 30000 read accesses @25235960
-system.cpu0: completed 30000 read accesses @25505209
-system.cpu5: completed 40000 read accesses @33191203
-system.cpu6: completed 40000 read accesses @33273684
-system.cpu4: completed 40000 read accesses @33345526
-system.cpu3: completed 40000 read accesses @33406412
-system.cpu7: completed 40000 read accesses @33509130
-system.cpu1: completed 40000 read accesses @33509218
-system.cpu2: completed 40000 read accesses @33664822
-system.cpu0: completed 40000 read accesses @33869626
-system.cpu5: completed 50000 read accesses @41488848
-system.cpu4: completed 50000 read accesses @41582702
-system.cpu7: completed 50000 read accesses @41828988
-system.cpu3: completed 50000 read accesses @41829496
-system.cpu1: completed 50000 read accesses @41849534
-system.cpu6: completed 50000 read accesses @41982608
-system.cpu2: completed 50000 read accesses @42197798
-system.cpu0: completed 50000 read accesses @42443468
-system.cpu5: completed 60000 read accesses @49751344
-system.cpu4: completed 60000 read accesses @49783100
-system.cpu1: completed 60000 read accesses @49918062
-system.cpu7: completed 60000 read accesses @49929008
-system.cpu3: completed 60000 read accesses @50173996
-system.cpu6: completed 60000 read accesses @50351766
-system.cpu2: completed 60000 read accesses @50352657
-system.cpu0: completed 60000 read accesses @50789771
-system.cpu4: completed 70000 read accesses @58352386
-system.cpu5: completed 70000 read accesses @58394758
-system.cpu7: completed 70000 read accesses @58570698
-system.cpu1: completed 70000 read accesses @58764169
-system.cpu3: completed 70000 read accesses @58764648
-system.cpu2: completed 70000 read accesses @58921714
-system.cpu6: completed 70000 read accesses @58929984
-system.cpu0: completed 70000 read accesses @59567320
-system.cpu1: completed 80000 read accesses @67092786
-system.cpu5: completed 80000 read accesses @67153667
-system.cpu4: completed 80000 read accesses @67153760
-system.cpu7: completed 80000 read accesses @67207042
-system.cpu3: completed 80000 read accesses @67238507
-system.cpu2: completed 80000 read accesses @67633112
-system.cpu6: completed 80000 read accesses @67664637
-system.cpu0: completed 80000 read accesses @68437288
-system.cpu1: completed 90000 read accesses @75679048
-system.cpu4: completed 90000 read accesses @75680280
-system.cpu7: completed 90000 read accesses @75751053
-system.cpu5: completed 90000 read accesses @75781514
-system.cpu3: completed 90000 read accesses @75844118
-system.cpu2: completed 90000 read accesses @76346671
-system.cpu6: completed 90000 read accesses @76491728
-system.cpu0: completed 90000 read accesses @77376872
-system.cpu1: completed 100000 read accesses @84350509
+system.cpu7: completed 10000 read accesses @15607088
+system.cpu1: completed 10000 read accesses @15686239
+system.cpu5: completed 10000 read accesses @15771479
+system.cpu4: completed 10000 read accesses @15772513
+system.cpu0: completed 10000 read accesses @15778178
+system.cpu6: completed 10000 read accesses @15791633
+system.cpu2: completed 10000 read accesses @15841990
+system.cpu3: completed 10000 read accesses @15878600
+system.cpu2: completed 20000 read accesses @31878727
+system.cpu7: completed 20000 read accesses @32026154
+system.cpu6: completed 20000 read accesses @32057190
+system.cpu1: completed 20000 read accesses @32240417
+system.cpu0: completed 20000 read accesses @32270672
+system.cpu3: completed 20000 read accesses @32335938
+system.cpu5: completed 20000 read accesses @32480722
+system.cpu4: completed 20000 read accesses @32490454
+system.cpu2: completed 30000 read accesses @48060100
+system.cpu6: completed 30000 read accesses @48167196
+system.cpu4: completed 30000 read accesses @48520588
+system.cpu7: completed 30000 read accesses @48646309
+system.cpu0: completed 30000 read accesses @48740616
+system.cpu1: completed 30000 read accesses @48766857
+system.cpu3: completed 30000 read accesses @48959010
+system.cpu5: completed 30000 read accesses @49028132
+system.cpu6: completed 40000 read accesses @64421948
+system.cpu4: completed 40000 read accesses @64637670
+system.cpu2: completed 40000 read accesses @64868400
+system.cpu1: completed 40000 read accesses @64925788
+system.cpu0: completed 40000 read accesses @64956331
+system.cpu3: completed 40000 read accesses @65406565
+system.cpu5: completed 40000 read accesses @65517578
+system.cpu7: completed 40000 read accesses @65556693
+system.cpu6: completed 50000 read accesses @80917227
+system.cpu2: completed 50000 read accesses @80917444
+system.cpu4: completed 50000 read accesses @81159816
+system.cpu1: completed 50000 read accesses @81373401
+system.cpu3: completed 50000 read accesses @81540449
+system.cpu0: completed 50000 read accesses @81577912
+system.cpu5: completed 50000 read accesses @81975441
+system.cpu7: completed 50000 read accesses @82285501
+system.cpu2: completed 60000 read accesses @96985412
+system.cpu4: completed 60000 read accesses @97174738
+system.cpu6: completed 60000 read accesses @97530786
+system.cpu0: completed 60000 read accesses @97671589
+system.cpu3: completed 60000 read accesses @97821937
+system.cpu1: completed 60000 read accesses @97822818
+system.cpu5: completed 60000 read accesses @98044596
+system.cpu7: completed 60000 read accesses @98812006
+system.cpu2: completed 70000 read accesses @113400661
+system.cpu4: completed 70000 read accesses @113949415
+system.cpu1: completed 70000 read accesses @114120869
+system.cpu3: completed 70000 read accesses @114207385
+system.cpu0: completed 70000 read accesses @114307850
+system.cpu6: completed 70000 read accesses @114393410
+system.cpu5: completed 70000 read accesses @114714609
+system.cpu7: completed 70000 read accesses @115286783
+system.cpu2: completed 80000 read accesses @130149084
+system.cpu0: completed 80000 read accesses @130494872
+system.cpu4: completed 80000 read accesses @130604588
+system.cpu6: completed 80000 read accesses @130741327
+system.cpu1: completed 80000 read accesses @130791488
+system.cpu3: completed 80000 read accesses @130805400
+system.cpu5: completed 80000 read accesses @130975948
+system.cpu7: completed 80000 read accesses @131555733
+system.cpu2: completed 90000 read accesses @146468442
+system.cpu6: completed 90000 read accesses @146616353
+system.cpu1: completed 90000 read accesses @146926939
+system.cpu3: completed 90000 read accesses @147059543
+system.cpu0: completed 90000 read accesses @147067458
+system.cpu5: completed 90000 read accesses @147440946
+system.cpu4: completed 90000 read accesses @147560717
+system.cpu7: completed 90000 read accesses @148115904
+system.cpu6: completed 100000 read accesses @163182312
diff --git a/tests/quick/50.memtest/ref/alpha/linux/memtest/stdout b/tests/quick/50.memtest/ref/alpha/linux/memtest/stdout
index a77db6fb9..29891e1e8 100644
--- a/tests/quick/50.memtest/ref/alpha/linux/memtest/stdout
+++ b/tests/quick/50.memtest/ref/alpha/linux/memtest/stdout
@@ -5,9 +5,9 @@ The Regents of The University of Michigan
 All Rights Reserved
 
 
-M5 compiled Jun 10 2007 14:06:20
-M5 started Sun Jun 10 14:22:51 2007
-M5 executing on iceaxe
-command line: /Users/nate/build/outgoing/build/ALPHA_SE/m5.debug -d /Users/nate/build/outgoing/build/ALPHA_SE/tests/debug/quick/50.memtest/alpha/linux/memtest tests/run.py quick/50.memtest/alpha/linux/memtest
+M5 compiled Aug  3 2007 03:56:47
+M5 started Fri Aug  3 04:17:16 2007
+M5 executing on zizzer.eecs.umich.edu
+command line: build/ALPHA_SE/m5.fast -d build/ALPHA_SE/tests/fast/quick/50.memtest/alpha/linux/memtest tests/run.py quick/50.memtest/alpha/linux/memtest
 Global frequency set at 1000000000000 ticks per second
-Exiting @ tick 84350509 because Maximum number of loads reached!
+Exiting @ tick 163182312 because maximum number of loads reached
diff --git a/tests/quick/80.netperf-stream/ref/alpha/linux/twosys-tsunami-simple-atomic/config.ini b/tests/quick/80.netperf-stream/ref/alpha/linux/twosys-tsunami-simple-atomic/config.ini
index c16d67687..4cbaaf71e 100644
--- a/tests/quick/80.netperf-stream/ref/alpha/linux/twosys-tsunami-simple-atomic/config.ini
+++ b/tests/quick/80.netperf-stream/ref/alpha/linux/twosys-tsunami-simple-atomic/config.ini
@@ -14,7 +14,7 @@ kernel=/dist/m5/system/binaries/vmlinux
 mem_mode=atomic
 pal=/dist/m5/system/binaries/ts_osfpal
 physmem=drivesys.physmem
-readfile=/Users/nate/work/m5/outgoing/configs/boot/netperf-server.rcS
+readfile=/z/stever/hg/m5.stever/configs/boot/netperf-server.rcS
 symbolfile=
 system_rev=1024
 system_type=34
@@ -35,7 +35,7 @@ side_b=drivesys.membus.port[0]
 
 [drivesys.cpu]
 type=AtomicSimpleCPU
-children=dtb itb
+children=dtb itb tracer
 clock=1
 cpu_id=0
 defer_registration=false
@@ -55,6 +55,7 @@ profile=0
 progress_interval=0
 simulate_stalls=false
 system=drivesys
+tracer=drivesys.cpu.tracer
 width=1
 dcache_port=drivesys.membus.port[3]
 icache_port=drivesys.membus.port[2]
@@ -67,6 +68,9 @@ size=64
 type=AlphaITB
 size=48
 
+[drivesys.cpu.tracer]
+type=ExeTracer
+
 [drivesys.disk0]
 type=IdeDisk
 children=image
@@ -647,7 +651,7 @@ pio_addr=8804615847936
 pio_latency=1000
 platform=drivesys.tsunami
 system=drivesys
-time=2009 1 1 0 0 0 3 1
+time=Thu Jan  1 00:00:00 2009
 tsunami=drivesys.tsunami
 year_is_bcd=false
 pio=drivesys.iobus.port[23]
@@ -704,7 +708,7 @@ kernel=/dist/m5/system/binaries/vmlinux
 mem_mode=atomic
 pal=/dist/m5/system/binaries/ts_osfpal
 physmem=testsys.physmem
-readfile=/Users/nate/work/m5/outgoing/configs/boot/netperf-stream-client.rcS
+readfile=/z/stever/hg/m5.stever/configs/boot/netperf-stream-client.rcS
 symbolfile=
 system_rev=1024
 system_type=34
@@ -725,7 +729,7 @@ side_b=testsys.membus.port[0]
 
 [testsys.cpu]
 type=AtomicSimpleCPU
-children=dtb itb
+children=dtb itb tracer
 clock=1
 cpu_id=0
 defer_registration=false
@@ -745,6 +749,7 @@ profile=0
 progress_interval=0
 simulate_stalls=false
 system=testsys
+tracer=testsys.cpu.tracer
 width=1
 dcache_port=testsys.membus.port[3]
 icache_port=testsys.membus.port[2]
@@ -757,6 +762,9 @@ size=64
 type=AlphaITB
 size=48
 
+[testsys.cpu.tracer]
+type=ExeTracer
+
 [testsys.disk0]
 type=IdeDisk
 children=image
@@ -1337,7 +1345,7 @@ pio_addr=8804615847936
 pio_latency=1000
 platform=testsys.tsunami
 system=testsys
-time=2009 1 1 0 0 0 3 1
+time=Thu Jan  1 00:00:00 2009
 tsunami=testsys.tsunami
 year_is_bcd=false
 pio=testsys.iobus.port[23]
diff --git a/tests/quick/80.netperf-stream/ref/alpha/linux/twosys-tsunami-simple-atomic/m5stats.txt b/tests/quick/80.netperf-stream/ref/alpha/linux/twosys-tsunami-simple-atomic/m5stats.txt
index e6bc6fb19..719430102 100644
--- a/tests/quick/80.netperf-stream/ref/alpha/linux/twosys-tsunami-simple-atomic/m5stats.txt
+++ b/tests/quick/80.netperf-stream/ref/alpha/linux/twosys-tsunami-simple-atomic/m5stats.txt
@@ -139,9 +139,10 @@ drivesys.tsunami.ethernet.txPPS                    25                       # Pa
 drivesys.tsunami.ethernet.txPackets                 5                       # Number of Packets Transmitted
 drivesys.tsunami.ethernet.txTcpChecksums            2                       # Number of tx TCP Checksums done by device
 drivesys.tsunami.ethernet.txUdpChecksums            0                       # Number of tx UDP Checksums done by device
-host_inst_rate                                6618724                       # Simulator instruction rate (inst/s)
-host_seconds                                    41.30                       # Real time elapsed on the host
-host_tick_rate                             4842704130                       # Simulator tick rate (ticks/s)
+host_inst_rate                               51081325                       # Simulator instruction rate (inst/s)
+host_mem_usage                                 406704                       # Number of bytes of host memory used
+host_seconds                                     5.35                       # Real time elapsed on the host
+host_tick_rate                            37372483621                       # Simulator tick rate (ticks/s)
 sim_freq                                 1000000000000                       # Frequency of simulated ticks
 sim_insts                                   273348482                       # Number of instructions simulated
 sim_seconds                                  0.200001                       # Number of seconds simulated
@@ -380,9 +381,10 @@ drivesys.tsunami.ethernet.totalSwi                  0                       # to
 drivesys.tsunami.ethernet.totalTxDesc               0                       # total number of TxDesc written to ISR
 drivesys.tsunami.ethernet.totalTxIdle               0                       # total number of TxIdle written to ISR
 drivesys.tsunami.ethernet.totalTxOk                 0                       # total number of TxOk written to ISR
-host_inst_rate                            65191624612                       # Simulator instruction rate (inst/s)
+host_inst_rate                            71036507796                       # Simulator instruction rate (inst/s)
+host_mem_usage                                 406704                       # Number of bytes of host memory used
 host_seconds                                     0.00                       # Real time elapsed on the host
-host_tick_rate                              183725573                       # Simulator tick rate (ticks/s)
+host_tick_rate                              191282064                       # Simulator tick rate (ticks/s)
 sim_freq                                 1000000000000                       # Frequency of simulated ticks
 sim_insts                                   273348482                       # Number of instructions simulated
 sim_seconds                                  0.000001                       # Number of seconds simulated
diff --git a/tests/quick/80.netperf-stream/ref/alpha/linux/twosys-tsunami-simple-atomic/stderr b/tests/quick/80.netperf-stream/ref/alpha/linux/twosys-tsunami-simple-atomic/stderr
index 8fb9590c3..4f6a93597 100644
--- a/tests/quick/80.netperf-stream/ref/alpha/linux/twosys-tsunami-simple-atomic/stderr
+++ b/tests/quick/80.netperf-stream/ref/alpha/linux/twosys-tsunami-simple-atomic/stderr
@@ -1,6 +1,6 @@
-Listening for testsys connection on port 3456
-Listening for drivesys connection on port 3457
-0: testsys.remote_gdb.listener: listening for remote gdb #0 on port 7000
-0: drivesys.remote_gdb.listener: listening for remote gdb #1 on port 7001
+Listening for testsys connection on port 3457
+Listening for drivesys connection on port 3458
+0: testsys.remote_gdb.listener: listening for remote gdb on port 7001
+0: drivesys.remote_gdb.listener: listening for remote gdb on port 7002
 warn: Entering event queue @ 0.  Starting simulation...
 warn: Obsolete M5 instruction ivlb encountered.
diff --git a/tests/quick/80.netperf-stream/ref/alpha/linux/twosys-tsunami-simple-atomic/stdout b/tests/quick/80.netperf-stream/ref/alpha/linux/twosys-tsunami-simple-atomic/stdout
index 08d7271d7..3b074da7f 100644
--- a/tests/quick/80.netperf-stream/ref/alpha/linux/twosys-tsunami-simple-atomic/stdout
+++ b/tests/quick/80.netperf-stream/ref/alpha/linux/twosys-tsunami-simple-atomic/stdout
@@ -5,11 +5,9 @@ The Regents of The University of Michigan
 All Rights Reserved
 
 
-M5 compiled Jun 10 2007 14:10:03
-M5 started Mon Jun 11 01:47:32 2007
-M5 executing on iceaxe
-command line: /Users/nate/build/outgoing/build/ALPHA_FS/m5.debug -d /Users/nate/build/outgoing/build/ALPHA_FS/tests/debug/quick/80.netperf-stream/alpha/linux/twosys-tsunami-simple-atomic tests/run.py quick/80.netperf-stream/alpha/linux/twosys-tsunami-simple-atomic
+M5 compiled Aug  3 2007 04:02:11
+M5 started Fri Aug  3 04:26:58 2007
+M5 executing on zizzer.eecs.umich.edu
+command line: build/ALPHA_FS/m5.fast -d build/ALPHA_FS/tests/fast/quick/80.netperf-stream/alpha/linux/twosys-tsunami-simple-atomic tests/run.py quick/80.netperf-stream/alpha/linux/twosys-tsunami-simple-atomic
 Global frequency set at 1000000000000 ticks per second
-      0: testsys.tsunami.io.rtc: Real-time clock set to Thu Jan  1 00:00:00 2009
-      0: drivesys.tsunami.io.rtc: Real-time clock set to Thu Jan  1 00:00:00 2009
 Exiting @ tick 4300235844056 because checkpoint
-- 
cgit v1.2.3


From c2b533cc3b0545b4ffd1d67aa3cc7d94029bd96a Mon Sep 17 00:00:00 2001
From: Steve Reinhardt <stever@gmail.com>
Date: Fri, 3 Aug 2007 16:27:51 -0700
Subject: Add cscope files to .hgignore.

--HG--
extra : convert_revision : 82598579baf50cd258714c7e533b96bc6bd1305a
---
 .hgignore | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/.hgignore b/.hgignore
index cbbb7b492..f536836de 100644
--- a/.hgignore
+++ b/.hgignore
@@ -2,5 +2,7 @@ syntax: glob
 build
 parser.out
 parsetab.py
+cscope.files
+cscope.out
 *.pyc
 *~
-- 
cgit v1.2.3


From e8e1ddd5305c4f7d4764f2cd28f70f911a29806f Mon Sep 17 00:00:00 2001
From: Nathan Binkert <nate@binkert.org>
Date: Sat, 4 Aug 2007 15:56:48 -0700
Subject: SimpleCPU:  Add some DPRINTFs

--HG--
extra : convert_revision : 5fdd5a9595c3e5d6ce5f9e8c9af0a8e6c857551c
---
 src/cpu/simple/atomic.cc | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/src/cpu/simple/atomic.cc b/src/cpu/simple/atomic.cc
index 604c48086..704b65f36 100644
--- a/src/cpu/simple/atomic.cc
+++ b/src/cpu/simple/atomic.cc
@@ -183,6 +183,7 @@ AtomicSimpleCPU::unserialize(Checkpoint *cp, const string &section)
 void
 AtomicSimpleCPU::resume()
 {
+    DPRINTF(SimpleCPU, "Resume\n");
     if (_status != SwitchedOut && _status != Idle) {
         assert(system->getMemoryMode() == Enums::atomic);
 
@@ -231,6 +232,8 @@ AtomicSimpleCPU::takeOverFrom(BaseCPU *oldCPU)
 void
 AtomicSimpleCPU::activateContext(int thread_num, int delay)
 {
+    DPRINTF(SimpleCPU, "ActivateContext %d (%d cycles)\n", thread_num, delay);
+
     assert(thread_num == 0);
     assert(thread);
 
@@ -248,6 +251,8 @@ AtomicSimpleCPU::activateContext(int thread_num, int delay)
 void
 AtomicSimpleCPU::suspendContext(int thread_num)
 {
+    DPRINTF(SimpleCPU, "SuspendContext %d\n", thread_num);
+
     assert(thread_num == 0);
     assert(thread);
 
@@ -483,6 +488,8 @@ AtomicSimpleCPU::write(int32_t data, Addr addr, unsigned flags, uint64_t *res)
 void
 AtomicSimpleCPU::tick()
 {
+    DPRINTF(SimpleCPU, "Tick\n");
+
     Tick latency = cycles(1); // instruction takes one cycle by default
 
     for (int i = 0; i < width; ++i) {
-- 
cgit v1.2.3


From d8900d8478d86789d1120f11c9918d65a456d96e Mon Sep 17 00:00:00 2001
From: Nathan Binkert <nate@binkert.org>
Date: Sat, 4 Aug 2007 16:00:36 -0700
Subject: main: return an exit code of 1 when we exit due to a python
 exception. This requires us to not use PyRun_SimpleString, but PyRun_String
 since the latter actually returns a result

--HG--
extra : convert_revision : 3e3916ddd7eef9957569d8e72e73ba4c3160ce20
---
 src/sim/main.cc | 38 ++++++++++++++++++++++++++++++++++++--
 1 file changed, 36 insertions(+), 2 deletions(-)

diff --git a/src/sim/main.cc b/src/sim/main.cc
index 5bf4add4b..62ab9445b 100644
--- a/src/sim/main.cc
+++ b/src/sim/main.cc
@@ -75,6 +75,39 @@ abortHandler(int sigtype)
     ccprintf(cerr, "Program aborted at cycle %d\n", curTick);
 }
 
+int
+python_main()
+{
+    PyObject *module;
+    PyObject *dict;
+    PyObject *result;
+
+    module = PyImport_AddModule("__main__");
+    if (module == NULL)
+        fatal("Could not import __main__");
+
+    dict = PyModule_GetDict(module);
+
+    result = PyRun_String("import m5.main", Py_file_input, dict, dict);
+    if (!result) {
+        PyErr_Print();
+        return 1;
+    }
+    Py_DECREF(result);
+
+    result = PyRun_String("m5.main.main()", Py_file_input, dict, dict);
+    if (!result) {
+        PyErr_Print();
+        return 1;
+    }
+    Py_DECREF(result);
+
+    if (Py_FlushLine())
+        PyErr_Clear();
+
+    return 0;
+}
+
 int
 main(int argc, char **argv)
 {
@@ -114,9 +147,10 @@ main(int argc, char **argv)
     // initialize SWIG modules
     init_swig();
 
-    PyRun_SimpleString("import m5.main");
-    PyRun_SimpleString("m5.main.main()");
+    int ret = python_main();
 
     // clean up Python intepreter.
     Py_Finalize();
+
+    return ret;
 }
-- 
cgit v1.2.3


From 157bd25802d0775ea5b2b7c217f3e57f51562dee Mon Sep 17 00:00:00 2001
From: Nathan Binkert <nate@binkert.org>
Date: Sat, 4 Aug 2007 16:02:04 -0700
Subject: python: provide access to stats

--HG--
extra : convert_revision : 18a4e9ef21bd77ec73482557e028d535f0c1f273
---
 src/python/m5/__init__.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/python/m5/__init__.py b/src/python/m5/__init__.py
index 96cb2ca13..f21bb362e 100644
--- a/src/python/m5/__init__.py
+++ b/src/python/m5/__init__.py
@@ -92,6 +92,7 @@ if running_m5:
     from event import *
     from simulate import *
     from main import options
+    import stats
 
 import SimObject
 import params
-- 
cgit v1.2.3


From 300712c0d118646d2d2ea206f8d27fd43dbd9040 Mon Sep 17 00:00:00 2001
From: Nathan Binkert <nate@binkert.org>
Date: Sat, 4 Aug 2007 16:05:18 -0700
Subject: swig: %include all of the enums to get all of the definitions.
 (instead of %import)

--HG--
extra : convert_revision : bc4a39d7be3aad59b34d55aa8dd2c28285f09db9
---
 src/python/generate.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/python/generate.py b/src/python/generate.py
index 99d0fb68c..6b167552e 100644
--- a/src/python/generate.py
+++ b/src/python/generate.py
@@ -270,7 +270,7 @@ class Generate(object):
         enums = list(enums)
         enums.sort()
         for enum in enums:
-            print >>out, '%%import "enums/%s.hh"' % enum.__name__
+            print >>out, '%%include "enums/%s.hh"' % enum.__name__
         print >>out
 
         for obj in ordered_objs:
-- 
cgit v1.2.3


From 5a27431b969ed0557d2a079066d082153f97af9d Mon Sep 17 00:00:00 2001
From: Nathan Binkert <nate@binkert.org>
Date: Sat, 4 Aug 2007 16:06:19 -0700
Subject: python: use the enum values in the memory mode changing code

--HG--
extra : convert_revision : 2e399b2b407922ad076f93d33af73e3ba4c05218
---
 src/python/m5/simulate.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/python/m5/simulate.py b/src/python/m5/simulate.py
index c703664d4..6db25f0ed 100644
--- a/src/python/m5/simulate.py
+++ b/src/python/m5/simulate.py
@@ -148,20 +148,20 @@ def changeToAtomic(system):
     if not isinstance(system, (objects.Root, objects.System)):
         raise TypeError, "Parameter of type '%s'.  Must be type %s or %s." % \
               (type(system), objects.Root, objects.System)
-    if system.getMemoryMode() != objects.params.SimObject.Atomic:
+    if system.getMemoryMode() != objects.params.atomic:
         doDrain(system)
         print "Changing memory mode to atomic"
-        system.changeTiming(objects.params.SimObject.Atomic)
+        system.changeTiming(objects.params.atomic)
 
 def changeToTiming(system):
     if not isinstance(system, (objects.Root, objects.System)):
         raise TypeError, "Parameter of type '%s'.  Must be type %s or %s." % \
               (type(system), objects.Root, objects.System)
 
-    if system.getMemoryMode() != objects.params.SimObject.Timing:
+    if system.getMemoryMode() != objects.params.timing:
         doDrain(system)
         print "Changing memory mode to timing"
-        system.changeTiming(objects.params.SimObject.Timing)
+        system.changeTiming(objects.params.timing)
 
 def switchCpus(cpuList):
     print "switching cpus"
-- 
cgit v1.2.3


From 7a996ccc98421b361f6dfd8fe6e949299152935b Mon Sep 17 00:00:00 2001
From: Nathan Binkert <nate@binkert.org>
Date: Sat, 4 Aug 2007 16:09:24 -0700
Subject: switching: Remove the drain and resume code from the switching code.
 This allows us to change memory modes as well. Clean up the code while we're
 at it.

--HG--
extra : convert_revision : fc5fee9ffd08b791f0607ee2688f32aa65d15354
---
 src/python/m5/simulate.py | 37 +++++++++++--------------------------
 1 file changed, 11 insertions(+), 26 deletions(-)

diff --git a/src/python/m5/simulate.py b/src/python/m5/simulate.py
index 6db25f0ed..6c70d8fbd 100644
--- a/src/python/m5/simulate.py
+++ b/src/python/m5/simulate.py
@@ -167,34 +167,19 @@ def switchCpus(cpuList):
     print "switching cpus"
     if not isinstance(cpuList, list):
         raise RuntimeError, "Must pass a list to this function"
-    for i in cpuList:
-        if not isinstance(i, tuple):
+    for item in cpuList:
+        if not isinstance(item, tuple) or len(item) != 2:
             raise RuntimeError, "List must have tuples of (oldCPU,newCPU)"
 
-    [old_cpus, new_cpus] = zip(*cpuList)
+    for old_cpu, new_cpu in cpuList:
+        if not isinstance(old_cpu, objects.BaseCPU):
+            raise TypeError, "%s is not of type BaseCPU" % old_cpu
+        if not isinstance(new_cpu, objects.BaseCPU):
+            raise TypeError, "%s is not of type BaseCPU" % new_cpu
 
-    for cpu in old_cpus:
-        if not isinstance(cpu, objects.BaseCPU):
-            raise TypeError, "%s is not of type BaseCPU" % cpu
-    for cpu in new_cpus:
-        if not isinstance(cpu, objects.BaseCPU):
-            raise TypeError, "%s is not of type BaseCPU" % cpu
-
-    # Drain all of the individual CPUs
-    drain_event = internal.event.createCountedDrain()
-    unready_cpus = 0
-    for old_cpu in old_cpus:
-        unready_cpus += old_cpu.startDrain(drain_event, False)
-    # If we've got some objects that can't drain immediately, then simulate
-    if unready_cpus > 0:
-        drain_event.setCount(unready_cpus)
-        simulate()
-    internal.event.cleanupCountedDrain(drain_event)
     # Now all of the CPUs are ready to be switched out
-    for old_cpu in old_cpus:
+    for old_cpu, new_cpu in cpuList:
         old_cpu._ccObject.switchOut()
-    index = 0
-    for new_cpu in new_cpus:
-        new_cpu.takeOverFrom(old_cpus[index])
-        new_cpu._ccObject.resume()
-        index += 1
+
+    for old_cpu, new_cpu in cpuList:
+        new_cpu.takeOverFrom(old_cpu)
-- 
cgit v1.2.3


From df015f17a45b18302565c43d3790d787e1b54c42 Mon Sep 17 00:00:00 2001
From: Nathan Binkert <nate@binkert.org>
Date: Sat, 4 Aug 2007 16:11:11 -0700
Subject: switching: turn on profiling after a switch if there's an event

--HG--
extra : convert_revision : 689e5b85c47bb2aaceb7eb38c2a24a2e5b69376c
---
 src/cpu/base.cc | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/src/cpu/base.cc b/src/cpu/base.cc
index ee409048b..a54ed9349 100644
--- a/src/cpu/base.cc
+++ b/src/cpu/base.cc
@@ -343,9 +343,8 @@ BaseCPU::takeOverFrom(BaseCPU *oldCPU, Port *ic, Port *dc)
     for (int i = 0; i < threadContexts.size(); ++i)
         threadContexts[i]->profileClear();
 
-    // The Sampler must take care of this!
-//    if (profileEvent)
-//        profileEvent->schedule(curTick);
+    if (profileEvent)
+        profileEvent->schedule(curTick);
 #endif
 
     // Connect new CPU to old CPU's memory only if new CPU isn't
-- 
cgit v1.2.3