11 files changed, 368 insertions, 142 deletions
diff --git a/src/mem/cache/base_cache.cc b/src/mem/cache/base_cache.cc
index a172847df..71ea58416 100644
--- a/src/mem/cache/base_cache.cc
+++ b/src/mem/cache/base_cache.cc
@@ -44,6 +44,8 @@ BaseCache::CachePort::CachePort(const std::string &_name, BaseCache *_cache,
     : Port(_name), cache(_cache), isCpuSide(_isCpuSide)
 {
     blocked = false;
+    cshrRetry = NULL;
+    waitingOnRetry = false;
     //Start ports at null if more than one is created we should panic
     //cpuSidePort = NULL;
     //memSidePort = NULL;
@@ -71,7 +73,23 @@ BaseCache::CachePort::deviceBlockSize()
 bool
 BaseCache::CachePort::recvTiming(Packet *pkt)
 {
-    if (blocked)
+    if (isCpuSide
+        && !pkt->req->isUncacheable()
+        && pkt->isInvalidate()
+        && !pkt->isRead() && !pkt->isWrite()) {
+        //Upgrade or Invalidate
+        //Look into what happens if two slave caches on bus
+        DPRINTF(Cache, "%s %x ? blk_addr: %x\n", pkt->cmdString(),
+                pkt->getAddr() & (((ULL(1))<<48)-1),
+                pkt->getAddr() & ~((Addr)cache->blkSize - 1));
+
+        assert(!(pkt->flags & SATISFIED));
+        pkt->flags |= SATISFIED;
+        //Invalidates/Upgrades need no response if they get the bus
+        return true;
+    }
+
+    if (pkt->isRequest() && blocked)
     {
         DPRINTF(Cache,"Scheduling a retry while blocked\n");
         mustSendRetry = true;
@@ -96,16 +114,44 @@ void
 BaseCache::CachePort::recvRetry()
 {
     Packet *pkt;
-
-    if (!isCpuSide)
+    assert(waitingOnRetry);
+    if (!drainList.empty()) {
+        DPRINTF(CachePort, "%s attempting to send a retry for response\n", name());
+        //We have some responses to drain first
+        if (sendTiming(drainList.front())) {
+            DPRINTF(CachePort, "%s sucessful in sending a retry for response\n", name());
+            drainList.pop_front();
+            if (!drainList.empty() ||
+                !isCpuSide && cache->doMasterRequest() ||
+                isCpuSide && cache->doSlaveRequest()) {
+
+                DPRINTF(CachePort, "%s has more responses/requests\n", name());
+                BaseCache::CacheEvent * reqCpu = new BaseCache::CacheEvent(this);
+                reqCpu->schedule(curTick + 1);
+            }
+            waitingOnRetry = false;
+        }
+    }
+    else if (!isCpuSide)
     {
+        DPRINTF(CachePort, "%s attempting to send a retry for MSHR\n", name());
+        if (!cache->doMasterRequest()) {
+            //This can happen if I am the owner of a block and see an upgrade
+            //while the block was in my WB Buffers.  I just remove the
+            //wb and de-assert the masterRequest
+            waitingOnRetry = false;
+            return;
+        }
         pkt = cache->getPacket();
+        MSHR* mshr = (MSHR*)pkt->senderState;
         bool success = sendTiming(pkt);
         DPRINTF(Cache, "Address %x was %s in sending the timing request\n",
                 pkt->getAddr(), success ? "succesful" : "unsuccesful");
-        cache->sendResult(pkt, success);
+        cache->sendResult(pkt, mshr, success);
+        waitingOnRetry = !success;
         if (success && cache->doMasterRequest())
         {
+            DPRINTF(CachePort, "%s has more requests\n", name());
             //Still more to issue, rerequest in 1 cycle
             pkt = NULL;
             BaseCache::CacheEvent * reqCpu = new BaseCache::CacheEvent(this);
@@ -114,17 +160,23 @@ BaseCache::CachePort::recvRetry()
     }
     else
     {
-        pkt = cache->getCoherencePacket();
+        assert(cshrRetry);
+        //pkt = cache->getCoherencePacket();
+        //We save the packet, no reordering on CSHRS
+        pkt = cshrRetry;
         bool success = sendTiming(pkt);
+        waitingOnRetry = !success;
         if (success && cache->doSlaveRequest())
         {
             //Still more to issue, rerequest in 1 cycle
             pkt = NULL;
             BaseCache::CacheEvent * reqCpu = new BaseCache::CacheEvent(this);
             reqCpu->schedule(curTick + 1);
+            cshrRetry = NULL;
         }
-
     }
+    if (waitingOnRetry) DPRINTF(CachePort, "%s STILL Waiting on retry\n", name());
+    else DPRINTF(CachePort, "%s no longer waiting on retry\n", name());
     return;
 }
 void
@@ -169,16 +221,47 @@ BaseCache::CacheEvent::process()
 {
     if (!pkt)
     {
-        if (!cachePort->isCpuSide)
-        {
-            //MSHR
+        if (cachePort->waitingOnRetry) return;
+       //We have some responses to drain first
+        if (!cachePort->drainList.empty()) {
+            DPRINTF(CachePort, "%s trying to drain a response\n", cachePort->name());
+            if (cachePort->sendTiming(cachePort->drainList.front())) {
+                DPRINTF(CachePort, "%s drains a response succesfully\n", cachePort->name());
+                cachePort->drainList.pop_front();
+                if (!cachePort->drainList.empty() ||
+                    !cachePort->isCpuSide && cachePort->cache->doMasterRequest() ||
+                    cachePort->isCpuSide && cachePort->cache->doSlaveRequest()) {
+
+                    DPRINTF(CachePort, "%s still has outstanding bus reqs\n", cachePort->name());
+                    this->schedule(curTick + 1);
+                }
+            }
+            else {
+                cachePort->waitingOnRetry = true;
+                DPRINTF(CachePort, "%s now waiting on a retry\n", cachePort->name());
+            }
+        }
+        else if (!cachePort->isCpuSide)
+        {            //MSHR
+            DPRINTF(CachePort, "%s trying to send a MSHR request\n", cachePort->name());
+            if (!cachePort->cache->doMasterRequest()) {
+                //This can happen if I am the owner of a block and see an upgrade
+                //while the block was in my WB Buffers.  I just remove the
+                //wb and de-assert the masterRequest
+                return;
+            }
+
             pkt = cachePort->cache->getPacket();
+            MSHR* mshr = (MSHR*) pkt->senderState;
             bool success = cachePort->sendTiming(pkt);
             DPRINTF(Cache, "Address %x was %s in sending the timing request\n",
                     pkt->getAddr(), success ? "succesful" : "unsuccesful");
-            cachePort->cache->sendResult(pkt, success);
+            cachePort->cache->sendResult(pkt, mshr, success);
+            cachePort->waitingOnRetry = !success;
+            if (cachePort->waitingOnRetry) DPRINTF(CachePort, "%s now waiting on a retry\n", cachePort->name());
             if (success && cachePort->cache->doMasterRequest())
             {
+                DPRINTF(CachePort, "%s still more MSHR requests to send\n", cachePort->name());
                 //Still more to issue, rerequest in 1 cycle
                 pkt = NULL;
                 this->schedule(curTick+1);
@@ -186,10 +269,16 @@ BaseCache::CacheEvent::process()
         }
         else
         {
+            assert(cachePort->cache->doSlaveRequest());
             //CSHR
             pkt = cachePort->cache->getCoherencePacket();
             bool success = cachePort->sendTiming(pkt);
-            if (success && cachePort->cache->doSlaveRequest())
+            if (!success) {
+                //Need to send on a retry
+                cachePort->cshrRetry = pkt;
+                cachePort->waitingOnRetry = true;
+            }
+            else if (cachePort->cache->doSlaveRequest())
             {
                 //Still more to issue, rerequest in 1 cycle
                 pkt = NULL;
@@ -199,8 +288,24 @@ BaseCache::CacheEvent::process()
         return;
     }
     //Response
-    //Know the packet to send, no need to mark in service (must succed)
-    assert(cachePort->sendTiming(pkt));
+    //Know the packet to send
+    if (pkt->flags & NACKED_LINE)
+        pkt->result = Packet::Nacked;
+    else
+        pkt->result = Packet::Success;
+    pkt->makeTimingResponse();
+    DPRINTF(CachePort, "%s attempting to send a response\n", cachePort->name());
+    if (!cachePort->drainList.empty() || cachePort->waitingOnRetry) {
+        //Already have a list, just append
+        cachePort->drainList.push_back(pkt);
+        DPRINTF(CachePort, "%s appending response onto drain list\n", cachePort->name());
+    }
+    else if (!cachePort->sendTiming(pkt)) {
+        //It failed, save it to list of drain events
+        DPRINTF(CachePort, "%s now waiting for a retry\n", cachePort->name());
+        cachePort->drainList.push_back(pkt);
+        cachePort->waitingOnRetry = true;
+    }
 }
 
 const char *
diff --git a/src/mem/cache/base_cache.hh b/src/mem/cache/base_cache.hh
index 069dbab58..563b1ca8b 100644
--- a/src/mem/cache/base_cache.hh
+++ b/src/mem/cache/base_cache.hh
@@ -72,6 +72,7 @@ enum RequestCause{
     Request_PF
 };
 
+class MSHR;
 /**
  * A basic cache interface. Implements some common functions for speed.
  */
@@ -110,6 +111,12 @@ class BaseCache : public MemObject
         bool mustSendRetry;
 
         bool isCpuSide;
+
+        bool waitingOnRetry;
+
+        std::list<Packet *> drainList;
+
+        Packet *cshrRetry;
     };
 
     struct CacheEvent : public Event
@@ -127,6 +134,8 @@ class BaseCache : public MemObject
     CachePort *cpuSidePort;
     CachePort *memSidePort;
 
+    bool snoopRangesSent;
+
   public:
     virtual Port *getPort(const std::string &if_name, int idx = -1);
 
@@ -149,14 +158,15 @@ class BaseCache : public MemObject
 
     void recvStatusChange(Port::Status status, bool isCpuSide)
     {
-        if (status == Port::RangeChange)
-        {
-            if (!isCpuSide)
-            {
+        if (status == Port::RangeChange){
+            if (!isCpuSide) {
                 cpuSidePort->sendStatusChange(Port::RangeChange);
+                if (!snoopRangesSent) {
+                    snoopRangesSent = true;
+                    memSidePort->sendStatusChange(Port::RangeChange);
+                }
             }
-            else
-            {
+            else {
                 memSidePort->sendStatusChange(Port::RangeChange);
             }
         }
@@ -172,7 +182,7 @@ class BaseCache : public MemObject
         fatal("No implementation");
     }
 
-    virtual void sendResult(Packet* &pkt, bool success)
+    virtual void sendResult(Packet* &pkt, MSHR* mshr, bool success)
     {
 
         fatal("No implementation");
@@ -205,6 +215,7 @@ class BaseCache : public MemObject
     /** True if this cache is connected to the CPU. */
     bool topLevelCache;
 
+
     /** Stores time the cache blocked for statistics. */
     Tick blockedCycle;
 
@@ -332,6 +343,7 @@ class BaseCache : public MemObject
         //Start ports at null if more than one is created we should panic
         cpuSidePort = NULL;
         memSidePort = NULL;
+        snoopRangesSent = false;
     }
 
     virtual void init();
@@ -382,9 +394,14 @@ class BaseCache : public MemObject
             blocked_causes[cause]++;
             blockedCycle = curTick;
         }
-        blocked |= flag;
-        DPRINTF(Cache,"Blocking for cause %s\n", cause);
-        cpuSidePort->setBlocked();
+        int old_state = blocked;
+        if (!(blocked & flag)) {
+            //Wasn't already blocked for this cause
+            blocked |= flag;
+            DPRINTF(Cache,"Blocking for cause %s\n", cause);
+            if (!old_state)
+                cpuSidePort->setBlocked();
+        }
     }
 
     /**
@@ -395,8 +412,13 @@ class BaseCache : public MemObject
     void setBlockedForSnoop(BlockedCause cause)
     {
         uint8_t flag = 1 << cause;
-        blockedSnoop |= flag;
-        memSidePort->setBlocked();
+        uint8_t old_state = blockedSnoop;
+        if (!(blockedSnoop & flag)) {
+            //Wasn't already blocked for this cause
+            blockedSnoop |= flag;
+            if (!old_state)
+                memSidePort->setBlocked();
+        }
     }
 
     /**
@@ -445,7 +467,7 @@ class BaseCache : public MemObject
      */
     void setMasterRequest(RequestCause cause, Tick time)
     {
-        if (!doMasterRequest())
+        if (!doMasterRequest() && !memSidePort->waitingOnRetry)
         {
             BaseCache::CacheEvent * reqCpu = new BaseCache::CacheEvent(memSidePort);
             reqCpu->schedule(time);
@@ -503,10 +525,14 @@ class BaseCache : public MemObject
      */
     void respond(Packet *pkt, Tick time)
     {
-        pkt->makeTimingResponse();
-        pkt->result = Packet::Success;
-        CacheEvent *reqCpu = new CacheEvent(cpuSidePort, pkt);
-        reqCpu->schedule(time);
+        if (pkt->needsResponse()) {
+            CacheEvent *reqCpu = new CacheEvent(cpuSidePort, pkt);
+            reqCpu->schedule(time);
+        }
+        else {
+            if (pkt->cmd == Packet::Writeback) delete pkt->req;
+            delete pkt;
+        }
     }
 
     /**
@@ -517,22 +543,29 @@ class BaseCache : public MemObject
     void respondToMiss(Packet *pkt, Tick time)
     {
         if (!pkt->req->isUncacheable()) {
-            missLatency[pkt->cmdToIndex()][pkt->req->getThreadNum()] += time - pkt->time;
+            missLatency[pkt->cmdToIndex()][0/*pkt->req->getThreadNum()*/] += time - pkt->time;
+        }
+        if (pkt->needsResponse()) {
+            CacheEvent *reqCpu = new CacheEvent(cpuSidePort, pkt);
+            reqCpu->schedule(time);
+        }
+        else {
+            if (pkt->cmd == Packet::Writeback) delete pkt->req;
+            delete pkt;
         }
-        pkt->makeTimingResponse();
-        pkt->result = Packet::Success;
-        CacheEvent *reqCpu = new CacheEvent(cpuSidePort, pkt);
-        reqCpu->schedule(time);
     }
 
     /**
      * Suppliess the data if cache to cache transfers are enabled.
      * @param pkt The bus transaction to fulfill.
      */
-    void respondToSnoop(Packet *pkt)
+    void respondToSnoop(Packet *pkt, Tick time)
     {
-        assert("Implement\n" && 0);
+//        assert("Implement\n" && 0);
 //	mi->respond(pkt,curTick + hitLatency);
+        assert (pkt->needsResponse());
+        CacheEvent *reqMem = new CacheEvent(memSidePort, pkt);
+        reqMem->schedule(time);
     }
 
     /**
@@ -551,6 +584,16 @@ class BaseCache : public MemObject
         else
         {
             //This is where snoops get updated
+            AddrRangeList dummy;
+//            if (!topLevelCache)
+//            {
+                cpuSidePort->getPeerAddressRanges(dummy, snoop);
+//            }
+//            else
+//            {
+//                snoop.push_back(RangeSize(0,-1));
+//            }
+
             return;
         }
     }
diff --git a/src/mem/cache/cache.hh b/src/mem/cache/cache.hh
index 989b8743e..41b270030 100644
--- a/src/mem/cache/cache.hh
+++ b/src/mem/cache/cache.hh
@@ -103,6 +103,7 @@ class Cache : public BaseCache
       * Used to append to target list, to cause an invalidation.
       */
     Packet * invalidatePkt;
+    Request *invalidateReq;
 
     /**
      * Temporarily move a block into a MSHR.
@@ -175,7 +176,7 @@ class Cache : public BaseCache
      * @param pkt The request.
      * @param success True if the request was sent successfully.
      */
-    virtual void sendResult(Packet * &pkt, bool success);
+    virtual void sendResult(Packet * &pkt, MSHR* mshr, bool success);
 
     /**
      * Handles a response (cache line fill/write ack) from the bus.
@@ -251,7 +252,7 @@ class Cache : public BaseCache
      * request.
      * @return The estimated completion time.
      */
-    Tick probe(Packet * &pkt, bool update);
+    Tick probe(Packet * &pkt, bool update, CachePort * otherSidePort);
 
     /**
      * Snoop for the provided request in the cache and return the estimated
@@ -262,7 +263,7 @@ class Cache : public BaseCache
      * request.
      * @return The estimated completion time.
      */
-    Tick snoopProbe(Packet * &pkt, bool update);
+    Tick snoopProbe(Packet * &pkt);
 };
 
 #endif // __CACHE_HH__
diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh
index 11cd84e88..a68418f24 100644
--- a/src/mem/cache/cache_impl.hh
+++ b/src/mem/cache/cache_impl.hh
@@ -51,7 +51,7 @@
 #include "mem/cache/miss/mshr.hh"
 #include "mem/cache/prefetch/prefetcher.hh"
 
-#include "sim/sim_events.hh" // for SimExitEvent
+#include "sim/sim_exit.hh" // for SimExitEvent
 
 template<class TagStore, class Buffering, class Coherence>
 bool
@@ -60,17 +60,21 @@ doTimingAccess(Packet *pkt, CachePort *cachePort, bool isCpuSide)
 {
     if (isCpuSide)
     {
-        if (pkt->isWrite() && (pkt->req->getFlags() & LOCKED)) {
+        if (pkt->isWrite() && (pkt->req->isLocked())) {
             pkt->req->setScResult(1);
         }
         access(pkt);
+
     }
     else
     {
         if (pkt->isResponse())
             handleResponse(pkt);
-        else
-            snoop(pkt);
+        else {
+            //Check if we should do the snoop
+            if (pkt->flags & SNOOP_COMMIT)
+                snoop(pkt);
+        }
     }
     return true;
 }
@@ -83,11 +87,11 @@ doAtomicAccess(Packet *pkt, bool isCpuSide)
     if (isCpuSide)
     {
         //Temporary solution to LL/SC
-        if (pkt->isWrite() && (pkt->req->getFlags() & LOCKED)) {
+        if (pkt->isWrite() && (pkt->req->isLocked())) {
             pkt->req->setScResult(1);
         }
 
-        probe(pkt, true);
+        probe(pkt, true, NULL);
         //TEMP ALWAYS SUCCES FOR NOW
         pkt->result = Packet::Success;
     }
@@ -96,7 +100,7 @@ doAtomicAccess(Packet *pkt, bool isCpuSide)
         if (pkt->isResponse())
             handleResponse(pkt);
         else
-            snoopProbe(pkt, true);
+            return snoopProbe(pkt);
     }
     //Fix this timing info
     return hitLatency;
@@ -113,20 +117,17 @@ doFunctionalAccess(Packet *pkt, bool isCpuSide)
         pkt->req->setThreadContext(0,0);
 
         //Temporary solution to LL/SC
-        if (pkt->isWrite() && (pkt->req->getFlags() & LOCKED)) {
+        if (pkt->isWrite() && (pkt->req->isLocked())) {
             assert("Can't handle LL/SC on functional path\n");
         }
 
-        probe(pkt, true);
+        probe(pkt, false, memSidePort);
         //TEMP ALWAYS SUCCESFUL FOR NOW
         pkt->result = Packet::Success;
     }
     else
     {
-        if (pkt->isResponse())
-            handleResponse(pkt);
-        else
-            snoopProbe(pkt, true);
+            probe(pkt, false, cpuSidePort);
     }
 }
 
@@ -147,7 +148,8 @@ Cache(const std::string &_name,
       prefetchAccess(params.prefetchAccess),
       tags(params.tags), missQueue(params.missQueue),
       coherence(params.coherence), prefetcher(params.prefetcher),
-      doCopy(params.doCopy), blockOnCopy(params.blockOnCopy)
+      doCopy(params.doCopy), blockOnCopy(params.blockOnCopy),
+      hitLatency(params.hitLatency)
 {
 //FIX BUS POINTERS
 //    if (params.in == NULL) {
@@ -162,10 +164,8 @@ Cache(const std::string &_name,
     prefetcher->setCache(this);
     prefetcher->setTags(tags);
     prefetcher->setBuffer(missQueue);
-#if 0
-    invalidatePkt = new Packet;
-    invalidatePkt->cmd = Packet::InvalidateReq;
-#endif
+    invalidateReq = new Request((Addr) NULL, blkSize, 0);
+    invalidatePkt = new Packet(invalidateReq, Packet::InvalidateReq, 0);
 }
 
 template<class TagStore, class Buffering, class Coherence>
@@ -194,20 +194,6 @@ Cache<TagStore,Buffering,Coherence>::access(PacketPtr &pkt)
         prefetcher->handleMiss(pkt, curTick);
     }
     if (!pkt->req->isUncacheable()) {
-        if (pkt->isInvalidate() && !pkt->isRead()
-            && !pkt->isWrite()) {
-            //Upgrade or Invalidate
-            //Look into what happens if two slave caches on bus
-            DPRINTF(Cache, "%s %x ? blk_addr: %x\n", pkt->cmdString(),
-                    pkt->getAddr() & (((ULL(1))<<48)-1),
-                    pkt->getAddr() & ~((Addr)blkSize - 1));
-
-            //@todo Should this return latency have the hit latency in it?
-//	    respond(pkt,curTick+lat);
-            pkt->flags |= SATISFIED;
-//            return MA_HIT; //@todo, return values
-            return true;
-        }
         blk = tags->handleAccess(pkt, lat, writebacks);
     } else {
         size = pkt->getSize();
@@ -234,27 +220,30 @@ Cache<TagStore,Buffering,Coherence>::access(PacketPtr &pkt)
         missQueue->doWriteback(writebacks.front());
         writebacks.pop_front();
     }
-    DPRINTF(Cache, "%s %x %s blk_addr: %x pc %x\n", pkt->cmdString(),
+    DPRINTF(Cache, "%s %x %s blk_addr: %x\n", pkt->cmdString(),
             pkt->getAddr() & (((ULL(1))<<48)-1), (blk) ? "hit" : "miss",
-            pkt->getAddr() & ~((Addr)blkSize - 1), pkt->req->getPC());
+            pkt->getAddr() & ~((Addr)blkSize - 1));
     if (blk) {
         // Hit
-        hits[pkt->cmdToIndex()][pkt->req->getThreadNum()]++;
+        hits[pkt->cmdToIndex()][0/*pkt->req->getThreadNum()*/]++;
         // clear dirty bit if write through
         if (pkt->needsResponse())
             respond(pkt, curTick+lat);
-//	return MA_HIT;
+        if (pkt->cmd == Packet::Writeback) {
+            //Signal that you can kill the pkt/req
+            pkt->flags |= SATISFIED;
+        }
         return true;
     }
 
     // Miss
     if (!pkt->req->isUncacheable()) {
-        misses[pkt->cmdToIndex()][pkt->req->getThreadNum()]++;
+        misses[pkt->cmdToIndex()][0/*pkt->req->getThreadNum()*/]++;
         /** @todo Move miss count code into BaseCache */
         if (missCount) {
             --missCount;
             if (missCount == 0)
-                new SimLoopExitEvent(curTick, "A cache reached the maximum miss count");
+                exitSimLoop("A cache reached the maximum miss count");
         }
     }
     missQueue->handleMiss(pkt, size, curTick + hitLatency);
@@ -267,10 +256,11 @@ template<class TagStore, class Buffering, class Coherence>
 Packet *
 Cache<TagStore,Buffering,Coherence>::getPacket()
 {
+    assert(missQueue->havePending());
     Packet * pkt = missQueue->getPacket();
     if (pkt) {
         if (!pkt->req->isUncacheable()) {
-            if (pkt->cmd == Packet::HardPFReq) misses[Packet::HardPFReq][pkt->req->getThreadNum()]++;
+            if (pkt->cmd == Packet::HardPFReq) misses[Packet::HardPFReq][0/*pkt->req->getThreadNum()*/]++;
             BlkType *blk = tags->findBlock(pkt);
             Packet::Command cmd = coherence->getBusCmd(pkt->cmd,
                                               (blk)? blk->status : 0);
@@ -285,15 +275,30 @@ Cache<TagStore,Buffering,Coherence>::getPacket()
 
 template<class TagStore, class Buffering, class Coherence>
 void
-Cache<TagStore,Buffering,Coherence>::sendResult(PacketPtr &pkt, bool success)
+Cache<TagStore,Buffering,Coherence>::sendResult(PacketPtr &pkt, MSHR* mshr, bool success)
 {
-    if (success) {
-        missQueue->markInService(pkt);
-          //Temp Hack for UPGRADES
-          if (pkt->cmd == Packet::UpgradeReq) {
-              handleResponse(pkt);
-          }
+    if (success && !(pkt->flags & NACKED_LINE)) {
+        missQueue->markInService(pkt, mshr);
+        //Temp Hack for UPGRADES
+        if (pkt->cmd == Packet::UpgradeReq) {
+            pkt->flags &= ~CACHE_LINE_FILL;
+            BlkType *blk = tags->findBlock(pkt);
+            CacheBlk::State old_state = (blk) ? blk->status : 0;
+            CacheBlk::State new_state = coherence->getNewState(pkt,old_state);
+            if (old_state != new_state)
+                DPRINTF(Cache, "Block for blk addr %x moving from state %i to %i\n",
+                        pkt->getAddr() & (((ULL(1))<<48)-1), old_state, new_state);
+            //Set the state on the upgrade
+            memcpy(pkt->getPtr<uint8_t>(), blk->data, blkSize);
+            PacketList writebacks;
+            tags->handleFill(blk, mshr, new_state, writebacks, pkt);
+            assert(writebacks.empty());
+            missQueue->handleResponse(pkt, curTick + hitLatency);
+        }
     } else if (pkt && !pkt->req->isUncacheable()) {
+        pkt->flags &= ~NACKED_LINE;
+        pkt->flags &= ~SATISFIED;
+        pkt->flags &= ~SNOOP_COMMIT;
         missQueue->restoreOrigCmd(pkt);
     }
 }
@@ -304,6 +309,14 @@ Cache<TagStore,Buffering,Coherence>::handleResponse(Packet * &pkt)
 {
     BlkType *blk = NULL;
     if (pkt->senderState) {
+        if (pkt->result == Packet::Nacked) {
+            //pkt->reinitFromRequest();
+            warn("NACKs from devices not connected to the same bus not implemented\n");
+            return;
+        }
+        if (pkt->result == Packet::BadAddress) {
+            //Make the response a Bad address and send it
+        }
 //	MemDebug::cacheResponse(pkt);
         DPRINTF(Cache, "Handling reponse to %x, blk addr: %x\n",pkt->getAddr(),
                 pkt->getAddr() & (((ULL(1))<<48)-1));
@@ -312,11 +325,15 @@ Cache<TagStore,Buffering,Coherence>::handleResponse(Packet * &pkt)
             blk = tags->findBlock(pkt);
             CacheBlk::State old_state = (blk) ? blk->status : 0;
             PacketList writebacks;
+            CacheBlk::State new_state = coherence->getNewState(pkt,old_state);
+            if (old_state != new_state)
+                DPRINTF(Cache, "Block for blk addr %x moving from state %i to %i\n",
+                        pkt->getAddr() & (((ULL(1))<<48)-1), old_state, new_state);
             blk = tags->handleFill(blk, (MSHR*)pkt->senderState,
-                                   coherence->getNewState(pkt,old_state),
-                                   writebacks);
+                                   new_state, writebacks, pkt);
             while (!writebacks.empty()) {
                     missQueue->doWriteback(writebacks.front());
+                    writebacks.pop_front();
             }
         }
         missQueue->handleResponse(pkt, curTick + hitLatency);
@@ -372,7 +389,6 @@ template<class TagStore, class Buffering, class Coherence>
 void
 Cache<TagStore,Buffering,Coherence>::snoop(Packet * &pkt)
 {
-
     Addr blk_addr = pkt->getAddr() & ~(Addr(blkSize-1));
     BlkType *blk = tags->findBlock(pkt);
     MSHR *mshr = missQueue->findMSHR(blk_addr);
@@ -385,7 +401,12 @@ Cache<TagStore,Buffering,Coherence>::snoop(Packet * &pkt)
                     //If the outstanding request was an invalidate (upgrade,readex,..)
                     //Then we need to ACK the request until we get the data
                     //Also NACK if the outstanding request is not a cachefill (writeback)
+                    assert(!(pkt->flags & SATISFIED));
+                    pkt->flags |= SATISFIED;
                     pkt->flags |= NACKED_LINE;
+                    ///@todo NACK's from other levels
+                    //warn("NACKs from devices not connected to the same bus not implemented\n");
+                    //respondToSnoop(pkt, curTick + hitLatency);
                     return;
                 }
                 else {
@@ -398,6 +419,7 @@ Cache<TagStore,Buffering,Coherence>::snoop(Packet * &pkt)
                     //@todo Make it so that a read to a pending read can't be exclusive now.
 
                     //Set the address so find match works
+                    //panic("Don't have invalidates yet\n");
                     invalidatePkt->addrOverride(pkt->getAddr());
 
                     //Append the invalidate on
@@ -420,6 +442,7 @@ Cache<TagStore,Buffering,Coherence>::snoop(Packet * &pkt)
                     if (pkt->isRead()) {
                         //Only Upgrades don't get here
                         //Supply the data
+                        assert(!(pkt->flags & SATISFIED));
                         pkt->flags |= SATISFIED;
 
                         //If we are in an exclusive protocol, make it ask again
@@ -427,18 +450,18 @@ Cache<TagStore,Buffering,Coherence>::snoop(Packet * &pkt)
                         pkt->flags |= SHARED_LINE;
 
                         assert(pkt->isRead());
-                        Addr offset = pkt->getAddr() & ~(blkSize - 1);
+                        Addr offset = pkt->getAddr() & (blkSize - 1);
                         assert(offset < blkSize);
                         assert(pkt->getSize() <= blkSize);
                         assert(offset + pkt->getSize() <=blkSize);
                         memcpy(pkt->getPtr<uint8_t>(), mshr->pkt->getPtr<uint8_t>() + offset, pkt->getSize());
 
-                        respondToSnoop(pkt);
+                        respondToSnoop(pkt, curTick + hitLatency);
                     }
 
                     if (pkt->isInvalidate()) {
                         //This must be an upgrade or other cache will take ownership
-                        missQueue->markInService(mshr->pkt);
+                        missQueue->markInService(mshr->pkt, mshr);
                     }
                     return;
                 }
@@ -448,10 +471,16 @@ Cache<TagStore,Buffering,Coherence>::snoop(Packet * &pkt)
     CacheBlk::State new_state;
     bool satisfy = coherence->handleBusRequest(pkt,blk,mshr, new_state);
     if (satisfy) {
+        DPRINTF(Cache, "Cache snooped a %s request for addr %x and now supplying data,"
+                "new state is %i\n",
+                pkt->cmdString(), blk_addr, new_state);
+
         tags->handleSnoop(blk, new_state, pkt);
-        respondToSnoop(pkt);
+        respondToSnoop(pkt, curTick + hitLatency);
         return;
     }
+    if (blk) DPRINTF(Cache, "Cache snooped a %s request for addr %x, new state is %i\n",
+                     pkt->cmdString(), blk_addr, new_state);
     tags->handleSnoop(blk, new_state);
 }
 
@@ -486,7 +515,7 @@ Cache<TagStore,Buffering,Coherence>::invalidateBlk(Addr addr)
  */
 template<class TagStore, class Buffering, class Coherence>
 Tick
-Cache<TagStore,Buffering,Coherence>::probe(Packet * &pkt, bool update)
+Cache<TagStore,Buffering,Coherence>::probe(Packet * &pkt, bool update, CachePort* otherSidePort)
 {
 //    MemDebug::cacheProbe(pkt);
     if (!pkt->req->isUncacheable()) {
@@ -505,6 +534,10 @@ Cache<TagStore,Buffering,Coherence>::probe(Packet * &pkt, bool update)
     int lat;
     BlkType *blk = tags->handleAccess(pkt, lat, writebacks, update);
 
+    DPRINTF(Cache, "%s %x %s blk_addr: %x\n", pkt->cmdString(),
+            pkt->getAddr() & (((ULL(1))<<48)-1), (blk) ? "hit" : "miss",
+            pkt->getAddr() & ~((Addr)blkSize - 1));
+
     if (!blk) {
         // Need to check for outstanding misses and writes
         Addr blk_addr = pkt->getAddr() & ~(blkSize - 1);
@@ -517,7 +550,8 @@ Cache<TagStore,Buffering,Coherence>::probe(Packet * &pkt, bool update)
         missQueue->findWrites(blk_addr, writes);
 
         if (!update) {
-            memSidePort->sendFunctional(pkt);
+                otherSidePort->sendFunctional(pkt);
+
             // Check for data in MSHR and writebuffer.
             if (mshr) {
                 warn("Found outstanding miss on an non-update probe");
@@ -596,7 +630,7 @@ Cache<TagStore,Buffering,Coherence>::probe(Packet * &pkt, bool update)
             // update the cache state and statistics
             if (mshr || !writes.empty()){
                 // Can't handle it, return pktuest unsatisfied.
-                return 0;
+                panic("Atomic access ran into outstanding MSHR's or WB's!");
             }
             if (!pkt->req->isUncacheable()) {
                 // Fetch the cache block to fill
@@ -610,23 +644,46 @@ Cache<TagStore,Buffering,Coherence>::probe(Packet * &pkt, bool update)
 
                 busPkt->time = curTick;
 
+                DPRINTF(Cache, "Sending a atomic %s for %x blk_addr: %x\n",
+                        busPkt->cmdString(),
+                        busPkt->getAddr() & (((ULL(1))<<48)-1),
+                        busPkt->getAddr() & ~((Addr)blkSize - 1));
+
                 lat = memSidePort->sendAtomic(busPkt);
 
+                //Be sure to flip the response to a request for coherence
+                if (busPkt->needsResponse()) {
+                    busPkt->makeAtomicResponse();
+                }
+
 /*		if (!(busPkt->flags & SATISFIED)) {
                     // blocked at a higher level, just return
                     return 0;
                 }
 
-*/		misses[pkt->cmdToIndex()][pkt->req->getThreadNum()]++;
+*/		misses[pkt->cmdToIndex()][0/*pkt->req->getThreadNum()*/]++;
 
                 CacheBlk::State old_state = (blk) ? blk->status : 0;
+                CacheBlk::State new_state = coherence->getNewState(busPkt, old_state);
+                    DPRINTF(Cache, "Receive response:%s for blk addr %x in state %i\n",
+                            busPkt->cmdString(),
+                            busPkt->getAddr() & (((ULL(1))<<48)-1), old_state);
+                if (old_state != new_state)
+                    DPRINTF(Cache, "Block for blk addr %x moving from state %i to %i\n",
+                            busPkt->getAddr() & (((ULL(1))<<48)-1), old_state, new_state);
+
                 tags->handleFill(blk, busPkt,
-                                 coherence->getNewState(busPkt, old_state),
+                                 new_state,
                                  writebacks, pkt);
+                //Free the packet
+                delete busPkt;
+
                 // Handle writebacks if needed
                 while (!writebacks.empty()){
-                    memSidePort->sendAtomic(writebacks.front());
+                    Packet *wbPkt = writebacks.front();
+                    memSidePort->sendAtomic(wbPkt);
                     writebacks.pop_front();
+                    delete wbPkt;
                 }
                 return lat + hitLatency;
             } else {
@@ -642,12 +699,12 @@ Cache<TagStore,Buffering,Coherence>::probe(Packet * &pkt, bool update)
         }
 
         if (update) {
-            hits[pkt->cmdToIndex()][pkt->req->getThreadNum()]++;
+            hits[pkt->cmdToIndex()][0/*pkt->req->getThreadNum()*/]++;
         } else if (pkt->isWrite()) {
             // Still need to change data in all locations.
-            return memSidePort->sendAtomic(pkt);
+            otherSidePort->sendFunctional(pkt);
         }
-        return curTick + lat;
+        return hitLatency;
     }
     fatal("Probe not handled.\n");
     return 0;
@@ -655,18 +712,24 @@ Cache<TagStore,Buffering,Coherence>::probe(Packet * &pkt, bool update)
 
 template<class TagStore, class Buffering, class Coherence>
 Tick
-Cache<TagStore,Buffering,Coherence>::snoopProbe(PacketPtr &pkt, bool update)
+Cache<TagStore,Buffering,Coherence>::snoopProbe(PacketPtr &pkt)
 {
-    Addr blk_addr = pkt->getAddr() & ~(Addr(blkSize-1));
-    BlkType *blk = tags->findBlock(pkt);
-    MSHR *mshr = missQueue->findMSHR(blk_addr);
-    CacheBlk::State new_state = 0;
-    bool satisfy = coherence->handleBusRequest(pkt,blk,mshr, new_state);
-    if (satisfy) {
-        tags->handleSnoop(blk, new_state, pkt);
-        return hitLatency;
-    }
-    tags->handleSnoop(blk, new_state);
-    return 0;
+        Addr blk_addr = pkt->getAddr() & ~(Addr(blkSize-1));
+        BlkType *blk = tags->findBlock(pkt);
+        MSHR *mshr = missQueue->findMSHR(blk_addr);
+        CacheBlk::State new_state = 0;
+        bool satisfy = coherence->handleBusRequest(pkt,blk,mshr, new_state);
+        if (satisfy) {
+            DPRINTF(Cache, "Cache snooped a %s request for addr %x and now supplying data,"
+                    "new state is %i\n",
+                    pkt->cmdString(), blk_addr, new_state);
+
+            tags->handleSnoop(blk, new_state, pkt);
+            return hitLatency;
+        }
+        if (blk) DPRINTF(Cache, "Cache snooped a %s request for addr %x, new state is %i\n",
+                     pkt->cmdString(), blk_addr, new_state);
+        tags->handleSnoop(blk, new_state);
+        return 0;
 }
 
diff --git a/src/mem/cache/coherence/coherence_protocol.cc b/src/mem/cache/coherence/coherence_protocol.cc
index bcf3ce9c5..e28dda3dc 100644
--- a/src/mem/cache/coherence/coherence_protocol.cc
+++ b/src/mem/cache/coherence/coherence_protocol.cc
@@ -271,7 +271,7 @@ CoherenceProtocol::CoherenceProtocol(const string &name,
     }
 
     Packet::Command writeToSharedCmd = doUpgrades ? Packet::UpgradeReq : Packet::ReadExReq;
-    Packet::Command writeToSharedResp = doUpgrades ? Packet::UpgradeResp : Packet::ReadExResp;
+    Packet::Command writeToSharedResp = doUpgrades ? Packet::UpgradeReq : Packet::ReadExResp;
 
 //@todo add in hardware prefetch to this list
     if (protocol == "msi") {
diff --git a/src/mem/cache/miss/blocking_buffer.cc b/src/mem/cache/miss/blocking_buffer.cc
index 67fc7ae56..f7aacff89 100644
--- a/src/mem/cache/miss/blocking_buffer.cc
+++ b/src/mem/cache/miss/blocking_buffer.cc
@@ -123,12 +123,12 @@ BlockingBuffer::restoreOrigCmd(Packet * &pkt)
 }
 
 void
-BlockingBuffer::markInService(Packet * &pkt)
+BlockingBuffer::markInService(Packet * &pkt, MSHR* mshr)
 {
     if (!pkt->isCacheFill() && pkt->isWrite()) {
         // Forwarding a write/ writeback, don't need to change
         // the command
-        assert((MSHR*)pkt->senderState == &wb);
+        assert(mshr == &wb);
         cache->clearMasterRequest(Request_WB);
         if (!pkt->needsResponse()) {
             assert(wb.getNumTargets() == 0);
@@ -138,7 +138,7 @@ BlockingBuffer::markInService(Packet * &pkt)
             wb.inService = true;
         }
     } else {
-        assert((MSHR*)pkt->senderState == &miss);
+        assert(mshr == &miss);
         cache->clearMasterRequest(Request_MSHR);
         if (!pkt->needsResponse()) {
             assert(miss.getNumTargets() == 0);
@@ -189,7 +189,7 @@ BlockingBuffer::squash(int threadNum)
     if (miss.threadNum == threadNum) {
         Packet * target = miss.getTarget();
         miss.popTarget();
-        assert(target->req->getThreadNum() == threadNum);
+        assert(0/*target->req->getThreadNum()*/ == threadNum);
         target = NULL;
         assert(!miss.hasTargets());
         miss.ntargets=0;
@@ -218,7 +218,7 @@ BlockingBuffer::doWriteback(Addr addr,
     }
 
     ///All writebacks charged to same thread @todo figure this out
-    writebacks[pkt->req->getThreadNum()]++;
+    writebacks[0/*pkt->req->getThreadNum()*/]++;
 
     wb.allocateAsBuffer(pkt);
     cache->setMasterRequest(Request_WB, curTick);
@@ -230,7 +230,7 @@ BlockingBuffer::doWriteback(Addr addr,
 void
 BlockingBuffer::doWriteback(Packet * &pkt)
 {
-    writebacks[pkt->req->getThreadNum()]++;
+    writebacks[0/*pkt->req->getThreadNum()*/]++;
 
     wb.allocateAsBuffer(pkt);
 
diff --git a/src/mem/cache/miss/blocking_buffer.hh b/src/mem/cache/miss/blocking_buffer.hh
index 641d5a798..f7069696c 100644
--- a/src/mem/cache/miss/blocking_buffer.hh
+++ b/src/mem/cache/miss/blocking_buffer.hh
@@ -152,7 +152,7 @@ public:
      * are successfully sent.
      * @param pkt The request that was sent on the bus.
      */
-    void markInService(Packet * &pkt);
+    void markInService(Packet * &pkt, MSHR* mshr);
 
     /**
      * Frees the resources of the pktuest and unblock the cache.
diff --git a/src/mem/cache/miss/miss_queue.cc b/src/mem/cache/miss/miss_queue.cc
index 76fb25716..c7b0e0890 100644
--- a/src/mem/cache/miss/miss_queue.cc
+++ b/src/mem/cache/miss/miss_queue.cc
@@ -372,7 +372,7 @@ MissQueue::allocateMiss(Packet * &pkt, int size, Tick time)
 MSHR*
 MissQueue::allocateWrite(Packet * &pkt, int size, Tick time)
 {
-    MSHR* mshr = wb.allocate(pkt,blkSize);
+    MSHR* mshr = wb.allocate(pkt,size);
     mshr->order = order++;
 
 //REMOVING COMPRESSION FOR NOW
@@ -413,8 +413,8 @@ MissQueue::handleMiss(Packet * &pkt, int blkSize, Tick time)
         mshr = mq.findMatch(blkAddr);
         if (mshr) {
             //@todo remove hw_pf here
-            mshr_hits[pkt->cmdToIndex()][pkt->req->getThreadNum()]++;
-            if (mshr->threadNum != pkt->req->getThreadNum()) {
+            mshr_hits[pkt->cmdToIndex()][0/*pkt->req->getThreadNum()*/]++;
+            if (mshr->threadNum != 0/*pkt->req->getThreadNum()*/) {
                 mshr->threadNum = -1;
             }
             mq.allocateTarget(mshr, pkt);
@@ -434,11 +434,11 @@ MissQueue::handleMiss(Packet * &pkt, int blkSize, Tick time)
             mshr_no_allocate_misses++;
         }
         else {
-            mshr_misses[pkt->cmdToIndex()][pkt->req->getThreadNum()]++;
+            mshr_misses[pkt->cmdToIndex()][0/*pkt->req->getThreadNum()*/]++;
         }
     } else {
         //Count uncacheable accesses
-        mshr_uncacheable[pkt->cmdToIndex()][pkt->req->getThreadNum()]++;
+        mshr_uncacheable[pkt->cmdToIndex()][0/*pkt->req->getThreadNum()*/]++;
         size = pkt->getSize();
     }
     if (pkt->isWrite() && (pkt->req->isUncacheable() || !writeAllocate ||
@@ -446,7 +446,7 @@ MissQueue::handleMiss(Packet * &pkt, int blkSize, Tick time)
         /**
          * @todo Add write merging here.
          */
-        mshr = allocateWrite(pkt, blkSize, time);
+        mshr = allocateWrite(pkt, pkt->getSize(), time);
         return;
     }
 
@@ -499,7 +499,7 @@ MissQueue::getPacket()
         pkt = prefetcher->getPacket();
         if (pkt) {
             //Update statistic on number of prefetches issued (hwpf_mshr_misses)
-            mshr_misses[pkt->cmdToIndex()][pkt->req->getThreadNum()]++;
+            mshr_misses[pkt->cmdToIndex()][0/*pkt->req->getThreadNum()*/]++;
             //It will request the bus for the future, but should clear that immedieatley
             allocateMiss(pkt, pkt->getSize(), curTick);
             pkt = mq.getReq();
@@ -515,6 +515,14 @@ MissQueue::setBusCmd(Packet * &pkt, Packet::Command cmd)
     assert(pkt->senderState != 0);
     MSHR * mshr = (MSHR*)pkt->senderState;
     mshr->originalCmd = pkt->cmd;
+    if (cmd == Packet::UpgradeReq || cmd == Packet::InvalidateReq) {
+        pkt->flags |= NO_ALLOCATE;
+        pkt->flags &= ~CACHE_LINE_FILL;
+    }
+    else if (!pkt->req->isUncacheable() && !pkt->isNoAllocate() &&
+             (cmd & (1 << 6)/*NeedsResponse*/)) {
+        pkt->flags |= CACHE_LINE_FILL;
+    }
     if (pkt->isCacheFill() || pkt->isNoAllocate())
         pkt->cmd = cmd;
 }
@@ -526,9 +534,8 @@ MissQueue::restoreOrigCmd(Packet * &pkt)
 }
 
 void
-MissQueue::markInService(Packet * &pkt)
+MissQueue::markInService(Packet * &pkt, MSHR* mshr)
 {
-    assert(pkt->senderState != 0);
     bool unblock = false;
     BlockedCause cause = NUM_BLOCKED_CAUSES;
 
@@ -540,7 +547,7 @@ MissQueue::markInService(Packet * &pkt)
         // Forwarding a write/ writeback, don't need to change
         // the command
         unblock = wb.isFull();
-        wb.markInService((MSHR*)pkt->senderState);
+        wb.markInService(mshr);
         if (!wb.havePending()){
             cache->clearMasterRequest(Request_WB);
         }
@@ -551,11 +558,11 @@ MissQueue::markInService(Packet * &pkt)
         }
     } else {
         unblock = mq.isFull();
-        mq.markInService((MSHR*)pkt->senderState);
+        mq.markInService(mshr);
         if (!mq.havePending()){
             cache->clearMasterRequest(Request_MSHR);
         }
-        if (((MSHR*)(pkt->senderState))->originalCmd == Packet::HardPFReq) {
+        if (mshr->originalCmd == Packet::HardPFReq) {
             DPRINTF(HWPrefetch, "%s:Marking a HW_PF in service\n",
                     cache->name());
             //Also clear pending if need be
@@ -592,7 +599,7 @@ MissQueue::handleResponse(Packet * &pkt, Tick time)
     BlockedCause cause = NUM_BLOCKED_CAUSES;
 
     if (pkt->isCacheFill() && !pkt->isNoAllocate()) {
-        mshr_miss_latency[mshr->originalCmd][pkt->req->getThreadNum()] +=
+        mshr_miss_latency[mshr->originalCmd][0/*pkt->req->getThreadNum()*/] +=
             curTick - pkt->time;
         // targets were handled in the cache tags
         if (mshr == noTargetMSHR) {
@@ -619,7 +626,7 @@ MissQueue::handleResponse(Packet * &pkt, Tick time)
         }
     } else {
         if (pkt->req->isUncacheable()) {
-            mshr_uncacheable_lat[pkt->cmd][pkt->req->getThreadNum()] +=
+            mshr_uncacheable_lat[pkt->cmd][0/*pkt->req->getThreadNum()*/] +=
                 curTick - pkt->time;
         }
         if (mshr->hasTargets() && pkt->req->isUncacheable()) {
@@ -725,7 +732,7 @@ MissQueue::doWriteback(Addr addr,
     }
 
     ///All writebacks charged to same thread @todo figure this out
-    writebacks[pkt->req->getThreadNum()]++;
+    writebacks[0/*pkt->req->getThreadNum()*/]++;
 
     allocateWrite(pkt, 0, curTick);
 }
@@ -734,7 +741,7 @@ MissQueue::doWriteback(Addr addr,
 void
 MissQueue::doWriteback(Packet * &pkt)
 {
-    writebacks[pkt->req->getThreadNum()]++;
+    writebacks[0/*pkt->req->getThreadNum()*/]++;
     allocateWrite(pkt, 0, curTick);
 }
 
diff --git a/src/mem/cache/miss/miss_queue.hh b/src/mem/cache/miss/miss_queue.hh
index 505d1f90c..179638d2b 100644
--- a/src/mem/cache/miss/miss_queue.hh
+++ b/src/mem/cache/miss/miss_queue.hh
@@ -256,7 +256,7 @@ class MissQueue
      * are successfully sent.
      * @param pkt The request that was sent on the bus.
      */
-    void markInService(Packet * &pkt);
+    void markInService(Packet * &pkt, MSHR* mshr);
 
     /**
      * Collect statistics and free resources of a satisfied pktuest.
diff --git a/src/mem/cache/miss/mshr.cc b/src/mem/cache/miss/mshr.cc
index 519ec5ebd..455798f15 100644
--- a/src/mem/cache/miss/mshr.cc
+++ b/src/mem/cache/miss/mshr.cc
@@ -88,7 +88,7 @@ void
 MSHR::allocateAsBuffer(Packet * &target)
 {
     addr = target->getAddr();
-    threadNum = target->req->getThreadNum();
+    threadNum = 0/*target->req->getThreadNum()*/;
     pkt = new Packet(target->req, target->cmd, -1);
     pkt->allocate();
     pkt->senderState = (Packet::SenderState*)this;
@@ -100,6 +100,7 @@ MSHR::deallocate()
 {
     assert(targets.empty());
     assert(ntargets == 0);
+    delete pkt;
     pkt = NULL;
     inService = false;
     //allocIter = NULL;
diff --git a/src/mem/cache/miss/mshr_queue.cc b/src/mem/cache/miss/mshr_queue.cc
index 97a56119f..1876a8987 100644
--- a/src/mem/cache/miss/mshr_queue.cc
+++ b/src/mem/cache/miss/mshr_queue.cc
@@ -128,6 +128,7 @@ MSHR*
 MSHRQueue::allocate(Packet * &pkt, int size)
 {
     Addr aligned_addr = pkt->getAddr() & ~((Addr)size - 1);
+    assert(!freeList.empty());
     MSHR *mshr = freeList.front();
     assert(mshr->getNumTargets() == 0);
     freeList.pop_front();
@@ -212,8 +213,13 @@ void
 MSHRQueue::markInService(MSHR* mshr)
 {
     //assert(mshr == pendingList.front());
-    if (!mshr->pkt->needsResponse()) {
+    if (!(mshr->pkt->needsResponse() || mshr->pkt->cmd == Packet::UpgradeReq)) {
         assert(mshr->getNumTargets() == 0);
+        if ((mshr->pkt->flags & SATISFIED) && (mshr->pkt->cmd == Packet::Writeback)) {
+            //Writeback hit, so delete it
+            //otherwise the consumer will delete it
+            delete mshr->pkt->req;
+        }
         deallocate(mshr);
         return;
     }
@@ -251,7 +257,7 @@ MSHRQueue::squash(int threadNum)
                 Packet * target = mshr->getTarget();
                 mshr->popTarget();
 
-                assert(target->req->getThreadNum() == threadNum);
+                assert(0/*target->req->getThreadNum()*/ == threadNum);
                 target = NULL;
             }
             assert(!mshr->hasTargets());