From 83af0fdcf57175adf8077c51e9ba872dd2c04b76 Mon Sep 17 00:00:00 2001
From: Steve Reinhardt <stever@eecs.umich.edu>
Date: Thu, 21 Jun 2007 11:59:17 -0700
Subject: Getting closer...

configs/example/memtest.py:
    Add progress interval option.
src/base/traceflags.py:
    Add MemTest flag.
src/cpu/memtest/memtest.cc:
    Clean up tracing.
src/cpu/memtest/memtest.hh:
    Get rid of unused code.

--HG--
extra : convert_revision : 92bd8241a6c90bfb6d908e5a5132cbdb500cbb87
---
 src/mem/cache/base_cache.cc                   |   6 +-
 src/mem/cache/base_cache.hh                   | 109 ++++--
 src/mem/cache/cache.hh                        |  18 +-
 src/mem/cache/cache_impl.hh                   | 544 ++++++++++++--------------
 src/mem/cache/coherence/coherence_protocol.cc |   3 +-
 src/mem/cache/miss/mshr.cc                    |   4 +-
 src/mem/cache/miss/mshr.hh                    |   2 +-
 src/mem/cache/miss/mshr_queue.cc              |   9 +-
 src/mem/cache/miss/mshr_queue.hh              |   7 +-
 src/mem/cache/prefetch/base_prefetcher.cc     |   8 +-
 src/mem/packet.cc                             |  12 +-
 src/mem/packet.hh                             |   3 +-
 12 files changed, 377 insertions(+), 348 deletions(-)

(limited to 'src/mem')

diff --git a/src/mem/cache/base_cache.cc b/src/mem/cache/base_cache.cc
index c7006550b..8b476e100 100644
--- a/src/mem/cache/base_cache.cc
+++ b/src/mem/cache/base_cache.cc
@@ -50,8 +50,9 @@ BaseCache::CachePort::CachePort(const std::string &_name, BaseCache *_cache)
 
 BaseCache::BaseCache(const std::string &name, Params &params)
     : MemObject(name),
-      mshrQueue(params.numMSHRs, 4),
-      writeBuffer(params.numWriteBuffers, params.numMSHRs+1000),
+      mshrQueue(params.numMSHRs, 4, MSHRQueue_MSHRs),
+      writeBuffer(params.numWriteBuffers, params.numMSHRs+1000,
+                  MSHRQueue_WriteBuffer),
       blkSize(params.blkSize),
       numTarget(params.numTargets),
       blocked(0),
@@ -128,6 +129,7 @@ BaseCache::init()
     cpuSidePort->sendStatusChange(Port::RangeChange);
 }
 
+
 void
 BaseCache::regStats()
 {
diff --git a/src/mem/cache/base_cache.hh b/src/mem/cache/base_cache.hh
index 5969b4b3f..10fd3289c 100644
--- a/src/mem/cache/base_cache.hh
+++ b/src/mem/cache/base_cache.hh
@@ -54,41 +54,49 @@
 #include "sim/eventq.hh"
 #include "sim/sim_exit.hh"
 
-/**
- * Reasons for Caches to be Blocked.
- */
-enum BlockedCause{
-    Blocked_NoMSHRs,
-    Blocked_NoTargets,
-    Blocked_NoWBBuffers,
-    Blocked_Coherence,
-    NUM_BLOCKED_CAUSES
-};
-
-/**
- * Reasons for cache to request a bus.
- */
-enum RequestCause{
-    Request_MSHR,
-    Request_WB,
-    Request_Coherence,
-    Request_PF
-};
-
 class MSHR;
 /**
  * A basic cache interface. Implements some common functions for speed.
  */
 class BaseCache : public MemObject
 {
+    /**
+     * Indexes to enumerate the MSHR queues.
+     */
+    enum MSHRQueueIndex {
+        MSHRQueue_MSHRs,
+        MSHRQueue_WriteBuffer
+    };
+
+    /**
+     * Reasons for caches to be blocked.
+     */
+    enum BlockedCause {
+        Blocked_NoMSHRs = MSHRQueue_MSHRs,
+        Blocked_NoWBBuffers = MSHRQueue_WriteBuffer,
+        Blocked_NoTargets,
+        NUM_BLOCKED_CAUSES
+    };
+
+  public:
+    /**
+     * Reasons for cache to request a bus.
+     */
+    enum RequestCause {
+        Request_MSHR = MSHRQueue_MSHRs,
+        Request_WB = MSHRQueue_WriteBuffer,
+        Request_PF,
+        NUM_REQUEST_CAUSES
+    };
+
+  private:
+
     class CachePort : public SimpleTimingPort
     {
       public:
         BaseCache *cache;
 
       protected:
-        Event *responseEvent;
-
         CachePort(const std::string &_name, BaseCache *_cache);
 
         virtual void recvStatusChange(Status status);
@@ -154,6 +162,36 @@ class BaseCache : public MemObject
     /** Write/writeback buffer */
     MSHRQueue writeBuffer;
 
+    MSHR *allocateBufferInternal(MSHRQueue *mq, Addr addr, int size,
+                                 PacketPtr pkt, Tick time, bool requestBus)
+    {
+        MSHR *mshr = mq->allocate(addr, size, pkt);
+        mshr->order = order++;
+
+        if (mq->isFull()) {
+            setBlocked((BlockedCause)mq->index);
+        }
+
+        if (requestBus) {
+            requestMemSideBus((RequestCause)mq->index, time);
+        }
+
+        return mshr;
+    }
+
+    void markInServiceInternal(MSHR *mshr)
+    {
+        MSHRQueue *mq = mshr->queue;
+        bool wasFull = mq->isFull();
+        mq->markInService(mshr);
+        if (!mq->havePending()) {
+            deassertMemSideBusRequest((RequestCause)mq->index);
+        }
+        if (wasFull && !mq->isFull()) {
+            clearBlocked((BlockedCause)mq->index);
+        }
+    }
+
     /** Block size of this cache */
     const int blkSize;
 
@@ -382,6 +420,31 @@ class BaseCache : public MemObject
     Addr blockAlign(Addr addr) const { return (addr & ~(blkSize - 1)); }
 
 
+    MSHR *allocateMissBuffer(PacketPtr pkt, Tick time, bool requestBus)
+    {
+        return allocateBufferInternal(&mshrQueue,
+                                      blockAlign(pkt->getAddr()), blkSize,
+                                      pkt, time, requestBus);
+    }
+
+    MSHR *allocateBuffer(PacketPtr pkt, Tick time, bool requestBus)
+    {
+        MSHRQueue *mq = NULL;
+
+        if (pkt->isWrite() && !pkt->isRead()) {
+            /**
+             * @todo Add write merging here.
+             */
+            mq = &writeBuffer;
+        } else {
+            mq = &mshrQueue;
+        }
+
+        return allocateBufferInternal(mq, pkt->getAddr(), pkt->getSize(),
+                                      pkt, time, requestBus);
+    }
+
+
     /**
      * Returns true if the cache is blocked for accesses.
      */
diff --git a/src/mem/cache/cache.hh b/src/mem/cache/cache.hh
index 16d15cf86..06fce1a71 100644
--- a/src/mem/cache/cache.hh
+++ b/src/mem/cache/cache.hh
@@ -179,7 +179,7 @@ class Cache : public BaseCache
      * @return Pointer to the cache block touched by the request. NULL if it
      * was a miss.
      */
-    bool access(PacketPtr pkt, BlkType *blk, int & lat);
+    bool access(PacketPtr pkt, BlkType *&blk, int &lat);
 
     /**
      *Handle doing the Compare and Swap function for SPARC.
@@ -201,7 +201,7 @@ class Cache : public BaseCache
 
     bool satisfyCpuSideRequest(PacketPtr pkt, BlkType *blk);
     bool satisfyTarget(MSHR::Target *target, BlkType *blk);
-    void satisfyMSHR(MSHR *mshr, PacketPtr pkt, BlkType *blk);
+    bool satisfyMSHR(MSHR *mshr, PacketPtr pkt, BlkType *blk);
 
     void doTimingSupplyResponse(PacketPtr req_pkt, uint8_t *blk_data);
 
@@ -310,15 +310,16 @@ class Cache : public BaseCache
      * @param isFill Whether to fetch & allocate a block
      *               or just forward the request.
      */
-    MSHR *allocateBuffer(PacketPtr pkt, Tick time, bool isFill,
-                         bool requestBus);
+    MSHR *allocateBuffer(PacketPtr pkt, Tick time, bool requestBus);
 
     /**
      * Selects a outstanding request to service.
      * @return The request to service, NULL if none found.
      */
+    PacketPtr getBusPacket(PacketPtr cpu_pkt, BlkType *blk,
+                           bool needsExclusive);
     MSHR *getNextMSHR();
-    PacketPtr getPacket();
+    PacketPtr getTimingPacket();
 
     /**
      * Marks a request as in service (sent on the bus). This can have side
@@ -328,13 +329,6 @@ class Cache : public BaseCache
      */
     void markInService(MSHR *mshr);
 
-    /**
-     * Collect statistics and free resources of a satisfied request.
-     * @param pkt The request that has been satisfied.
-     * @param time The time when the request is satisfied.
-     */
-    void handleResponse(PacketPtr pkt, Tick time);
-
     /**
      * Perform the given writeback request.
      * @param pkt The writeback request.
diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh
index 0f66e613c..81fcb4158 100644
--- a/src/mem/cache/cache_impl.hh
+++ b/src/mem/cache/cache_impl.hh
@@ -152,40 +152,21 @@ Cache<TagStore,Coherence>::cmpAndSwap(BlkType *blk, PacketPtr pkt)
 template<class TagStore, class Coherence>
 MSHR *
 Cache<TagStore,Coherence>::allocateBuffer(PacketPtr pkt, Tick time,
-                                          bool isFill, bool requestBus)
+                                          bool requestBus)
 {
-    int  size = isFill ? blkSize : pkt->getSize();
-    Addr addr = isFill ? tags->blkAlign(pkt->getAddr()) : pkt->getAddr();
+    MSHRQueue *mq = NULL;
 
-    MSHR *mshr = NULL;
-
-    if (pkt->isWrite()) {
+    if (pkt->isWrite() && !pkt->isRead()) {
         /**
          * @todo Add write merging here.
          */
-        mshr = writeBuffer.allocate(addr, size, pkt, isFill);
-        mshr->order = order++;
-
-        if (writeBuffer.isFull()) {
-            setBlocked(Blocked_NoWBBuffers);
-        }
-
-        if (requestBus) {
-            requestMemSideBus(Request_WB, time);
-        }
+        mq = &writeBuffer;
     } else {
-        mshr = mshrQueue.allocate(addr, size, pkt, isFill);
-        mshr->order = order++;
-        if (mshrQueue.isFull()) {
-            setBlocked(Blocked_NoMSHRs);
-        }
-        if (requestBus) {
-            requestMemSideBus(Request_MSHR, time);
-        }
+        mq = &mshrQueue;
     }
 
-    assert(mshr != NULL);
-    return mshr;
+    return allocateBufferInternal(mq, pkt->getAddr(), pkt->getSize(),
+                                  pkt, time, requestBus);
 }
 
 
@@ -193,33 +174,7 @@ template<class TagStore, class Coherence>
 void
 Cache<TagStore,Coherence>::markInService(MSHR *mshr)
 {
-    bool unblock = false;
-    BlockedCause cause = NUM_BLOCKED_CAUSES;
-
-    /**
-     * @todo Should include MSHRQueue pointer in MSHR to select the correct
-     * one.
-     */
-    if (mshr->queue == &writeBuffer) {
-        // Forwarding a write/ writeback, don't need to change
-        // the command
-        unblock = writeBuffer.isFull();
-        writeBuffer.markInService(mshr);
-        if (!writeBuffer.havePending()){
-            deassertMemSideBusRequest(Request_WB);
-        }
-        if (unblock) {
-            // Do we really unblock?
-            unblock = !writeBuffer.isFull();
-            cause = Blocked_NoWBBuffers;
-        }
-    } else {
-        assert(mshr->queue == &mshrQueue);
-        unblock = mshrQueue.isFull();
-        mshrQueue.markInService(mshr);
-        if (!mshrQueue.havePending()){
-            deassertMemSideBusRequest(Request_MSHR);
-        }
+    markInServiceInternal(mshr);
 #if 0
         if (mshr->originalCmd == MemCmd::HardPFReq) {
             DPRINTF(HWPrefetch, "%s:Marking a HW_PF in service\n",
@@ -231,14 +186,6 @@ Cache<TagStore,Coherence>::markInService(MSHR *mshr)
             }
         }
 #endif
-        if (unblock) {
-            unblock = !mshrQueue.isFull();
-            cause = Blocked_NoMSHRs;
-        }
-    }
-    if (unblock) {
-        clearBlocked(cause);
-    }
 }
 
 
@@ -275,9 +222,16 @@ Cache<TagStore,Coherence>::squash(int threadNum)
 
 template<class TagStore, class Coherence>
 bool
-Cache<TagStore,Coherence>::access(PacketPtr pkt, BlkType *blk, int &lat)
+Cache<TagStore,Coherence>::access(PacketPtr pkt, BlkType *&blk, int &lat)
 {
+    if (pkt->req->isUncacheable())  {
+        blk = NULL;
+        lat = hitLatency;
+        return false;
+    }
+
     bool satisfied = false;  // assume the worst
+    blk = tags->findBlock(pkt->getAddr(), lat);
 
     if (prefetchAccess) {
         //We are determining prefetches on access stream, call prefetcher
@@ -307,6 +261,8 @@ Cache<TagStore,Coherence>::access(PacketPtr pkt, BlkType *blk, int &lat)
             hits[pkt->cmdToIndex()][0/*pkt->req->getThreadNum()*/]++;
             satisfied = true;
 
+            // Check RMW operations first since both isRead() and
+            // isWrite() will be true for them
             if (pkt->cmd == MemCmd::SwapReq) {
                 cmpAndSwap(blk, pkt);
             } else if (pkt->isWrite()) {
@@ -314,12 +270,16 @@ Cache<TagStore,Coherence>::access(PacketPtr pkt, BlkType *blk, int &lat)
                     blk->status |= BlkDirty;
                     pkt->writeDataToBlock(blk->data, blkSize);
                 }
-            } else {
-                assert(pkt->isRead());
+            } else if (pkt->isRead()) {
                 if (pkt->isLocked()) {
                     blk->trackLoadLocked(pkt);
                 }
                 pkt->setDataFromBlock(blk->data, blkSize);
+            } else {
+                // Not a read or write... must be an upgrade.  it's OK
+                // to just ack those as long as we have an exclusive
+                // copy at this level.
+                assert(pkt->cmd == MemCmd::UpgradeReq);
             }
         } else {
             // permission violation... nothing to do here, leave unsatisfied
@@ -351,19 +311,24 @@ Cache<TagStore,Coherence>::timingAccess(PacketPtr pkt)
     // we charge hitLatency for doing just about anything here
     Tick time =  curTick + hitLatency;
 
+    if (pkt->memInhibitAsserted()) {
+        DPRINTF(Cache, "mem inhibited on 0x%x: not responding\n",
+                pkt->getAddr());
+        assert(!pkt->req->isUncacheable());
+        return true;
+    }
+
     if (pkt->req->isUncacheable()) {
-        allocateBuffer(pkt, time, false, true);
+        allocateBuffer(pkt, time, true);
         assert(pkt->needsResponse()); // else we should delete it here??
         return true;
     }
 
     PacketList writebacks;
     int lat = hitLatency;
-    BlkType *blk = tags->findBlock(pkt->getAddr(), lat);
     bool satisfied = false;
 
     Addr blk_addr = pkt->getAddr() & ~(Addr(blkSize-1));
-
     MSHR *mshr = mshrQueue.findMatch(blk_addr);
 
     if (!mshr) {
@@ -373,6 +338,7 @@ Cache<TagStore,Coherence>::timingAccess(PacketPtr pkt)
         // cache block... a more aggressive system could detect the
         // overlap (if any) and forward data out of the MSHRs, but we
         // don't do that yet)
+        BlkType *blk = NULL;
         satisfied = access(pkt, blk, lat);
     }
 
@@ -401,7 +367,7 @@ Cache<TagStore,Coherence>::timingAccess(PacketPtr pkt)
     // copy writebacks to write buffer
     while (!writebacks.empty()) {
         PacketPtr wbPkt = writebacks.front();
-        allocateBuffer(wbPkt, time, false, true);
+        allocateBuffer(wbPkt, time, true);
         writebacks.pop_front();
     }
 
@@ -435,7 +401,7 @@ Cache<TagStore,Coherence>::timingAccess(PacketPtr pkt)
             // always mark as cache fill for now... if we implement
             // no-write-allocate or bypass accesses this will have to
             // be changed.
-            allocateBuffer(pkt, time, true, true);
+            allocateMissBuffer(pkt, time, true);
         }
     }
 
@@ -449,54 +415,109 @@ Cache<TagStore,Coherence>::timingAccess(PacketPtr pkt)
 }
 
 
+template<class TagStore, class Coherence>
+PacketPtr
+Cache<TagStore,Coherence>::getBusPacket(PacketPtr cpu_pkt, BlkType *blk,
+                                        bool needsExclusive)
+{
+    bool blkValid = blk && blk->isValid();
+
+    if (cpu_pkt->req->isUncacheable()) {
+        assert(blk == NULL);
+        return NULL;
+    }
+
+    if (!blkValid &&
+        (cpu_pkt->cmd == MemCmd::Writeback ||
+         cpu_pkt->cmd == MemCmd::UpgradeReq)) {
+            // For now, writebacks from upper-level caches that
+            // completely miss in the cache just go through. If we had
+            // "fast write" support (where we could write the whole
+            // block w/o fetching new data) we might want to allocate
+            // on writeback misses instead.
+        return NULL;
+    }
+
+    MemCmd cmd;
+    const bool useUpgrades = true;
+    if (blkValid && useUpgrades) {
+        // only reason to be here is that blk is shared
+        // (read-only) and we need exclusive
+        assert(needsExclusive && !blk->isWritable());
+        cmd = MemCmd::UpgradeReq;
+    } else {
+        // block is invalid
+        cmd = needsExclusive ? MemCmd::ReadExReq : MemCmd::ReadReq;
+    }
+    PacketPtr pkt = new Packet(cpu_pkt->req, cmd, Packet::Broadcast, blkSize);
+
+    pkt->allocate();
+    return pkt;
+}
+
+
 template<class TagStore, class Coherence>
 Tick
 Cache<TagStore,Coherence>::atomicAccess(PacketPtr pkt)
 {
+    int lat = hitLatency;
+
+    if (pkt->memInhibitAsserted()) {
+        DPRINTF(Cache, "mem inhibited on 0x%x: not responding\n",
+                pkt->getAddr());
+        assert(!pkt->req->isUncacheable());
+        return lat;
+    }
+
     // should assert here that there are no outstanding MSHRs or
     // writebacks... that would mean that someone used an atomic
     // access in timing mode
 
-    if (pkt->req->isUncacheable()) {
-        // Uncacheables just go through
-        return memSidePort->sendAtomic(pkt);
-    }
-
-    PacketList writebacks;
-    int lat = hitLatency;
-    BlkType *blk = tags->findBlock(pkt->getAddr(), lat);
-    bool satisfied = access(pkt, blk, lat);
+    BlkType *blk = NULL;
 
-    if (!satisfied) {
+    if (!access(pkt, blk, lat)) {
         // MISS
-        CacheBlk::State old_state = (blk) ? blk->status : 0;
-        MemCmd cmd = coherence->getBusCmd(pkt->cmd, old_state);
-        Packet busPkt = Packet(pkt->req, cmd, Packet::Broadcast, blkSize);
-        busPkt.allocate();
+        PacketPtr busPkt = getBusPacket(pkt, blk, pkt->needsExclusive());
 
-        DPRINTF(Cache, "Sending a atomic %s for %x\n",
-                busPkt.cmdString(), busPkt.getAddr());
+        bool isCacheFill = (busPkt != NULL);
 
-        lat += memSidePort->sendAtomic(&busPkt);
+        if (busPkt == NULL) {
+            // just forwarding the same request to the next level
+            // no local cache operation involved
+            busPkt = pkt;
+        }
 
-        DPRINTF(Cache, "Receive response: %s for addr %x in state %i\n",
-                busPkt.cmdString(), busPkt.getAddr(), old_state);
+        DPRINTF(Cache, "Sending an atomic %s for %x\n",
+                busPkt->cmdString(), busPkt->getAddr());
 
-        blk = handleFill(&busPkt, blk, writebacks);
-        bool status = satisfyCpuSideRequest(pkt, blk);
-        assert(status);
-    }
+#if TRACING_ON
+        CacheBlk::State old_state = blk ? blk->status : 0;
+#endif
 
-    // We now have the block one way or another (hit or completed miss)
+        lat += memSidePort->sendAtomic(busPkt);
 
-    // Handle writebacks if needed
-    while (!writebacks.empty()){
-        PacketPtr wbPkt = writebacks.front();
-        memSidePort->sendAtomic(wbPkt);
-        writebacks.pop_front();
-        delete wbPkt;
+        DPRINTF(Cache, "Receive response: %s for addr %x in state %i\n",
+                busPkt->cmdString(), busPkt->getAddr(), old_state);
+
+        if (isCacheFill) {
+            PacketList writebacks;
+            blk = handleFill(busPkt, blk, writebacks);
+            bool status = satisfyCpuSideRequest(pkt, blk);
+            assert(status);
+            delete busPkt;
+
+            // Handle writebacks if needed
+            while (!writebacks.empty()){
+                PacketPtr wbPkt = writebacks.front();
+                memSidePort->sendAtomic(wbPkt);
+                writebacks.pop_front();
+                delete wbPkt;
+            }
+        }
     }
 
+    // We now have the block one way or another (hit or completed miss)
+
     if (pkt->needsResponse()) {
         pkt->makeAtomicResponse();
         pkt->result = Packet::Success;
@@ -553,98 +574,94 @@ Cache<TagStore,Coherence>::functionalAccess(PacketPtr pkt,
 //
 /////////////////////////////////////////////////////
 
+
 template<class TagStore, class Coherence>
-void
-Cache<TagStore,Coherence>::handleResponse(PacketPtr pkt, Tick time)
+bool
+Cache<TagStore,Coherence>::satisfyCpuSideRequest(PacketPtr pkt, BlkType *blk)
 {
-    MSHR *mshr = dynamic_cast<MSHR*>(pkt->senderState);
-#ifndef NDEBUG
-    int num_targets = mshr->getNumTargets();
-#endif
-
-    bool unblock = false;
-    bool unblock_target = false;
-    BlockedCause cause = NUM_BLOCKED_CAUSES;
-
-    if (mshr->isCacheFill) {
-#if 0
-        mshr_miss_latency[mshr->originalCmd.toInt()][0/*pkt->req->getThreadNum()*/] +=
-            curTick - pkt->time;
-#endif
-        // targets were handled in the cache tags
-        if (mshr == noTargetMSHR) {
-            // we always clear at least one target
-            unblock_target = true;
-            cause = Blocked_NoTargets;
-            noTargetMSHR = NULL;
-        }
+    if (blk && (pkt->needsExclusive() ? blk->isWritable() : blk->isValid())) {
+        assert(pkt->isWrite() || pkt->isReadWrite() || pkt->isRead());
+        assert(pkt->getOffset(blkSize) + pkt->getSize() <= blkSize);
 
-        if (mshr->hasTargets()) {
-            // Didn't satisfy all the targets, need to resend
-            mshrQueue.markPending(mshr);
-            mshr->order = order++;
-            requestMemSideBus(Request_MSHR, time);
-        }
-        else {
-            unblock = mshrQueue.isFull();
-            mshrQueue.deallocate(mshr);
-            if (unblock) {
-                unblock = !mshrQueue.isFull();
-                cause = Blocked_NoMSHRs;
+        if (pkt->isWrite()) {
+            if (blk->checkWrite(pkt)) {
+                blk->status |= BlkDirty;
+                pkt->writeDataToBlock(blk->data, blkSize);
             }
+        } else if (pkt->isReadWrite()) {
+            cmpAndSwap(blk, pkt);
+        } else {
+            if (pkt->isLocked()) {
+                blk->trackLoadLocked(pkt);
+            }
+            pkt->setDataFromBlock(blk->data, blkSize);
         }
+
+        return true;
     } else {
-        if (pkt->req->isUncacheable()) {
-            mshr_uncacheable_lat[pkt->cmd.toInt()][0/*pkt->req->getThreadNum()*/] +=
-                curTick - pkt->time;
-        }
-        if (mshr->hasTargets() && pkt->req->isUncacheable()) {
-            // Should only have 1 target if we had any
-            assert(num_targets == 1);
-            MSHR::Target *target = mshr->getTarget();
-            assert(target->cpuSide);
-            mshr->popTarget();
-            if (pkt->isRead()) {
-                target->pkt->setData(pkt->getPtr<uint8_t>());
-            }
-            cpuSidePort->respond(target->pkt, time);
-            assert(!mshr->hasTargets());
+        return false;
+    }
+}
+
+
+template<class TagStore, class Coherence>
+bool
+Cache<TagStore,Coherence>::satisfyTarget(MSHR::Target *target, BlkType *blk)
+{
+    assert(target != NULL);
+    assert(target->isCpuSide());
+    return satisfyCpuSideRequest(target->pkt, blk);
+}
+
+template<class TagStore, class Coherence>
+bool
+Cache<TagStore,Coherence>::satisfyMSHR(MSHR *mshr, PacketPtr pkt,
+                                       BlkType *blk)
+{
+    // respond to MSHR targets, if any
+
+    // First offset for critical word first calculations
+    int initial_offset = 0;
+
+    if (mshr->hasTargets()) {
+        initial_offset = mshr->getTarget()->pkt->getOffset(blkSize);
+    }
+
+    while (mshr->hasTargets()) {
+        MSHR::Target *target = mshr->getTarget();
+
+        if (!satisfyTarget(target, blk)) {
+            // Invalid access, need to do another request
+            // can occur if block is invalidated, or not correct
+            // permissions
+            MSHRQueue *mq = mshr->queue;
+            mq->markPending(mshr);
+            mshr->order = order++;
+            requestMemSideBus((RequestCause)mq->index, pkt->finishTime);
+            return false;
         }
-        else if (mshr->hasTargets()) {
-            //Must be a no_allocate with possibly more than one target
-            assert(!mshr->isCacheFill);
-            while (mshr->hasTargets()) {
-                MSHR::Target *target = mshr->getTarget();
-                assert(target->isCpuSide());
-                mshr->popTarget();
-                if (pkt->isRead()) {
-                    target->pkt->setData(pkt->getPtr<uint8_t>());
-                }
-                cpuSidePort->respond(target->pkt, time);
-            }
+
+
+        // How many bytes past the first request this one is
+        int transfer_offset = target->pkt->getOffset(blkSize) - initial_offset;
+        if (transfer_offset < 0) {
+            transfer_offset += blkSize;
         }
 
-        if (pkt->isWrite()) {
-            // If the wrtie buffer is full, we might unblock now
-            unblock = writeBuffer.isFull();
-            writeBuffer.deallocate(mshr);
-            if (unblock) {
-                // Did we really unblock?
-                unblock = !writeBuffer.isFull();
-                cause = Blocked_NoWBBuffers;
-            }
-        } else {
-            unblock = mshrQueue.isFull();
-            mshrQueue.deallocate(mshr);
-            if (unblock) {
-                unblock = !mshrQueue.isFull();
-                cause = Blocked_NoMSHRs;
-            }
+        // Critical word (offset 0) uses firstWordTime; '+' outbinds '?:'
+        Tick completion_time = tags->getHitLatency() +
+            (transfer_offset ? pkt->finishTime : pkt->firstWordTime);
+
+        if (!target->pkt->req->isUncacheable()) {
+            missLatency[target->pkt->cmdToIndex()][0/*pkt->req->getThreadNum()*/] +=
+                completion_time - target->time;
         }
+        target->pkt->makeTimingResponse();
+        cpuSidePort->respond(target->pkt, completion_time);
+        mshr->popTarget();
     }
-    if (unblock || unblock_target) {
-        clearBlocked(cause);
-    }
+
+    return true;
 }
 
 
@@ -665,21 +682,60 @@ Cache<TagStore,Coherence>::handleResponse(PacketPtr pkt)
     assert(pkt->result == Packet::Success);
     DPRINTF(Cache, "Handling reponse to %x\n", pkt->getAddr());
 
+    MSHRQueue *mq = mshr->queue;
+    bool wasFull = mq->isFull();
+
+    if (mshr == noTargetMSHR) {
+        // we always clear at least one target
+        clearBlocked(Blocked_NoTargets);
+        noTargetMSHR = NULL;
+    }
+
+    // Can we deallocate MSHR when done?
+    bool deallocate = false;
+
     if (mshr->isCacheFill) {
+#if 0
+        mshr_miss_latency[mshr->originalCmd.toInt()][0/*pkt->req->getThreadNum()*/] +=
+            curTick - pkt->time;
+#endif
         DPRINTF(Cache, "Block for addr %x being updated in Cache\n",
                 pkt->getAddr());
         BlkType *blk = tags->findBlock(pkt->getAddr());
         PacketList writebacks;
         blk = handleFill(pkt, blk, writebacks);
-        satisfyMSHR(mshr, pkt, blk);
+        deallocate = satisfyMSHR(mshr, pkt, blk);
         // copy writebacks to write buffer
         while (!writebacks.empty()) {
             PacketPtr wbPkt = writebacks.front();
-            allocateBuffer(wbPkt, time, false, true);
+            allocateBuffer(wbPkt, time, true);
             writebacks.pop_front();
         }
+    } else {
+        if (pkt->req->isUncacheable()) {
+            mshr_uncacheable_lat[pkt->cmd.toInt()][0/*pkt->req->getThreadNum()*/] +=
+                curTick - pkt->time;
+        }
+
+        while (mshr->hasTargets()) {
+            MSHR::Target *target = mshr->getTarget();
+            assert(target->isCpuSide());
+            mshr->popTarget();
+            if (pkt->isRead()) {
+                target->pkt->setData(pkt->getPtr<uint8_t>());
+            }
+            cpuSidePort->respond(target->pkt, time);
+        }
+        assert(!mshr->hasTargets());
+        deallocate = true;
+    }
+
+    if (deallocate) {
+        mq->deallocate(mshr);
+        if (wasFull && !mq->isFull()) {
+            clearBlocked((BlockedCause)mq->index);
+        }
     }
-    handleResponse(pkt, time);
 }
 
 
@@ -717,6 +773,8 @@ Cache<TagStore,Coherence>::handleFill(PacketPtr pkt, BlkType *blk,
     Addr addr = pkt->getAddr();
 
     if (blk == NULL) {
+        // better have read new data
+        assert(pkt->isRead());
 
         // need to do a replacement
         blk = tags->findReplacement(addr, writebacks);
@@ -733,7 +791,6 @@ Cache<TagStore,Coherence>::handleFill(PacketPtr pkt, BlkType *blk,
 
         blk->tag = tags->extractTag(addr);
         blk->status = coherence->getNewState(pkt);
-        assert(pkt->isRead());
     } else {
         // existing block... probably an upgrade
         assert(blk->tag == tags->extractTag(addr));
@@ -759,90 +816,6 @@ Cache<TagStore,Coherence>::handleFill(PacketPtr pkt, BlkType *blk,
 }
 
 
-template<class TagStore, class Coherence>
-bool
-Cache<TagStore,Coherence>::satisfyCpuSideRequest(PacketPtr pkt, BlkType *blk)
-{
-    if (blk && (pkt->needsExclusive() ? blk->isWritable() : blk->isValid())) {
-        assert(pkt->isWrite() || pkt->isReadWrite() || pkt->isRead());
-        assert(pkt->getOffset(blkSize) + pkt->getSize() <= blkSize);
-
-        if (pkt->isWrite()) {
-            if (blk->checkWrite(pkt)) {
-                blk->status |= BlkDirty;
-                pkt->writeDataToBlock(blk->data, blkSize);
-            }
-        } else if (pkt->isReadWrite()) {
-            cmpAndSwap(blk, pkt);
-        } else {
-            if (pkt->isLocked()) {
-                blk->trackLoadLocked(pkt);
-            }
-            pkt->setDataFromBlock(blk->data, blkSize);
-        }
-
-        return true;
-    } else {
-        return false;
-    }
-}
-
-
-template<class TagStore, class Coherence>
-bool
-Cache<TagStore,Coherence>::satisfyTarget(MSHR::Target *target, BlkType *blk)
-{
-    assert(target != NULL);
-    assert(target->isCpuSide());
-    return satisfyCpuSideRequest(target->pkt, blk);
-}
-
-template<class TagStore, class Coherence>
-void
-Cache<TagStore,Coherence>::satisfyMSHR(MSHR *mshr, PacketPtr pkt,
-                                       BlkType *blk)
-{
-    // respond to MSHR targets, if any
-
-    // First offset for critical word first calculations
-    int initial_offset = 0;
-
-    if (mshr->hasTargets()) {
-        initial_offset = mshr->getTarget()->pkt->getOffset(blkSize);
-    }
-
-    while (mshr->hasTargets()) {
-        MSHR::Target *target = mshr->getTarget();
-
-        if (!satisfyTarget(target, blk)) {
-            // Invalid access, need to do another request
-            // can occur if block is invalidated, or not correct
-            // permissions
-            break;
-        }
-
-
-        // How many bytes pass the first request is this one
-        int transfer_offset = target->pkt->getOffset(blkSize) - initial_offset;
-        if (transfer_offset < 0) {
-            transfer_offset += blkSize;
-        }
-
-        // If critical word (no offset) return first word time
-        Tick completion_time = tags->getHitLatency() +
-            transfer_offset ? pkt->finishTime : pkt->firstWordTime;
-
-        if (!target->pkt->req->isUncacheable()) {
-            missLatency[target->pkt->cmdToIndex()][0/*pkt->req->getThreadNum()*/] +=
-                completion_time - target->time;
-        }
-        target->pkt->makeTimingResponse();
-        cpuSidePort->respond(target->pkt, completion_time);
-        mshr->popTarget();
-    }
-}
-
-
 /////////////////////////////////////////////////////
 //
 // Snoop path: requests coming in from the memory side
@@ -1052,7 +1025,7 @@ Cache<TagStore,Coherence>::getNextMSHR()
             // (hwpf_mshr_misses)
             mshr_misses[pkt->cmdToIndex()][0/*pkt->req->getThreadNum()*/]++;
             // Don't request bus, since we already have it
-            return allocateBuffer(pkt, curTick, true, false);
+            return allocateMissBuffer(pkt, curTick, false);
         }
     }
 
@@ -1062,7 +1035,7 @@ Cache<TagStore,Coherence>::getNextMSHR()
 
 template<class TagStore, class Coherence>
 PacketPtr
-Cache<TagStore,Coherence>::getPacket()
+Cache<TagStore,Coherence>::getTimingPacket()
 {
     MSHR *mshr = getNextMSHR();
 
@@ -1073,30 +1046,21 @@ Cache<TagStore,Coherence>::getPacket()
     BlkType *blk = tags->findBlock(mshr->addr);
 
     // use request from 1st target
-    MSHR::Target *tgt1 = mshr->getTarget();
-    PacketPtr tgt1_pkt = tgt1->pkt;
-    PacketPtr pkt;
+    PacketPtr tgt_pkt = mshr->getTarget()->pkt;
+    PacketPtr pkt = getBusPacket(tgt_pkt, blk, mshr->needsExclusive);
 
-    if (mshr->isCacheFill) {
-        MemCmd cmd;
-        if (blk && blk->isValid()) {
-            // only reason to be here is that blk is shared
-            // (read-only) and we need exclusive
-            assert(mshr->needsExclusive && !blk->isWritable());
-            cmd = MemCmd::UpgradeReq;
-        } else {
-            // block is invalid
-            cmd = mshr->needsExclusive ? MemCmd::ReadExReq : MemCmd::ReadReq;
+    mshr->isCacheFill = (pkt != NULL);
+
+    if (pkt == NULL) {
+        // make copy of current packet to forward
+        pkt = new Packet(tgt_pkt);
+        pkt->allocate();
+        if (pkt->isWrite()) {
+            pkt->setData(tgt_pkt->getPtr<uint8_t>());
         }
-        pkt = new Packet(tgt1_pkt->req, cmd, Packet::Broadcast);
-    } else {
-        assert(blk == NULL);
-        assert(mshr->getNumTargets() == 1);
-        pkt = new Packet(tgt1_pkt->req, tgt1_pkt->cmd, Packet::Broadcast);
     }
 
     pkt->senderState = mshr;
-    pkt->allocate();
     return pkt;
 }
 
@@ -1243,7 +1207,7 @@ Cache<TagStore,Coherence>::MemSidePort::sendPacket()
         waitingOnRetry = !success;
     } else {
         // check for non-response packets (requests & writebacks)
-        PacketPtr pkt = myCache()->getPacket();
+        PacketPtr pkt = myCache()->getTimingPacket();
         MSHR *mshr = dynamic_cast<MSHR*>(pkt->senderState);
 
         bool success = sendTiming(pkt);
diff --git a/src/mem/cache/coherence/coherence_protocol.cc b/src/mem/cache/coherence/coherence_protocol.cc
index 3fd17c8c7..47d2b469f 100644
--- a/src/mem/cache/coherence/coherence_protocol.cc
+++ b/src/mem/cache/coherence/coherence_protocol.cc
@@ -259,7 +259,7 @@ CoherenceProtocol::CoherenceProtocol(const string &name,
     MC::Command writeToSharedCmd =
         doUpgrades ? MC::UpgradeReq : MC::ReadExReq;
     MC::Command writeToSharedResp =
-        doUpgrades ? MC::UpgradeReq : MC::ReadExResp;
+        doUpgrades ? MC::UpgradeResp : MC::ReadExResp;
 
     // Note that all transitions by default cause a panic.
     // Override the valid transitions with the appropriate actions here.
@@ -272,6 +272,7 @@ CoherenceProtocol::CoherenceProtocol(const string &name,
     tt[Invalid][MC::WriteReq].onRequest(MC::ReadExReq);
     tt[Invalid][MC::ReadExReq].onRequest(MC::ReadExReq);
     tt[Invalid][MC::SwapReq].onRequest(MC::ReadExReq);
+    tt[Invalid][MC::UpgradeReq].onRequest(MC::UpgradeReq);
     tt[Shared][MC::WriteReq].onRequest(writeToSharedCmd);
     tt[Shared][MC::ReadExReq].onRequest(MC::ReadExReq);
     tt[Shared][MC::SwapReq].onRequest(writeToSharedCmd);
diff --git a/src/mem/cache/miss/mshr.cc b/src/mem/cache/miss/mshr.cc
index 218d42339..1f2c05a6e 100644
--- a/src/mem/cache/miss/mshr.cc
+++ b/src/mem/cache/miss/mshr.cc
@@ -54,12 +54,12 @@ MSHR::MSHR()
 }
 
 void
-MSHR::allocate(Addr _addr, int _size, PacketPtr target, bool cacheFill)
+MSHR::allocate(Addr _addr, int _size, PacketPtr target)
 {
     addr = _addr;
     size = _size;
     assert(target);
-    isCacheFill = cacheFill;
+    isCacheFill = false;
     needsExclusive = target->needsExclusive();
     _isUncacheable = target->req->isUncacheable();
     inService = false;
diff --git a/src/mem/cache/miss/mshr.hh b/src/mem/cache/miss/mshr.hh
index b38b69c52..47f6a819b 100644
--- a/src/mem/cache/miss/mshr.hh
+++ b/src/mem/cache/miss/mshr.hh
@@ -136,7 +136,7 @@ public:
      * @param size The number of bytes to request.
      * @param pkt  The original miss.
      */
-    void allocate(Addr addr, int size, PacketPtr pkt, bool isFill);
+    void allocate(Addr addr, int size, PacketPtr pkt);
 
     /**
      * Allocate this MSHR as a buffer for the given request.
diff --git a/src/mem/cache/miss/mshr_queue.cc b/src/mem/cache/miss/mshr_queue.cc
index d58594798..6b030a865 100644
--- a/src/mem/cache/miss/mshr_queue.cc
+++ b/src/mem/cache/miss/mshr_queue.cc
@@ -36,8 +36,9 @@
 
 using namespace std;
 
-MSHRQueue::MSHRQueue(int num_entries, int reserve)
-    : numEntries(num_entries + reserve - 1), numReserve(reserve)
+MSHRQueue::MSHRQueue(int num_entries, int reserve, int _index)
+    : numEntries(num_entries + reserve - 1), numReserve(reserve),
+      index(_index)
 {
     allocated = 0;
     inServiceEntries = 0;
@@ -107,14 +108,14 @@ MSHRQueue::findPending(Addr addr, int size) const
 }
 
 MSHR *
-MSHRQueue::allocate(Addr addr, int size, PacketPtr &pkt, bool isFill)
+MSHRQueue::allocate(Addr addr, int size, PacketPtr &pkt)
 {
     assert(!freeList.empty());
     MSHR *mshr = freeList.front();
     assert(mshr->getNumTargets() == 0);
     freeList.pop_front();
 
-    mshr->allocate(addr, size, pkt, isFill);
+    mshr->allocate(addr, size, pkt);
     mshr->allocIter = allocatedList.insert(allocatedList.end(), mshr);
     mshr->readyIter = pendingList.insert(pendingList.end(), mshr);
 
diff --git a/src/mem/cache/miss/mshr_queue.hh b/src/mem/cache/miss/mshr_queue.hh
index 182dfd5b2..806aa9c64 100644
--- a/src/mem/cache/miss/mshr_queue.hh
+++ b/src/mem/cache/miss/mshr_queue.hh
@@ -74,6 +74,9 @@ class MSHRQueue
     int allocated;
     /** The number of entries that have been forwarded to the bus. */
     int inServiceEntries;
+    /** The index of this queue within the cache (MSHR queue vs. write
+     * buffer). */
+    const int index;
 
     /**
      * Create a queue with a given number of entries.
@@ -81,7 +84,7 @@ class MSHRQueue
      * @param reserve The minimum number of entries needed to satisfy
      * any access.
      */
-    MSHRQueue(int num_entries, int reserve = 1);
+    MSHRQueue(int num_entries, int reserve, int index);
 
     /** Destructor */
     ~MSHRQueue();
@@ -118,7 +121,7 @@ class MSHRQueue
      *
      * @pre There are free entries.
      */
-    MSHR *allocate(Addr addr, int size, PacketPtr &pkt, bool isFill);
+    MSHR *allocate(Addr addr, int size, PacketPtr &pkt);
 
     /**
      * Removes the given MSHR from the queue. This places the MSHR on the
diff --git a/src/mem/cache/prefetch/base_prefetcher.cc b/src/mem/cache/prefetch/base_prefetcher.cc
index d03cfe3ae..378363665 100644
--- a/src/mem/cache/prefetch/base_prefetcher.cc
+++ b/src/mem/cache/prefetch/base_prefetcher.cc
@@ -141,7 +141,7 @@ BasePrefetcher::getPacket()
             keepTrying = cache->inCache(pkt->getAddr());
         }
         if (pf.empty()) {
-            cache->deassertMemSideBusRequest(Request_PF);
+            cache->deassertMemSideBusRequest(BaseCache::Request_PF);
             if (keepTrying) return NULL; //None left, all were in cache
         }
     } while (keepTrying);
@@ -165,7 +165,7 @@ BasePrefetcher::handleMiss(PacketPtr &pkt, Tick time)
             pfRemovedMSHR++;
             pf.erase(iter);
             if (pf.empty())
-                cache->deassertMemSideBusRequest(Request_PF);
+                cache->deassertMemSideBusRequest(BaseCache::Request_PF);
         }
 
         //Remove anything in queue with delay older than time
@@ -182,7 +182,7 @@ BasePrefetcher::handleMiss(PacketPtr &pkt, Tick time)
                 iter--;
             }
             if (pf.empty())
-                cache->deassertMemSideBusRequest(Request_PF);
+                cache->deassertMemSideBusRequest(BaseCache::Request_PF);
         }
 
 
@@ -243,7 +243,7 @@ BasePrefetcher::handleMiss(PacketPtr &pkt, Tick time)
             pf.push_back(prefetch);
 
             //Make sure to request the bus, with proper delay
-            cache->requestMemSideBus(Request_PF, prefetch->time);
+            cache->requestMemSideBus(BaseCache::Request_PF, prefetch->time);
 
             //Increment through the list
             addr++;
diff --git a/src/mem/packet.cc b/src/mem/packet.cc
index 57c6a6381..cd0ed8a2e 100644
--- a/src/mem/packet.cc
+++ b/src/mem/packet.cc
@@ -64,10 +64,8 @@ MemCmd::commandInfo[] =
     /* WriteResp */
     { SET3(IsWrite, NeedsExclusive, IsResponse), InvalidCmd, "WriteResp" },
     /* Writeback */
-    { SET5(IsWrite, NeedsExclusive, IsRequest, HasData, NeedsResponse),
-            WritebackAck, "Writeback" },
-    /* WritebackAck */
-    { SET3(IsWrite, NeedsExclusive, IsResponse), InvalidCmd, "WritebackAck" },
+    { SET4(IsWrite, NeedsExclusive, IsRequest, HasData),
+            InvalidCmd, "Writeback" },
     /* SoftPFReq */
     { SET4(IsRead, IsRequest, IsSWPrefetch, NeedsResponse),
             SoftPFResp, "SoftPFReq" },
@@ -88,7 +86,11 @@ MemCmd::commandInfo[] =
     { SET4(IsWrite, NeedsExclusive, IsInvalidate, IsResponse),
             InvalidCmd, "WriteInvalidateResp" },
     /* UpgradeReq */
-    { SET3(IsInvalidate, IsRequest, IsUpgrade), InvalidCmd, "UpgradeReq" },
+    { SET4(IsInvalidate, NeedsExclusive, IsRequest, NeedsResponse),
+            UpgradeResp, "UpgradeReq" },
+    /* UpgradeResp */
+    { SET3(IsInvalidate, NeedsExclusive, IsResponse),
+            InvalidCmd, "UpgradeResp" },
     /* ReadExReq */
     { SET5(IsRead, NeedsExclusive, IsInvalidate, IsRequest, NeedsResponse),
             ReadExResp, "ReadExReq" },
diff --git a/src/mem/packet.hh b/src/mem/packet.hh
index ca186d875..6291b7c1d 100644
--- a/src/mem/packet.hh
+++ b/src/mem/packet.hh
@@ -67,7 +67,6 @@ class MemCmd
         WriteReq,
         WriteResp,
         Writeback,
-        WritebackAck,
         SoftPFReq,
         HardPFReq,
         SoftPFResp,
@@ -75,6 +74,7 @@ class MemCmd
         WriteInvalidateReq,
         WriteInvalidateResp,
         UpgradeReq,
+        UpgradeResp,
         ReadExReq,
         ReadExResp,
         LoadLockedReq,
@@ -100,7 +100,6 @@ class MemCmd
         NeedsResponse,  //!< Requester needs response from target
         IsSWPrefetch,
         IsHWPrefetch,
-        IsUpgrade,
         IsLocked,       //!< Alpha/MIPS LL or SC access
         HasData,        //!< There is an associated payload
         NUM_COMMAND_ATTRIBUTES
-- 
cgit v1.2.3