1 files changed, 909 insertions, 948 deletions
diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh
index 9b094c1e3..0649b5061 100644
--- a/src/mem/cache/cache_impl.hh
+++ b/src/mem/cache/cache_impl.hh
@@ -28,6 +28,8 @@
  * Authors: Erik Hallnor
  *          Dave Greene
  *          Nathan Binkert
+ *          Steve Reinhardt
+ *          Ron Dreslinski
  */
 
 /**
@@ -35,17 +37,8 @@
  * Cache definitions.
  */
 
-#include <assert.h>
-#include <math.h>
-
-#include <cassert>
-#include <iostream>
-#include <cstring>
-#include <string>
-
 #include "sim/host.hh"
 #include "base/misc.hh"
-#include "cpu/smt.hh"
 
 #include "mem/cache/cache.hh"
 #include "mem/cache/cache_blk.hh"
@@ -54,43 +47,25 @@
 
 #include "sim/sim_exit.hh" // for SimExitEvent
 
-bool SIGNAL_NACK_HACK;
-
-template<class TagStore, class Coherence>
-void
-Cache<TagStore,Coherence>::
-recvStatusChange(Port::Status status, bool isCpuSide)
-{
-
-}
-
 
 template<class TagStore, class Coherence>
-Cache<TagStore,Coherence>::
-Cache(const std::string &_name,
-      Cache<TagStore,Coherence>::Params &params)
+Cache<TagStore,Coherence>::Cache(const std::string &_name,
+                                 Cache<TagStore,Coherence>::Params &params)
     : BaseCache(_name, params.baseParams),
       prefetchAccess(params.prefetchAccess),
-      tags(params.tags), missQueue(params.missQueue),
+      tags(params.tags),
       coherence(params.coherence), prefetcher(params.prefetcher),
-      hitLatency(params.hitLatency),
-      compressionAlg(params.compressionAlg),
-      blkSize(params.blkSize),
       doFastWrites(params.doFastWrites),
-      prefetchMiss(params.prefetchMiss),
-      storeCompressed(params.storeCompressed),
-      compressOnWriteback(params.compressOnWriteback),
-      compLatency(params.compLatency),
-      adaptiveCompression(params.adaptiveCompression),
-      writebackCompressed(params.writebackCompressed)
+      prefetchMiss(params.prefetchMiss)
 {
+    cpuSidePort = new CpuSidePort(_name + "-cpu_side_port", this);
+    memSidePort = new MemSidePort(_name + "-mem_side_port", this);
+    cpuSidePort->setOtherPort(memSidePort);
+    memSidePort->setOtherPort(cpuSidePort);
+
     tags->setCache(this);
-    missQueue->setCache(this);
-    missQueue->setPrefetcher(prefetcher);
     coherence->setCache(this);
     prefetcher->setCache(this);
-    invalidateReq = new Request((Addr) NULL, blkSize, 0);
-    invalidatePkt = new Packet(invalidateReq, MemCmd::InvalidateReq, 0);
 }
 
 template<class TagStore, class Coherence>
@@ -99,51 +74,154 @@ Cache<TagStore,Coherence>::regStats()
 {
     BaseCache::regStats();
     tags->regStats(name());
-    missQueue->regStats(name());
     coherence->regStats(name());
     prefetcher->regStats(name());
 }
 
 template<class TagStore, class Coherence>
-typename Cache<TagStore,Coherence>::BlkType*
-Cache<TagStore,Coherence>::handleAccess(PacketPtr &pkt, int & lat,
-                                        PacketList & writebacks, bool update)
+Port *
+Cache<TagStore,Coherence>::getPort(const std::string &if_name, int idx)
 {
-    // Set the block offset here
-    int offset = tags->extractBlkOffset(pkt->getAddr());
-
-    BlkType *blk = NULL;
-    if (update) {
-        blk = tags->findBlock(pkt->getAddr(), lat);
+    if (if_name == "" || if_name == "cpu_side") {
+        return cpuSidePort;
+    } else if (if_name == "mem_side") {
+        return memSidePort;
+    } else if (if_name == "functional") {
+        return new CpuSidePort(name() + "-cpu_side_funcport", this);
     } else {
-        blk = tags->findBlock(pkt->getAddr());
-        lat = 0;
+        panic("Port name %s unrecognized\n", if_name);
     }
-    if (blk != NULL) {
+}
+
+template<class TagStore, class Coherence>
+void
+Cache<TagStore,Coherence>::deletePortRefs(Port *p)
+{
+    if (cpuSidePort == p || memSidePort == p)
+        panic("Can only delete functional ports\n");
 
-        if (!update) {
+    delete p;
+}
 
-            if (pkt->isWrite()){
-                assert(offset < blkSize);
-                assert(pkt->getSize() <= blkSize);
-                assert(offset+pkt->getSize() <= blkSize);
-                std::memcpy(blk->data + offset, pkt->getPtr<uint8_t>(),
-                       pkt->getSize());
-            } else if (pkt->isReadWrite()) {
-                cmpAndSwap(blk, pkt);
-            } else if (!(pkt->flags & SATISFIED)) {
-                pkt->flags |= SATISFIED;
-                pkt->result = Packet::Success;
-                assert(offset < blkSize);
-                assert(pkt->getSize() <= blkSize);
-                assert(offset + pkt->getSize() <=blkSize);
-                std::memcpy(pkt->getPtr<uint8_t>(), blk->data + offset,
-                       pkt->getSize());
+/** Swap pkt's data with the addressed bytes of blk (optionally conditional). */
+template<class TagStore, class Coherence>
+void
+Cache<TagStore,Coherence>::cmpAndSwap(BlkType *blk, PacketPtr pkt)
+{
+    uint64_t overwrite_val;
+    bool overwrite_mem;
+    uint64_t condition_val64;
+    uint32_t condition_val32;
+    // blk_data points at the bytes of the block that this packet addresses
+    int offset = tags->extractBlkOffset(pkt->getAddr());
+    uint8_t *blk_data = blk->data + offset;
+    // overwrite_val is a single uint64_t, so the access must fit in it
+    assert(sizeof(uint64_t) >= pkt->getSize());
+
+    overwrite_mem = true;
+    // keep a copy of our possible write value, and copy what is at the
+    // memory address into the packet
+    pkt->writeData((uint8_t *)&overwrite_val);
+    pkt->setData(blk_data);
+    // conditional swap: write only if memory equals the request's extra data
+    if (pkt->req->isCondSwap()) {
+        if (pkt->getSize() == sizeof(uint64_t)) {
+            condition_val64 = pkt->req->getExtraData();
+            overwrite_mem = !std::memcmp(&condition_val64, blk_data,
+                                         sizeof(uint64_t));
+        } else if (pkt->getSize() == sizeof(uint32_t)) {
+            condition_val32 = (uint32_t)pkt->req->getExtraData();
+            overwrite_mem = !std::memcmp(&condition_val32, blk_data,
+                                         sizeof(uint32_t));
+        } else
+            panic("Invalid size for conditional read/write\n");
+    }
+    // unconditional swaps always store; conditional ones only on a match
+    if (overwrite_mem)
+        std::memcpy(blk_data, &overwrite_val, pkt->getSize());
+}
+
+
+/////////////////////////////////////////////////////
+//
+// MSHR helper functions
+//
+/////////////////////////////////////////////////////
+
+/** Mark mshr in service; the HW-prefetch handling below is compiled out. */
+template<class TagStore, class Coherence>
+void
+Cache<TagStore,Coherence>::markInService(MSHR *mshr)
+{
+    markInServiceInternal(mshr);
+#if 0
+        if (mshr->originalCmd == MemCmd::HardPFReq) {
+            DPRINTF(HWPrefetch, "%s:Marking a HW_PF in service\n",
+                    name());
+            //Also clear pending if need be
+            if (!prefetcher->havePending())
+            {
+                deassertMemSideBusRequest(Request_PF);
             }
-            return blk;
         }
+#endif
+}
+
+/** Pass a squash of threadNum to the MSHR queue and clear blocked state. */
+template<class TagStore, class Coherence>
+void
+Cache<TagStore,Coherence>::squash(int threadNum)
+{
+    bool unblock = false;
+    BlockedCause cause = NUM_BLOCKED_CAUSES;  // only read once unblock is set
+
+    if (noTargetMSHR && noTargetMSHR->threadNum == threadNum) {
+        noTargetMSHR = NULL;
+        unblock = true;
+        cause = Blocked_NoTargets;
+    }
+    if (mshrQueue.isFull()) {  // sampled before the squash below
+        unblock = true;
+        cause = Blocked_NoMSHRs;  // NOTE(review): replaces Blocked_NoTargets
+    }
+    mshrQueue.squash(threadNum);
+    if (!mshrQueue.havePending()) {
+        deassertMemSideBusRequest(Request_MSHR);
+    }
+    if (unblock && !mshrQueue.isFull()) {  // only once the queue has room
+        clearBlocked(cause);
+    }
+}
+
+/////////////////////////////////////////////////////
+//
+// Access path: requests coming in from the CPU side
+//
+/////////////////////////////////////////////////////
 
-        // Hit
+template<class TagStore, class Coherence>
+bool
+Cache<TagStore,Coherence>::access(PacketPtr pkt, BlkType *&blk, int &lat)
+{
+    if (pkt->req->isUncacheable())  {
+        blk = NULL;
+        lat = hitLatency;
+        return false;
+    }
+
+    bool satisfied = false;  // assume the worst
+    blk = tags->findBlock(pkt->getAddr(), lat);
+
+    if (prefetchAccess) {
+        // Prefetching is driven by the access stream: notify the prefetcher
+        prefetcher->handleMiss(pkt, curTick);
+    }
+
+    DPRINTF(Cache, "%s %x %s\n", pkt->cmdString(), pkt->getAddr(),
+            (blk) ? "hit" : "miss");
+
+    if (blk != NULL) {
+        // HIT
         if (blk->isPrefetch()) {
             //Signal that this was a hit under prefetch (no need for
             //use prefetch (only can get here if true)
@@ -157,652 +235,641 @@ Cache<TagStore,Coherence>::handleAccess(PacketPtr &pkt, int & lat,
             }
         }
 
-        if ((pkt->isReadWrite() && blk->isWritable()) ||
-            (pkt->isWrite() && blk->isWritable()) ||
-            (pkt->isRead() && blk->isValid())) {
-
-            // We are satisfying the request
-            pkt->flags |= SATISFIED;
-
-            if (blk->isCompressed()) {
-                // If the data is compressed, need to increase the latency
-                lat += (compLatency/4);
-            }
-
-            bool write_data = false;
-
-            assert(verifyData(blk));
-
-            assert(offset < blkSize);
-            assert(pkt->getSize() <= blkSize);
-            assert(offset+pkt->getSize() <= blkSize);
+        if (pkt->needsExclusive() ? blk->isWritable() : blk->isValid()) {
+            // OK to satisfy access
+            hits[pkt->cmdToIndex()][0/*pkt->req->getThreadNum()*/]++;
+            satisfied = true;
 
-            if (pkt->isWrite()) {
-                if (blk->checkWrite(pkt->req)) {
-                    write_data = true;
+            // Check RMW operations first since both isRead() and
+            // isWrite() will be true for them
+            if (pkt->cmd == MemCmd::SwapReq) {
+                cmpAndSwap(blk, pkt);
+            } else if (pkt->isWrite()) {
+                if (blk->checkWrite(pkt)) {
                     blk->status |= BlkDirty;
-                    std::memcpy(blk->data + offset, pkt->getPtr<uint8_t>(),
-                           pkt->getSize());
+                    pkt->writeDataToBlock(blk->data, blkSize);
                 }
-            } else if (pkt->isReadWrite()) {
-                cmpAndSwap(blk, pkt);
-            } else {
-                assert(pkt->isRead());
-                if (pkt->req->isLocked()) {
-                    blk->trackLoadLocked(pkt->req);
+            } else if (pkt->isRead()) {
+                if (pkt->isLocked()) {
+                    blk->trackLoadLocked(pkt);
                 }
-                std::memcpy(pkt->getPtr<uint8_t>(), blk->data + offset,
-                            pkt->getSize());
-            }
-
-            if (write_data ||
-                (adaptiveCompression && blk->isCompressed()))
-            {
-                // If we wrote data, need to update the internal block
-                // data.
-                updateData(blk, writebacks,
-                           !(adaptiveCompression &&
-                             blk->isReferenced()));
+                pkt->setDataFromBlock(blk->data, blkSize);
+            } else {
+                // Not a read or write... must be an upgrade.  It's OK
+                // to just ack those as long as we have an exclusive
+                // copy at this level.
+                assert(pkt->cmd == MemCmd::UpgradeReq);
             }
         } else {
-            // permission violation, treat it as a miss
-            blk = NULL;
+            // permission violation... nothing to do here, leave unsatisfied
+            // for statistics purposes this counts like a complete miss
+            incMissCount(pkt);
         }
     } else {
         // complete miss (no matching block)
-        if (pkt->req->isLocked() && pkt->isWrite()) {
+        incMissCount(pkt);
+
+        if (pkt->isLocked() && pkt->isWrite()) {
             // miss on store conditional... just give up now
             pkt->req->setExtraData(0);
-            pkt->flags |= SATISFIED;
+            satisfied = true;
         }
     }
 
-    return blk;
+    return satisfied;
 }
 
-template<class TagStore, class Coherence>
-void
-Cache<TagStore,Coherence>::cmpAndSwap(BlkType *blk, PacketPtr &pkt){
-            uint64_t overwrite_val;
-            bool overwrite_mem;
-            uint64_t condition_val64;
-            uint32_t condition_val32;
-
-            int offset = tags->extractBlkOffset(pkt->getAddr());
-
-            assert(sizeof(uint64_t) >= pkt->getSize());
-
-            overwrite_mem = true;
-            // keep a copy of our possible write value, and copy what is at the
-            // memory address into the packet
-            std::memcpy(&overwrite_val, pkt->getPtr<uint8_t>(), pkt->getSize());
-            std::memcpy(pkt->getPtr<uint8_t>(), blk->data + offset,
-                        pkt->getSize());
-
-            if (pkt->req->isCondSwap()) {
-                if (pkt->getSize() == sizeof(uint64_t)) {
-                    condition_val64 = pkt->req->getExtraData();
-                    overwrite_mem = !std::memcmp(&condition_val64, blk->data + offset,
-                                                 sizeof(uint64_t));
-                } else if (pkt->getSize() == sizeof(uint32_t)) {
-                    condition_val32 = (uint32_t)pkt->req->getExtraData();
-                    overwrite_mem = !std::memcmp(&condition_val32, blk->data + offset,
-                                                 sizeof(uint32_t));
-                } else
-                    panic("Invalid size for conditional read/write\n");
-            }
-
-            if (overwrite_mem)
-                std::memcpy(blk->data + offset,
-                            &overwrite_val, pkt->getSize());
-
-}
 
 template<class TagStore, class Coherence>
-typename Cache<TagStore,Coherence>::BlkType*
-Cache<TagStore,Coherence>::handleFill(BlkType *blk, PacketPtr &pkt,
-                                      CacheBlk::State new_state,
-                                      PacketList & writebacks,
-                                      PacketPtr target)
+bool
+Cache<TagStore,Coherence>::timingAccess(PacketPtr pkt)
 {
-#ifndef NDEBUG
-    BlkType *tmp_blk = tags->findBlock(pkt->getAddr());
-    assert(tmp_blk == blk);
-#endif
-    blk = doReplacement(blk, pkt, new_state, writebacks);
+//@todo Add back in MemDebug Calls
+//    MemDebug::cacheAccess(pkt);
 
+    // we charge hitLatency for doing just about anything here
+    Tick time =  curTick + hitLatency;
 
-    if (pkt->isRead()) {
-        std::memcpy(blk->data, pkt->getPtr<uint8_t>(), blkSize);
+    if (pkt->memInhibitAsserted()) {
+        DPRINTF(Cache, "mem inhibited on 0x%x: not responding\n",
+                pkt->getAddr());
+        assert(!pkt->req->isUncacheable());
+        return true;
     }
 
-        blk->whenReady = pkt->finishTime;
+    if (pkt->req->isUncacheable()) {
+        allocateBuffer(pkt, time, true);
+        assert(pkt->needsResponse()); // else we should delete it here??
+        return true;
+    }
 
-    // Respond to target, if any
-    if (target) {
+    PacketList writebacks;
+    int lat = hitLatency;
+    bool satisfied = false;
 
-        target->flags |= SATISFIED;
+    Addr blk_addr = pkt->getAddr() & ~(Addr(blkSize-1));
+    MSHR *mshr = mshrQueue.findMatch(blk_addr);
+
+    if (!mshr) {
+        // no outstanding access to this block, look up in cache
+        // (otherwise if we allow reads while there's an outstanding
+        // write miss, the read could return stale data out of the
+        // cache block... a more aggressive system could detect the
+        // overlap (if any) and forward data out of the MSHRs, but we
+        // don't do that yet)
+        BlkType *blk = NULL;
+        satisfied = access(pkt, blk, lat);
+    }
 
-        if (target->cmd == MemCmd::InvalidateReq) {
-            tags->invalidateBlk(blk);
-            blk = NULL;
+#if 0
+    // If this is a block size write/hint (WH64) allocate the block here
+    // if the coherence protocol allows it.
+    /** @todo make the fast write alloc (wh64) work with coherence. */
+    /** @todo Do we want to do fast writes for writebacks as well? */
+    if (!blk && pkt->getSize() >= blkSize && coherence->allowFastWrites() &&
+        (pkt->cmd == MemCmd::WriteReq
+         || pkt->cmd == MemCmd::WriteInvalidateReq) ) {
+        // no outstanding misses, so we can do this
+        MSHR *outstanding_miss = mshrQueue.findMatch(pkt->getAddr());
+        if (pkt->cmd == MemCmd::WriteInvalidateReq || !outstanding_miss) {
+            if (outstanding_miss) {
+                warn("WriteInv doing a fastallocate"
+                     "with an outstanding miss to the same address\n");
+            }
+            blk = handleFill(NULL, pkt, BlkValid | BlkWritable,
+                                   writebacks);
+            ++fastWrites;
         }
+    }
+#endif
 
-        if (blk && ((target->isWrite() || target->isReadWrite()) ?
-                    blk->isWritable() : blk->isValid())) {
-            assert(target->isWrite() || target->isReadWrite() || target->isRead());
-            assert(target->getOffset(blkSize) + target->getSize() <= blkSize);
-            if (target->isWrite()) {
-                if (blk->checkWrite(pkt->req)) {
-                    blk->status |= BlkDirty;
-                    std::memcpy(blk->data + target->getOffset(blkSize),
-                           target->getPtr<uint8_t>(), target->getSize());
-                }
-            } else if (target->isReadWrite()) {
-                cmpAndSwap(blk, target);
-            } else {
-                if (pkt->req->isLocked()) {
-                    blk->trackLoadLocked(pkt->req);
-                }
-                std::memcpy(target->getPtr<uint8_t>(),
-                       blk->data + target->getOffset(blkSize),
-                       target->getSize());
+    // copy writebacks to write buffer
+    while (!writebacks.empty()) {
+        PacketPtr wbPkt = writebacks.front();
+        allocateBuffer(wbPkt, time, true);
+        writebacks.pop_front();
+    }
+
+    bool needsResponse = pkt->needsResponse();
+
+    if (satisfied) {
+        assert(needsResponse);
+        pkt->makeTimingResponse();
+        cpuSidePort->respond(pkt, curTick+lat);
+    } else {
+        // miss
+        if (prefetchMiss)
+            prefetcher->handleMiss(pkt, time);
+
+        if (mshr) {
+            // MSHR hit
+            //@todo remove hw_pf here
+            mshr_hits[pkt->cmdToIndex()][0/*pkt->req->getThreadNum()*/]++;
+            if (mshr->threadNum != 0/*pkt->req->getThreadNum()*/) {
+                mshr->threadNum = -1;
+            }
+            mshr->allocateTarget(pkt, true);
+            if (mshr->getNumTargets() == numTarget) {
+                noTargetMSHR = mshr;
+                setBlocked(Blocked_NoTargets);
+                mshrQueue.moveToFront(mshr);
             }
+        } else {
+            // no MSHR
+            mshr_misses[pkt->cmdToIndex()][0/*pkt->req->getThreadNum()*/]++;
+            // always mark as cache fill for now... if we implement
+            // no-write-allocate or bypass accesses this will have to
+            // be changed.
+            allocateMissBuffer(pkt, time, true);
         }
     }
 
-    if (blk) {
-        // Need to write the data into the block
-        updateData(blk, writebacks, !adaptiveCompression || true);
+    if (!needsResponse) {
+        // Need to clean up the packet on a writeback miss, but leave
+        // the request for the next level.
+        delete pkt;
     }
-    return blk;
+
+    return true;
 }
 
+
 template<class TagStore, class Coherence>
-typename Cache<TagStore,Coherence>::BlkType*
-Cache<TagStore,Coherence>::handleFill(BlkType *blk, MSHR * mshr,
-                                      CacheBlk::State new_state,
-                                      PacketList & writebacks, PacketPtr pkt)
+PacketPtr
+Cache<TagStore,Coherence>::getBusPacket(PacketPtr cpu_pkt, BlkType *blk,
+                                        bool needsExclusive)
 {
-/*
-#ifndef NDEBUG
-    BlkType *tmp_blk = findBlock(mshr->pkt->getAddr());
-    assert(tmp_blk == blk);
-#endif
-    PacketPtr pkt = mshr->pkt;*/
-    blk = doReplacement(blk, pkt, new_state, writebacks);
+    bool blkValid = blk && blk->isValid();
 
-    if (pkt->isRead()) {
-        std::memcpy(blk->data, pkt->getPtr<uint8_t>(), blkSize);
+    if (cpu_pkt->req->isUncacheable()) {
+        assert(blk == NULL);
+        return NULL;
     }
 
-    blk->whenReady = pkt->finishTime;
+    if (!blkValid &&
+        (cpu_pkt->cmd == MemCmd::Writeback ||
+         cpu_pkt->cmd == MemCmd::UpgradeReq)) {
+            // For now, writebacks from upper-level caches that
+            // completely miss in the cache just go through. If we had
+            // "fast write" support (where we could write the whole
+            // block w/o fetching new data) we might want to allocate
+            // on writeback misses instead.
+        return NULL;
+    }
 
+    assert(cpu_pkt->needsResponse());
 
-    // respond to MSHR targets, if any
+    MemCmd cmd;
+    const bool useUpgrades = true;
+    if (blkValid && useUpgrades) {
+        // only reason to be here is that blk is shared
+        // (read-only) and we need exclusive
+        assert(needsExclusive && !blk->isWritable());
+        cmd = MemCmd::UpgradeReq;
+    } else {
+        // block is invalid
+        cmd = needsExclusive ? MemCmd::ReadExReq : MemCmd::ReadReq;
+    }
+    PacketPtr pkt = new Packet(cpu_pkt->req, cmd, Packet::Broadcast, blkSize);
 
-    // First offset for critical word first calculations
-    int initial_offset = 0;
+    pkt->allocate();
+    return pkt;
+}
 
-    if (mshr->hasTargets()) {
-        initial_offset = mshr->getTarget()->getOffset(blkSize);
+
+template<class TagStore, class Coherence>
+Tick
+Cache<TagStore,Coherence>::atomicAccess(PacketPtr pkt)
+{
+    int lat = hitLatency;
+
+    if (pkt->memInhibitAsserted()) {
+        DPRINTF(Cache, "mem inhibited on 0x%x: not responding\n",
+                pkt->getAddr());
+        assert(!pkt->req->isUncacheable());
+        return lat;
     }
 
-    while (mshr->hasTargets()) {
-        PacketPtr target = mshr->getTarget();
+    // should assert here that there are no outstanding MSHRs or
+    // writebacks... that would mean that someone used an atomic
+    // access in timing mode
 
-        target->flags |= SATISFIED;
+    BlkType *blk = NULL;
 
-        // How many bytes pass the first request is this one
-        int transfer_offset = target->getOffset(blkSize) - initial_offset;
-        if (transfer_offset < 0) {
-            transfer_offset += blkSize;
+    if (!access(pkt, blk, lat)) {
+        // MISS
+        PacketPtr busPkt = getBusPacket(pkt, blk, pkt->needsExclusive());
+
+        bool isCacheFill = (busPkt != NULL);
+
+        if (busPkt == NULL) {
+            // just forwarding the same request to the next level
+            // no local cache operation involved
+            busPkt = pkt;
         }
 
-        // If critical word (no offset) return first word time
-        Tick completion_time = tags->getHitLatency() +
-            transfer_offset ? pkt->finishTime : pkt->firstWordTime;
+        DPRINTF(Cache, "Sending an atomic %s for %x\n",
+                busPkt->cmdString(), busPkt->getAddr());
 
-        if (target->cmd == MemCmd::InvalidateReq) {
-            //Mark the blk as invalid now, if it hasn't been already
-            if (blk) {
-                tags->invalidateBlk(blk);
-                blk = NULL;
-            }
+#if TRACING_ON
+        CacheBlk::State old_state = blk ? blk->status : 0;
+#endif
 
-            //Also get rid of the invalidate
-            mshr->popTarget();
+        lat += memSidePort->sendAtomic(busPkt);
 
-            DPRINTF(Cache, "Popping off a Invalidate for addr %x\n",
-                    pkt->getAddr());
+        DPRINTF(Cache, "Receive response: %s for addr %x in state %i\n",
+                busPkt->cmdString(), busPkt->getAddr(), old_state);
 
-            continue;
-        }
+        if (isCacheFill) {
+            PacketList writebacks;
+            blk = handleFill(busPkt, blk, writebacks);
+            bool status = satisfyCpuSideRequest(pkt, blk);
+            assert(status);
+            delete busPkt;
 
-        if (blk && ((target->isWrite() || target->isReadWrite()) ?
-            blk->isWritable() : blk->isValid())) {
-            assert(target->isWrite() || target->isRead() || target->isReadWrite() );
-            assert(target->getOffset(blkSize) + target->getSize() <= blkSize);
-            if (target->isWrite()) {
-                if (blk->checkWrite(pkt->req)) {
-                    blk->status |= BlkDirty;
-                    std::memcpy(blk->data + target->getOffset(blkSize),
-                           target->getPtr<uint8_t>(), target->getSize());
-                }
-            } else if (target->isReadWrite()) {
-                cmpAndSwap(blk, target);
-            } else {
-                if (target->req->isLocked()) {
-                    blk->trackLoadLocked(target->req);
-                }
-                std::memcpy(target->getPtr<uint8_t>(),
-                       blk->data + target->getOffset(blkSize),
-                       target->getSize());
+            // Handle writebacks if needed
+            while (!writebacks.empty()){
+                PacketPtr wbPkt = writebacks.front();
+                memSidePort->sendAtomic(wbPkt);
+                writebacks.pop_front();
+                delete wbPkt;
             }
-        } else {
-            // Invalid access, need to do another request
-            // can occur if block is invalidated, or not correct
-            // permissions
-//            mshr->pkt = pkt;
-            break;
         }
-        respondToMiss(target, completion_time);
-        mshr->popTarget();
     }
 
-    if (blk) {
-        // Need to write the data into the block
-        updateData(blk, writebacks, !adaptiveCompression || true);
+    // We now have the block one way or another (hit or completed miss)
+
+    if (pkt->needsResponse()) {
+        pkt->makeAtomicResponse();
+        pkt->result = Packet::Success;
     }
 
-    return blk;
+    return lat;
 }
 
 
 template<class TagStore, class Coherence>
 void
-Cache<TagStore,Coherence>::handleSnoop(BlkType *blk,
-                                       CacheBlk::State new_state,
-                                       PacketPtr &pkt)
+Cache<TagStore,Coherence>::functionalAccess(PacketPtr pkt,
+                                            CachePort *otherSidePort)
 {
-    //Must have the block to supply
-    assert(blk);
-    // Can only supply data, and if it hasn't already been supllied
-    assert(pkt->isRead());
-    assert(!(pkt->flags & SATISFIED));
-    pkt->flags |= SATISFIED;
-    Addr offset = pkt->getOffset(blkSize);
-    assert(offset < blkSize);
-    assert(pkt->getSize() <= blkSize);
-    assert(offset + pkt->getSize() <=blkSize);
-    std::memcpy(pkt->getPtr<uint8_t>(), blk->data + offset, pkt->getSize());
-
-    handleSnoop(blk, new_state);
-}
+    Addr blk_addr = pkt->getAddr() & ~(Addr(blkSize - 1)); // Addr-wide mask
+    BlkType *blk = tags->findBlock(pkt->getAddr());
 
-template<class TagStore, class Coherence>
-void
-Cache<TagStore,Coherence>::handleSnoop(BlkType *blk,
-                                       CacheBlk::State new_state)
-{
-    if (blk && blk->status != new_state) {
-        if ((new_state && BlkValid) == 0) {
-            tags->invalidateBlk(blk);
-        } else {
-            assert(new_state >= 0 && new_state < 128);
-            blk->status = new_state;
+    if (blk && pkt->checkFunctional(blk_addr, blkSize, blk->data)) {
+        // request satisfied from block
+        return;
+    }
+
+    // Not satisfied by the tags: check outstanding misses, then buffered
+    // writes, and finally forward the packet to the other side's port.
+    // There can only be one matching outstanding miss.
+    MSHR *mshr = mshrQueue.findMatch(blk_addr);
+    if (mshr) {
+        MSHR::TargetList *targets = mshr->getTargetList();
+        MSHR::TargetList::iterator i = targets->begin();
+        MSHR::TargetList::iterator end = targets->end();
+        for (; i != end; ++i) {
+            PacketPtr targetPkt = i->pkt;
+            if (pkt->checkFunctional(targetPkt))
+                return;
         }
     }
-}
 
-template<class TagStore, class Coherence>
-PacketPtr
-Cache<TagStore,Coherence>::writebackBlk(BlkType *blk)
-{
-    assert(blk && blk->isValid() && blk->isModified());
-    int data_size = blkSize;
-    data_size = blk->size;
-    if (compressOnWriteback) {
-        // not already compressed
-        // need to compress to ship it
-        assert(data_size == blkSize);
-        uint8_t *tmp_data = new uint8_t[blkSize];
-        data_size = compressionAlg->compress(tmp_data,blk->data,
-                                      data_size);
-        delete [] tmp_data;
+    // There can be many matching outstanding writes.
+    std::vector<MSHR*> writes;
+    writeBuffer.findMatches(blk_addr, writes);
+    for (std::vector<MSHR*>::size_type i = 0; i < writes.size(); ++i) {
+        MSHR *wbuf = writes[i];  // distinct name: do not shadow mshr above
+        if (pkt->checkFunctional(wbuf->addr, wbuf->size, wbuf->writeData))
+            return;
     }
 
-/*    PacketPtr writeback =
-        buildWritebackReq(tags->regenerateBlkAddr(blk->tag, blk->set),
-                          blk->asid, blkSize,
-                          blk->data, data_size);
-*/
+    otherSidePort->checkAndSendFunctional(pkt);
+}
 
-    Request *writebackReq =
-        new Request(tags->regenerateBlkAddr(blk->tag, blk->set), blkSize, 0);
-    PacketPtr writeback = new Packet(writebackReq, MemCmd::Writeback, -1);
-    writeback->allocate();
-    std::memcpy(writeback->getPtr<uint8_t>(),blk->data,blkSize);
 
-    blk->status &= ~BlkDirty;
-    return writeback;
-}
+/////////////////////////////////////////////////////
+//
+// Response handling: responses from the memory side
+//
+/////////////////////////////////////////////////////
 
 
 template<class TagStore, class Coherence>
 bool
-Cache<TagStore,Coherence>::verifyData(BlkType *blk)
+Cache<TagStore,Coherence>::satisfyCpuSideRequest(PacketPtr pkt, BlkType *blk)
 {
-    bool retval;
-    // The data stored in the blk
-    uint8_t *blk_data = new uint8_t[blkSize];
-    tags->readData(blk, blk_data);
-    // Pointer for uncompressed data, assumed uncompressed
-    uint8_t *tmp_data = blk_data;
-    // The size of the data being stored, assumed uncompressed
-    int data_size = blkSize;
-
-    // If the block is compressed need to uncompress to access
-    if (blk->isCompressed()){
-        // Allocate new storage for the data
-        tmp_data = new uint8_t[blkSize];
-        data_size = compressionAlg->uncompress(tmp_data,blk_data, blk->size);
-        assert(data_size == blkSize);
-        // Don't need to keep blk_data around
-        delete [] blk_data;
+    if (blk && (pkt->needsExclusive() ? blk->isWritable() : blk->isValid())) {
+        assert(pkt->isWrite() || pkt->isReadWrite() || pkt->isRead());
+        assert(pkt->getOffset(blkSize) + pkt->getSize() <= blkSize);
+
+        // Check RMW first: isWrite() may also be true for swaps (cf. access())
+        if (pkt->isReadWrite()) {
+            cmpAndSwap(blk, pkt);
+        } else if (pkt->isWrite()) {
+            if (blk->checkWrite(pkt)) {
+                blk->status |= BlkDirty;
+                pkt->writeDataToBlock(blk->data, blkSize);
+            }
+        } else {
+            if (pkt->isLocked())
+                blk->trackLoadLocked(pkt);
+            pkt->setDataFromBlock(blk->data, blkSize);
+        }
+
+        return true;
     } else {
-        assert(blkSize == blk->size);
+        return false;
     }
-
-    retval = std::memcmp(tmp_data, blk->data, blkSize) == 0;
-    delete [] tmp_data;
-    return retval;
 }
 
+
 template<class TagStore, class Coherence>
-void
-Cache<TagStore,Coherence>::updateData(BlkType *blk, PacketList &writebacks,
-                                        bool compress_block)
+bool
+Cache<TagStore,Coherence>::satisfyTarget(MSHR::Target *target, BlkType *blk)
 {
-    if (storeCompressed && compress_block) {
-        uint8_t *comp_data = new uint8_t[blkSize];
-        int new_size = compressionAlg->compress(comp_data, blk->data, blkSize);
-        if (new_size > (blkSize - tags->getSubBlockSize())){
-            // no benefit to storing it compressed
-            blk->status &= ~BlkCompressed;
-            tags->writeData(blk, blk->data, blkSize,
-                          writebacks);
-        } else {
-            // Store the data compressed
-            blk->status |= BlkCompressed;
-            tags->writeData(blk, comp_data, new_size,
-                          writebacks);
-        }
-        delete [] comp_data;
-    } else {
-        blk->status &= ~BlkCompressed;
-        tags->writeData(blk, blk->data, blkSize, writebacks);
-    }
+    assert(target != NULL);  // a NULL target is a caller bug
+    assert(target->isCpuSide());  // only CPU-side targets are handled here
+    return satisfyCpuSideRequest(target->pkt, blk);  // true iff blk satisfied the target's packet
 }
 
 template<class TagStore, class Coherence>
-typename Cache<TagStore,Coherence>::BlkType*
-Cache<TagStore,Coherence>::doReplacement(BlkType *blk, PacketPtr &pkt,
-                                         CacheBlk::State new_state,
-                                         PacketList &writebacks)
+bool
+Cache<TagStore,Coherence>::satisfyMSHR(MSHR *mshr, PacketPtr pkt,
+                                       BlkType *blk)
 {
-    if (blk == NULL) {
-        // need to do a replacement
-        BlkList compress_list;
-        blk = tags->findReplacement(pkt, writebacks, compress_list);
-        while (adaptiveCompression && !compress_list.empty()) {
-            updateData(compress_list.front(), writebacks, true);
-            compress_list.pop_front();
+    // respond to MSHR targets, if any; returns true once all are satisfied
+
+    // Block offset of the first target, for critical-word-first calculations
+    int initial_offset = 0;
+
+    if (mshr->hasTargets()) {
+        initial_offset = mshr->getTarget()->pkt->getOffset(blkSize);
+    }
+
+    while (mshr->hasTargets()) {
+        MSHR::Target *target = mshr->getTarget();
+
+        if (!satisfyTarget(target, blk)) {
+            // Invalid access, need to do another request
+            // can occur if block is invalidated, or not correct
+            // permissions
+            MSHRQueue *mq = mshr->queue;
+            mq->markPending(mshr);
+            mshr->order = order++;
+            requestMemSideBus((RequestCause)mq->index, pkt->finishTime);
+            return false;  // MSHR stays allocated; remaining targets wait
         }
-        if (blk->isValid()) {
-            DPRINTF(Cache, "replacement: replacing %x with %x: %s\n",
-                    tags->regenerateBlkAddr(blk->tag,blk->set), pkt->getAddr(),
-                    (blk->isModified()) ? "writeback" : "clean");
 
-            if (blk->isModified()) {
-                // Need to write the data back
-                writebacks.push_back(writebackBlk(blk));
-            }
+
+        // How many bytes past the first request's offset this one is
+        int transfer_offset = target->pkt->getOffset(blkSize) - initial_offset;
+        if (transfer_offset < 0) {
+            transfer_offset += blkSize;  // wrap around within the block
         }
-        blk->tag = tags->extractTag(pkt->getAddr(), blk);
-    } else {
-        // must be a status change
-        // assert(blk->status != new_state);
-        if (blk->status == new_state) warn("Changing state to same value\n");
+
+        // Critical word (no offset) uses first word time, others finish time
+        Tick completion_time = tags->getHitLatency() +
+            (transfer_offset ? pkt->finishTime : pkt->firstWordTime);  // parens: '+' binds tighter than '?:'
+
+        if (!target->pkt->req->isUncacheable()) {
+            missLatency[target->pkt->cmdToIndex()][0/*pkt->req->getThreadNum()*/] +=
+                completion_time - target->time;
+        }
+        target->pkt->makeTimingResponse();
+        cpuSidePort->respond(target->pkt, completion_time);
+        mshr->popTarget();  // pop only after the last use of target
     }
 
-    blk->status = new_state;
-    return blk;
+    return true;
 }
 
 
 template<class TagStore, class Coherence>
-bool
-Cache<TagStore,Coherence>::access(PacketPtr &pkt)
+void
+Cache<TagStore,Coherence>::handleResponse(PacketPtr pkt)
 {
-//@todo Add back in MemDebug Calls
-//    MemDebug::cacheAccess(pkt);
-    BlkType *blk = NULL;
-    PacketList writebacks;
-    int size = blkSize;
-    int lat = hitLatency;
-    if (prefetchAccess) {
-        //We are determining prefetches on access stream, call prefetcher
-        prefetcher->handleMiss(pkt, curTick);
+    Tick time = curTick + hitLatency;  // tick used for responses and writeback buffering below
+    MSHR *mshr = dynamic_cast<MSHR*>(pkt->senderState);  // the response must carry its MSHR
+    assert(mshr);
+    if (pkt->result == Packet::Nacked) {
+        //pkt->reinitFromRequest();
+        warn("NACKs from devices not connected to the same bus "
+             "not implemented\n");
+        return;
     }
+    assert(pkt->result != Packet::BadAddress);
+    assert(pkt->result == Packet::Success);
+    DPRINTF(Cache, "Handling reponse to %x\n", pkt->getAddr());
 
-    Addr blk_addr = pkt->getAddr() & ~(Addr(blkSize-1));
+    MSHRQueue *mq = mshr->queue;
+    bool wasFull = mq->isFull();  // sampled before any deallocation, see end of function
 
-    if (!pkt->req->isUncacheable()) {
-        if (!missQueue->findMSHR(blk_addr)) {
-            blk = handleAccess(pkt, lat, writebacks);
-        }
-    } else {
-        size = pkt->getSize();
-    }
-    // If this is a block size write/hint (WH64) allocate the block here
-    // if the coherence protocol allows it.
-    /** @todo make the fast write alloc (wh64) work with coherence. */
-    /** @todo Do we want to do fast writes for writebacks as well? */
-    if (!blk && pkt->getSize() >= blkSize && coherence->allowFastWrites() &&
-        (pkt->cmd == MemCmd::WriteReq
-         || pkt->cmd == MemCmd::WriteInvalidateReq) ) {
-        // not outstanding misses, can do this
-        MSHR* outstanding_miss = missQueue->findMSHR(pkt->getAddr());
-        if (pkt->cmd == MemCmd::WriteInvalidateReq || !outstanding_miss) {
-            if (outstanding_miss) {
-                warn("WriteInv doing a fastallocate"
-                     "with an outstanding miss to the same address\n");
-            }
-            blk = handleFill(NULL, pkt, BlkValid | BlkWritable,
-                                   writebacks);
-            ++fastWrites;
-        }
-    }
-    while (!writebacks.empty()) {
-        PacketPtr wbPkt = writebacks.front();
-        missQueue->doWriteback(wbPkt);
-        writebacks.pop_front();
-        delete wbPkt;
+    if (mshr == noTargetMSHR) {
+        // we always clear at least one target
+        clearBlocked(Blocked_NoTargets);
+        noTargetMSHR = NULL;
     }
 
-    DPRINTF(Cache, "%s %x %s\n", pkt->cmdString(), pkt->getAddr(),
-            (blk) ? "hit" : "miss");
+    // Can we deallocate MSHR when done?
+    bool deallocate = false;
 
-    if (blk) {
-        // Hit
-        hits[pkt->cmdToIndex()][0/*pkt->req->getThreadNum()*/]++;
-        // clear dirty bit if write through
-        respond(pkt, curTick+lat);
-        return true;
-    }
+    if (mshr->isCacheFill) {
+#if 0
+        mshr_miss_latency[mshr->originalCmd.toInt()][0/*pkt->req->getThreadNum()*/] +=
+            curTick - pkt->time;
+#endif
+        DPRINTF(Cache, "Block for addr %x being updated in Cache\n",
+                pkt->getAddr());
+        BlkType *blk = tags->findBlock(pkt->getAddr());
+        PacketList writebacks;
+        blk = handleFill(pkt, blk, writebacks);
+        deallocate = satisfyMSHR(mshr, pkt, blk);  // false if a target had to be re-requested
+        // copy writebacks to write buffer
+        while (!writebacks.empty()) {
+            PacketPtr wbPkt = writebacks.front();
+            allocateBuffer(wbPkt, time, true);
+            writebacks.pop_front();
+        }
+    } else {
+        if (pkt->req->isUncacheable()) {
+            mshr_uncacheable_lat[pkt->cmd.toInt()][0/*pkt->req->getThreadNum()*/] +=
+                curTick - pkt->time;
+        }
 
-    // Miss
-    if (!pkt->req->isUncacheable()) {
-        misses[pkt->cmdToIndex()][0/*pkt->req->getThreadNum()*/]++;
-        /** @todo Move miss count code into BaseCache */
-        if (missCount) {
-            --missCount;
-            if (missCount == 0)
-                exitSimLoop("A cache reached the maximum miss count");
+        while (mshr->hasTargets()) {
+            MSHR::Target *target = mshr->getTarget();
+            assert(target->isCpuSide());
+            if (pkt->isRead()) {
+                target->pkt->setData(pkt->getPtr<uint8_t>());
+            }
+            cpuSidePort->respond(target->pkt, time);
+            mshr->popTarget();  // pop last, as satisfyMSHR does, so target is never used after removal
         }
+        assert(!mshr->hasTargets());
+        deallocate = true;
     }
 
-    if (pkt->flags & SATISFIED) {
-        // happens when a store conditional fails because it missed
-        // the cache completely
-        respond(pkt, curTick+lat);
-    } else {
-        missQueue->handleMiss(pkt, size, curTick + hitLatency);
+    if (deallocate) {
+        mq->deallocate(mshr);
+        if (wasFull && !mq->isFull()) {
+            clearBlocked((BlockedCause)mq->index);  // queue was full and now has room
+        }
     }
+}
 
-    if (!pkt->needsResponse()) {
-        //Need to clean up the packet on a writeback miss, but leave the request
-        //for the next level.
-        delete pkt;
-    }
 
-    return true;
-}
 
 
 template<class TagStore, class Coherence>
 PacketPtr
-Cache<TagStore,Coherence>::getPacket()
+Cache<TagStore,Coherence>::writebackBlk(BlkType *blk)
 {
-    assert(missQueue->havePending());
-    PacketPtr pkt = missQueue->getPacket();
-    if (pkt) {
-        if (!pkt->req->isUncacheable()) {
-            if (pkt->cmd == MemCmd::HardPFReq)
-                misses[MemCmd::HardPFReq][0/*pkt->req->getThreadNum()*/]++;
-            BlkType *blk = tags->findBlock(pkt->getAddr());
-            MemCmd cmd =
-                coherence->getBusCmd(pkt->cmd, (blk) ? blk->status : 0);
-            missQueue->setBusCmd(pkt, cmd);
-        }
-    }
+    assert(blk && blk->isValid() && blk->isDirty());  // only a valid, dirty block is written back
 
-    assert(!doMasterRequest() || missQueue->havePending());
-    assert(!pkt || pkt->time <= curTick);
-    SIGNAL_NACK_HACK = false;
-    return pkt;
+    writebacks[0/*pkt->req->getThreadNum()*/]++;  // writeback statistic; thread index fixed at 0
+
+    Request *writebackReq =
+        new Request(tags->regenerateBlkAddr(blk->tag, blk->set), blkSize, 0);  // block-sized, at the block's own address
+    PacketPtr writeback = new Packet(writebackReq, MemCmd::Writeback, -1);
+    writeback->allocate();
+    std::memcpy(writeback->getPtr<uint8_t>(), blk->data, blkSize);  // copy the whole block into the packet
+
+    blk->status &= ~BlkDirty;  // the packet now holds the dirty data
+    return writeback;  // handed back to the caller; not sent from here
 }
 
+
+// Note that the reason we return a list of writebacks rather than
+// inserting them directly in the write buffer is that this function
+// is called by both atomic and timing-mode accesses, and in atomic
+// mode we don't mess with the write buffer (we just perform the
+// writebacks atomically once the original request is complete).
 template<class TagStore, class Coherence>
-void
-Cache<TagStore,Coherence>::sendResult(PacketPtr &pkt, MSHR* mshr,
-                                                bool success)
+typename Cache<TagStore,Coherence>::BlkType*
+Cache<TagStore,Coherence>::handleFill(PacketPtr pkt, BlkType *blk,
+                                      PacketList &writebacks)
 {
-    if (success && !(SIGNAL_NACK_HACK)) {
-        //Remember if it was an upgrade because writeback MSHR's are removed
-        //in Mark in Service
-        bool upgrade = (mshr->pkt && mshr->pkt->cmd == MemCmd::UpgradeReq);
-
-        missQueue->markInService(mshr->pkt, mshr);
-
-        //Temp Hack for UPGRADES
-        if (upgrade) {
-            assert(pkt);  //Upgrades need to be fixed
-            pkt->flags &= ~CACHE_LINE_FILL;
-            BlkType *blk = tags->findBlock(pkt->getAddr());
-            CacheBlk::State old_state = (blk) ? blk->status : 0;
-            CacheBlk::State new_state = coherence->getNewState(pkt,old_state);
-            if (old_state != new_state)
-                DPRINTF(Cache, "Block for blk addr %x moving from state "
-                        "%i to %i\n", pkt->getAddr(), old_state, new_state);
-            //Set the state on the upgrade
-            std::memcpy(pkt->getPtr<uint8_t>(), blk->data, blkSize);
-            PacketList writebacks;
-            handleFill(blk, mshr, new_state, writebacks, pkt);
-            assert(writebacks.empty());
-            missQueue->handleResponse(pkt, curTick + hitLatency);
-        }
-    } else if (pkt && !pkt->req->isUncacheable()) {
-        pkt->flags &= ~NACKED_LINE;
-        SIGNAL_NACK_HACK = false;
-        pkt->flags &= ~SATISFIED;
-        pkt->flags &= ~SNOOP_COMMIT;
+    Addr addr = pkt->getAddr();
 
-//Rmove copy from mshr
-        delete mshr->pkt;
-        mshr->pkt = pkt;
+    if (blk == NULL) {
+        // no existing block to update, so the packet must carry read data
+        assert(pkt->isRead());
 
-        missQueue->restoreOrigCmd(pkt);
-    }
-}
+        // need to do a replacement: get a block frame from the tag store
+        blk = tags->findReplacement(addr, writebacks);
+        if (blk->isValid()) {
+            DPRINTF(Cache, "replacement: replacing %x with %x: %s\n",
+                    tags->regenerateBlkAddr(blk->tag, blk->set), addr,
+                    blk->isDirty() ? "writeback" : "clean");
 
-template<class TagStore, class Coherence>
-void
-Cache<TagStore,Coherence>::handleResponse(PacketPtr &pkt)
-{
-    BlkType *blk = NULL;
-    if (pkt->senderState) {
-        //Delete temp copy in MSHR, restore it.
-        delete ((MSHR*)pkt->senderState)->pkt;
-        ((MSHR*)pkt->senderState)->pkt = pkt;
-        if (pkt->result == Packet::Nacked) {
-            //pkt->reinitFromRequest();
-            warn("NACKs from devices not connected to the same bus "
-                 "not implemented\n");
-            return;
-        }
-        if (pkt->result == Packet::BadAddress) {
-            //Make the response a Bad address and send it
-        }
-//	MemDebug::cacheResponse(pkt);
-        DPRINTF(Cache, "Handling reponse to %x\n", pkt->getAddr());
-
-        if (pkt->isCacheFill() && !pkt->isNoAllocate()) {
-            DPRINTF(Cache, "Block for addr %x being updated in Cache\n",
-                    pkt->getAddr());
-            blk = tags->findBlock(pkt->getAddr());
-            CacheBlk::State old_state = (blk) ? blk->status : 0;
-            PacketList writebacks;
-            CacheBlk::State new_state = coherence->getNewState(pkt,old_state);
-            if (old_state != new_state)
-                DPRINTF(Cache, "Block for blk addr %x moving from "
-                        "state %i to %i\n",
-                        pkt->getAddr(),
-                        old_state, new_state);
-            blk = handleFill(blk, (MSHR*)pkt->senderState,
-                                   new_state, writebacks, pkt);
-            while (!writebacks.empty()) {
-                PacketPtr wbPkt = writebacks.front();
-                missQueue->doWriteback(wbPkt);
-                writebacks.pop_front();
-                delete wbPkt;
+            if (blk->isDirty()) {
+                // Save writeback packet for handling by caller
+                writebacks.push_back(writebackBlk(blk));
             }
         }
-        missQueue->handleResponse(pkt, curTick + hitLatency);
+
+        blk->tag = tags->extractTag(addr);  // retag the frame for the new address
+        blk->status = coherence->getNewState(pkt);  // state for a block with no prior state
+    } else {
+        // existing block... probably an upgrade
+        assert(blk->tag == tags->extractTag(addr));
+        // either we're getting new data or the block should already be valid
+        assert(pkt->isRead() || blk->isValid());
+        CacheBlk::State old_state = blk->status;
+        blk->status = coherence->getNewState(pkt, old_state);
+        if (blk->status != old_state)
+            DPRINTF(Cache, "Block addr %x moving from state %i to %i\n",
+                    addr, old_state, blk->status);
+        else
+            warn("Changing state to same value\n");
     }
+
+    // if we got new data, copy it in (a full block's worth)
+    if (pkt->isRead()) {
+        std::memcpy(blk->data, pkt->getPtr<uint8_t>(), blkSize);
+    }
+
+    blk->whenReady = pkt->finishTime;  // block's ready time is the packet's finish time
+
+    return blk;  // the filled block (newly allocated or the one passed in)
 }
 
+
+/////////////////////////////////////////////////////
+//
+// Snoop path: requests coming in from the memory side
+//
+/////////////////////////////////////////////////////
+
 template<class TagStore, class Coherence>
-PacketPtr
-Cache<TagStore,Coherence>::getCoherencePacket()
+void
+Cache<TagStore,Coherence>::doTimingSupplyResponse(PacketPtr req_pkt,
+                                                  uint8_t *blk_data)
 {
-    return coherence->getPacket();
+    // timing-mode snoop responses require a new packet
+    PacketPtr pkt = new Packet(req_pkt);  // built from the snooped request; req_pkt itself is not modified here
+    pkt->allocate();
+    pkt->makeTimingResponse();
+    pkt->setDataFromBlock(blk_data, blkSize);  // fill the response from the supplied block data
+    memSidePort->respond(pkt, curTick + hitLatency);  // respond on the memory side, hitLatency ticks from now
 }
 
 template<class TagStore, class Coherence>
 void
-Cache<TagStore,Coherence>::sendCoherenceResult(PacketPtr &pkt,
-                                                         MSHR *cshr,
-                                                         bool success)
+Cache<TagStore,Coherence>::handleSnoop(PacketPtr pkt, BlkType *blk,
+                                       bool is_timing)
 {
-    coherence->sendResult(pkt, cshr, success);
+    if (!blk || !blk->isValid()) {  // no valid copy here: nothing to do
+        return;
+    }
+
+    // we may end up modifying both the block state and the packet (if
+    // we respond in atomic mode), so just figure out what to do now
+    // and then do it later
+    bool supply = blk->isDirty() && pkt->isRead();  // dirty copy + read snoop: we supply the data
+    bool invalidate = pkt->isInvalidate();
+
+    if (pkt->isRead() && !pkt->isInvalidate()) {  // non-invalidating read: keep the block, drop write permission
+        assert(!pkt->needsExclusive());
+        pkt->assertShared();
+        int bits_to_clear = BlkWritable;
+        const bool haveOwnershipState = true; // for now
+        if (!haveOwnershipState) {
+            // if we don't support pure ownership (dirty && !writable),
+            // have to clear dirty bit here, assume memory snarfs data
+            // on cache-to-cache xfer
+            bits_to_clear |= BlkDirty;
+        }
+        blk->status &= ~bits_to_clear;
+    }
+
+    if (supply) {
+        pkt->assertMemInhibit();
+        if (is_timing) {
+            doTimingSupplyResponse(pkt, blk->data);  // timing mode: respond with a separate packet
+        } else {
+            pkt->makeAtomicResponse();  // atomic mode: turn the snooped packet itself into the response
+            pkt->setDataFromBlock(blk->data, blkSize);
+        }
+    }
+
+    // Do this last in case it deallocates block data or something
+    // like that
+    if (invalidate) {
+        tags->invalidateBlk(blk);
+    }
+
+    DPRINTF(Cache, "snooped a %s request for addr %x, %snew state is %i\n",
+            pkt->cmdString(), blockAlign(pkt->getAddr()),
+            supply ? "supplying data, " : "", blk->status);
 }
 
 
 template<class TagStore, class Coherence>
 void
-Cache<TagStore,Coherence>::snoop(PacketPtr &pkt)
+Cache<TagStore,Coherence>::snoopTiming(PacketPtr pkt)
 {
     if (pkt->req->isUncacheable()) {
         //Can't get a hit on an uncacheable address
@@ -810,373 +877,201 @@ Cache<TagStore,Coherence>::snoop(PacketPtr &pkt)
         return;
     }
 
-    //Send a timing (true) invalidate up if the protocol calls for it
-    if (coherence->propogateInvalidate(pkt, true)) {
-        //Temp hack, we had a functional read hit in the L1, mark as success
-        pkt->flags |= SATISFIED;
-        pkt->result = Packet::Success;
-        respondToSnoop(pkt, curTick + hitLatency);
-        return;
-    }
+    BlkType *blk = tags->findBlock(pkt->getAddr());
 
     Addr blk_addr = pkt->getAddr() & ~(Addr(blkSize-1));
-    BlkType *blk = tags->findBlock(pkt->getAddr());
-    MSHR *mshr = missQueue->findMSHR(blk_addr);
-    if (coherence->hasProtocol() || pkt->isInvalidate()) {
-        //@todo Move this into handle bus req
-        //If we find an mshr, and it is in service, we need to NACK or
-        //invalidate
-        if (mshr) {
-            if (mshr->inService) {
-                if ((mshr->pkt->isInvalidate() || !mshr->pkt->isCacheFill())
-                    && (pkt->cmd != MemCmd::InvalidateReq
-                        && pkt->cmd != MemCmd::WriteInvalidateReq)) {
-                    //If the outstanding request was an invalidate
-                    //(upgrade,readex,..)  Then we need to ACK the request
-                    //until we get the data Also NACK if the outstanding
-                    //request is not a cachefill (writeback)
-                    assert(!(pkt->flags & SATISFIED));
-                    pkt->flags |= SATISFIED;
-                    pkt->flags |= NACKED_LINE;
-                    SIGNAL_NACK_HACK = true;
-                    ///@todo NACK's from other levels
-                    //warn("NACKs from devices not connected to the same bus "
-                    //"not implemented\n");
-                    //respondToSnoop(pkt, curTick + hitLatency);
-                    return;
-                }
-                else {
-                    //The supplier will be someone else, because we are
-                    //waiting for the data.  This should cause this cache to
-                    //be forced to go to the shared state, not the exclusive
-                    //even though the shared line won't be asserted.  But for
-                    //now we will just invlidate ourselves and allow the other
-                    //cache to go into the exclusive state.  @todo Make it so
-                    //a read to a pending read doesn't invalidate.  @todo Make
-                    //it so that a read to a pending read can't be exclusive
-                    //now.
-
-                    //Set the address so find match works
-                    //panic("Don't have invalidates yet\n");
-                    invalidatePkt->addrOverride(pkt->getAddr());
-
-                    //Append the invalidate on
-                    missQueue->addTarget(mshr,invalidatePkt);
-                    DPRINTF(Cache, "Appending Invalidate to addr: %x\n",
-                            pkt->getAddr());
-                    return;
+    MSHR *mshr = mshrQueue.findMatch(blk_addr);
+    // better not be snooping a request that conflicts with something
+    // we have outstanding...
+    assert(!mshr || !mshr->inService);
+
+    //We also need to check the writeback buffers and handle those
+    std::vector<MSHR *> writebacks;
+    if (writeBuffer.findMatches(blk_addr, writebacks)) {
+        DPRINTF(Cache, "Snoop hit in writeback to addr: %x\n",
+                pkt->getAddr());
+
+        //Use the first match (we return after it); matches must be cacheable
+        for (size_t i = 0; i < writebacks.size(); i++) {
+            mshr = writebacks[i];
+            assert(!mshr->isUncacheable());
+
+            if (pkt->isRead()) {
+                pkt->assertMemInhibit();
+                if (!pkt->needsExclusive()) {
+                    pkt->assertShared();
+                } else {
+                    // if we're not asserting the shared line, we need to
+                    // invalidate our copy.  we'll do that below as long as
+                    // the packet's invalidate flag is set...
+                    assert(pkt->isInvalidate());
                 }
+                doTimingSupplyResponse(pkt, mshr->writeData);
             }
-        }
-        //We also need to check the writeback buffers and handle those
-        std::vector<MSHR *> writebacks;
-        if (missQueue->findWrites(blk_addr, writebacks)) {
-            DPRINTF(Cache, "Snoop hit in writeback to addr: %x\n",
-                    pkt->getAddr());
-
-            //Look through writebacks for any non-uncachable writes, use that
-            for (int i=0; i<writebacks.size(); i++) {
-                mshr = writebacks[i];
-
-                if (!mshr->pkt->req->isUncacheable()) {
-                    if (pkt->isRead()) {
-                        //Only Upgrades don't get here
-                        //Supply the data
-                        assert(!(pkt->flags & SATISFIED));
-                        pkt->flags |= SATISFIED;
-
-                        //If we are in an exclusive protocol, make it ask again
-                        //to get write permissions (upgrade), signal shared
-                        pkt->flags |= SHARED_LINE;
-
-                        assert(pkt->isRead());
-                        Addr offset = pkt->getAddr() & (blkSize - 1);
-                        assert(offset < blkSize);
-                        assert(pkt->getSize() <= blkSize);
-                        assert(offset + pkt->getSize() <=blkSize);
-                        std::memcpy(pkt->getPtr<uint8_t>(), mshr->pkt->getPtr<uint8_t>() + offset, pkt->getSize());
-
-                        respondToSnoop(pkt, curTick + hitLatency);
-                    }
-
-                    if (pkt->isInvalidate()) {
-                        //This must be an upgrade or other cache will take
-                        //ownership
-                        missQueue->markInService(mshr->pkt, mshr);
-                    }
-                    return;
-                }
+
+            if (pkt->isInvalidate()) {
+                // Invalidation trumps our writeback... discard here
+                assert(0);
+                markInService(mshr);
             }
+            return;
         }
     }
-    CacheBlk::State new_state;
-    bool satisfy = coherence->handleBusRequest(pkt,blk,mshr, new_state);
-
-    if (blk && mshr && !mshr->inService && new_state == 0) {
-            //There was a outstanding write to a shared block, not need ReadEx
-            //not update, so change No Allocate param in MSHR
-            mshr->pkt->flags &= ~NO_ALLOCATE;
-    }
 
-    if (satisfy) {
-        DPRINTF(Cache, "Cache snooped a %s request for addr %x and "
-                "now supplying data, new state is %i\n",
-                pkt->cmdString(), blk_addr, new_state);
-
-        handleSnoop(blk, new_state, pkt);
-        respondToSnoop(pkt, curTick + hitLatency);
-        return;
-    }
-    if (blk)
-        DPRINTF(Cache, "Cache snooped a %s request for addr %x, "
-                "new state is %i\n", pkt->cmdString(), blk_addr, new_state);
-
-    handleSnoop(blk, new_state);
+    handleSnoop(pkt, blk, true);  // true: timing mode
 }
 
+
 template<class TagStore, class Coherence>
-void
-Cache<TagStore,Coherence>::snoopResponse(PacketPtr &pkt)
+Tick
+Cache<TagStore,Coherence>::snoopAtomic(PacketPtr pkt)
 {
-    //Need to handle the response, if NACKED
-    if (pkt->flags & NACKED_LINE) {
-        //Need to mark it as not in service, and retry for bus
-        assert(0); //Yeah, we saw a NACK come through
-
-        //For now this should never get called, we return false when we see a
-        //NACK instead, by doing this we allow the bus_blocked mechanism to
-        //handle the retry For now it retrys in just 2 cycles, need to figure
-        //out how to change that Eventually we will want to also have success
-        //come in as a parameter Need to make sure that we handle the
-        //functionality that happens on successufl return of the sendAddr
-        //function
+    if (pkt->req->isUncacheable()) {
+        // Can't get a hit on an uncacheable address
+        // Revisit this for multi level coherence
+        return hitLatency;  // no lookup is done, but the latency is still reported
     }
+
+    BlkType *blk = tags->findBlock(pkt->getAddr());
+    handleSnoop(pkt, blk, false);  // false: atomic mode, any response is made on pkt itself
+    return hitLatency;  // same latency whether or not the block was found
 }
 
 
-/**
- * @todo Fix to not assume write allocate
- */
 template<class TagStore, class Coherence>
-Tick
-Cache<TagStore,Coherence>::probe(PacketPtr &pkt, bool update,
-                                           CachePort* otherSidePort)
+MSHR *
+Cache<TagStore,Coherence>::getNextMSHR()
 {
-//    MemDebug::cacheProbe(pkt);
-    if (!pkt->req->isUncacheable()) {
-        if (pkt->isInvalidate() && !pkt->isRead() && !pkt->isWrite()) {
-            //Upgrade or Invalidate, satisfy it, don't forward
-            DPRINTF(Cache, "%s %x ?\n", pkt->cmdString(), pkt->getAddr());
-            pkt->flags |= SATISFIED;
-            return 0;
-        }
-    }
+    // Check both MSHR queue and write buffer for potential requests
+    MSHR *miss_mshr  = mshrQueue.getNextMSHR();
+    MSHR *write_mshr = writeBuffer.getNextMSHR();
 
-    if (!update && (otherSidePort == cpuSidePort)) {
-        // Still need to change data in all locations.
-        otherSidePort->checkAndSendFunctional(pkt);
-        if (pkt->isRead() && pkt->result == Packet::Success)
-            return 0;
+    // Now figure out which one to send... some cases are easy
+    if (miss_mshr && !write_mshr) {
+        return miss_mshr;
+    }
+    if (write_mshr && !miss_mshr) {
+        return write_mshr;
     }
 
-    PacketList writebacks;
-    int lat;
-
-    BlkType *blk = handleAccess(pkt, lat, writebacks, update);
-
-    DPRINTF(Cache, "%s %x %s\n", pkt->cmdString(),
-            pkt->getAddr(), (blk) ? "hit" : "miss");
-
-
-    // Need to check for outstanding misses and writes
-    Addr blk_addr = pkt->getAddr() & ~(blkSize - 1);
-
-    // There can only be one matching outstanding miss.
-    MSHR* mshr = missQueue->findMSHR(blk_addr);
-
-    // There can be many matching outstanding writes.
-    std::vector<MSHR*> writes;
-    missQueue->findWrites(blk_addr, writes);
-
-    if (!update) {
-        bool notDone = !(pkt->flags & SATISFIED); //Hit in cache (was a block)
-        // Check for data in MSHR and writebuffer.
-        if (mshr) {
-            MSHR::TargetList *targets = mshr->getTargetList();
-            MSHR::TargetList::iterator i = targets->begin();
-            MSHR::TargetList::iterator end = targets->end();
-            for (; i != end && notDone; ++i) {
-                PacketPtr target = *i;
-                // If the target contains data, and it overlaps the
-                // probed request, need to update data
-                if (target->intersect(pkt)) {
-                    DPRINTF(Cache, "Functional %s access to blk_addr %x intersects a MSHR\n",
-                            pkt->cmdString(), blk_addr);
-                    notDone = fixPacket(pkt, target);
-                }
-            }
-        }
-        for (int i = 0; i < writes.size() && notDone; ++i) {
-            PacketPtr write = writes[i]->pkt;
-            if (write->intersect(pkt)) {
-                DPRINTF(Cache, "Functional %s access to blk_addr %x intersects a writeback\n",
-                        pkt->cmdString(), blk_addr);
-                notDone = fixPacket(pkt, write);
+    if (miss_mshr && write_mshr) {
+        // We have one of each... normally we favor the miss request
+        // unless the write buffer is full
+        if (writeBuffer.isFull() && writeBuffer.inServiceEntries == 0) {
+            // Write buffer is full, so we'd like to issue a write;
+            // need to search MSHR queue for conflicting earlier miss.
+            MSHR *conflict_mshr =
+                mshrQueue.findPending(write_mshr->addr, write_mshr->size);
+
+            if (conflict_mshr && conflict_mshr->order < write_mshr->order) {
+                // Service misses in order until conflict is cleared.
+                return conflict_mshr;
             }
-        }
-        if (notDone && otherSidePort == memSidePort) {
-            otherSidePort->checkAndSendFunctional(pkt);
-            assert(pkt->result == Packet::Success);
-        }
-        return 0;
-    } else if (!blk && !(pkt->flags & SATISFIED)) {
-        // update the cache state and statistics
-        if (mshr || !writes.empty()){
-            // Can't handle it, return request unsatisfied.
-            panic("Atomic access ran into outstanding MSHR's or WB's!");
-        }
-        if (!pkt->req->isUncacheable() /*Uncacheables just go through*/
-            && (pkt->cmd != MemCmd::Writeback)/*Writebacks on miss fall through*/) {
-                // Fetch the cache block to fill
-            BlkType *blk = tags->findBlock(pkt->getAddr());
-            MemCmd temp_cmd =
-                coherence->getBusCmd(pkt->cmd, (blk) ? blk->status : 0);
-
-            PacketPtr busPkt = new Packet(pkt->req,temp_cmd, -1, blkSize);
 
-            busPkt->allocate();
-
-            busPkt->time = curTick;
+            // No conflicts; issue write
+            return write_mshr;
+        }
 
-            DPRINTF(Cache, "Sending a atomic %s for %x\n",
-                    busPkt->cmdString(), busPkt->getAddr());
+        // Write buffer isn't full, but need to check it for
+        // conflicting earlier writeback
+        MSHR *conflict_mshr =
+            writeBuffer.findPending(miss_mshr->addr, miss_mshr->size);
+        if (conflict_mshr) {
+            // not sure why we don't check order here... it was in the
+            // original code but commented out.
+
+            // The only way this happens is if we are
+            // doing a write and we didn't have permissions
+            // then subsequently saw a writeback (owned got evicted)
+            // We need to make sure to perform the writeback first
+            // To preserve the dirty data, then we can issue the write
+
+            // should we return write_mshr here instead?  I.e. do we
+            // have to flush writes in order?  I don't think so... not
+            // for Alpha anyway.  Maybe for x86?
+            return conflict_mshr;
+        }
 
-            lat = memSidePort->sendAtomic(busPkt);
+        // No conflicts; issue read
+        return miss_mshr;
+    }
 
-            //Be sure to flip the response to a request for coherence
-            if (busPkt->needsResponse()) {
-                busPkt->makeAtomicResponse();
-            }
+    // fall through... no pending requests.  Try a prefetch.
+    assert(!miss_mshr && !write_mshr);
+    if (!mshrQueue.isFull()) {
+        // If we have a miss queue slot, we can try a prefetch
+        PacketPtr pkt = prefetcher->getPacket();
+        if (pkt) {
+            // Update statistic on number of prefetches issued
+            // (hwpf_mshr_misses)
+            mshr_misses[pkt->cmdToIndex()][0/*pkt->req->getThreadNum()*/]++;
+            // Don't request bus, since we already have it
+            return allocateMissBuffer(pkt, curTick, false);
+        }
+    }
 
-/*		if (!(busPkt->flags & SATISFIED)) {
-// blocked at a higher level, just return
-return 0;
+    return NULL;
 }
 
-*/		misses[pkt->cmdToIndex()][0/*pkt->req->getThreadNum()*/]++;
 
-            CacheBlk::State old_state = (blk) ? blk->status : 0;
-            CacheBlk::State new_state =
-                coherence->getNewState(busPkt, old_state);
-            DPRINTF(Cache, "Receive response: %s for addr %x in state %i\n",
-                    busPkt->cmdString(), busPkt->getAddr(), old_state);
-            if (old_state != new_state)
-                DPRINTF(Cache, "Block for blk addr %x moving from state "
-                        "%i to %i\n", busPkt->getAddr(), old_state, new_state);
+template<class TagStore, class Coherence>
+PacketPtr
+Cache<TagStore,Coherence>::getTimingPacket()
+{
+    MSHR *mshr = getNextMSHR();
 
-            handleFill(blk, busPkt, new_state, writebacks, pkt);
-            //Free the packet
-            delete busPkt;
+    if (mshr == NULL) {
+        return NULL;  // no MSHR to service, so there is no packet to send
+    }
 
-            // Handle writebacks if needed
-            while (!writebacks.empty()){
-                PacketPtr wbPkt = writebacks.front();
-                memSidePort->sendAtomic(wbPkt);
-                writebacks.pop_front();
-                delete wbPkt;
-            }
-                return lat + hitLatency;
-        } else {
-            return memSidePort->sendAtomic(pkt);
-        }
+    // build the outgoing packet from the request of the MSHR's 1st target
+    PacketPtr tgt_pkt = mshr->getTarget()->pkt;
+    PacketPtr pkt = NULL;
+
+    if (mshr->isSimpleForward()) {
+        // no response expected, just forward packet as it is
+        assert(tags->findBlock(mshr->addr) == NULL);
+        pkt = tgt_pkt;
     } else {
-        if (blk) {
-            // There was a cache hit.
-            // Handle writebacks if needed
-            while (!writebacks.empty()){
-                PacketPtr wbPkt = writebacks.front();
-                memSidePort->sendAtomic(wbPkt);
-                writebacks.pop_front();
-                delete wbPkt;
+        BlkType *blk = tags->findBlock(mshr->addr);
+        pkt = getBusPacket(tgt_pkt, blk, mshr->needsExclusive);
+        // the MSHR counts as a cache fill only if a bus packet was built
+        mshr->isCacheFill = (pkt != NULL);
+
+        if (pkt == NULL) {
+            // not a cache block request, but a response is expected
+            assert(!mshr->isSimpleForward());
+            // make copy of current packet to forward, keep current
+            // copy for response handling
+            pkt = new Packet(tgt_pkt);
+            pkt->allocate();
+            if (pkt->isWrite()) {
+                pkt->setData(tgt_pkt->getPtr<uint8_t>());
             }
-
-            hits[pkt->cmdToIndex()][0/*pkt->req->getThreadNum()*/]++;
         }
-
-        return hitLatency;
     }
 
-    return 0;
+    assert(pkt != NULL);
+    pkt->senderState = mshr;  // sendPacket() reads the MSHR back from here
+    return pkt;
 }
 
-template<class TagStore, class Coherence>
-Tick
-Cache<TagStore,Coherence>::snoopProbe(PacketPtr &pkt)
-{
-    //Send a atomic (false) invalidate up if the protocol calls for it
-    if (coherence->propogateInvalidate(pkt, false)) {
-        //Temp hack, we had a functional read hit in the L1, mark as success
-        pkt->flags |= SATISFIED;
-        pkt->result = Packet::Success;
-        return hitLatency;
-    }
-
-    Addr blk_addr = pkt->getAddr() & ~(Addr(blkSize-1));
-    BlkType *blk = tags->findBlock(pkt->getAddr());
-    MSHR *mshr = missQueue->findMSHR(blk_addr);
-    CacheBlk::State new_state = 0;
-    bool satisfy = coherence->handleBusRequest(pkt,blk,mshr, new_state);
-    if (satisfy) {
-        DPRINTF(Cache, "Cache snooped a %s request for addr %x and "
-                "now supplying data, new state is %i\n",
-                pkt->cmdString(), blk_addr, new_state);
-
-            handleSnoop(blk, new_state, pkt);
-            return hitLatency;
-    }
-    if (blk)
-        DPRINTF(Cache, "Cache snooped a %s request for addr %x, "
-                "new state is %i\n",
-                    pkt->cmdString(), blk_addr, new_state);
-    handleSnoop(blk, new_state);
-    return 0;
-}
 
-template<class TagStore, class Coherence>
-Port *
-Cache<TagStore,Coherence>::getPort(const std::string &if_name, int idx)
-{
-    if (if_name == "" || if_name == "cpu_side")
-    {
-        if (cpuSidePort == NULL) {
-            cpuSidePort = new CpuSidePort(name() + "-cpu_side_port", this);
-            sendEvent = new ResponseEvent(cpuSidePort);
-        }
-        return cpuSidePort;
-    }
-    else if (if_name == "functional")
-    {
-        return new CpuSidePort(name() + "-cpu_side_funcport", this);
-    }
-    else if (if_name == "mem_side")
-    {
-        if (memSidePort != NULL)
-            panic("Already have a mem side for this cache\n");
-        memSidePort = new MemSidePort(name() + "-mem_side_port", this);
-        memSendEvent = new ResponseEvent(memSidePort);
-        return memSidePort;
-    }
-    else panic("Port name %s unrecognized\n", if_name);
-}
+///////////////
+//
+// CpuSidePort
+//
+///////////////
 
 template<class TagStore, class Coherence>
 void
-Cache<TagStore,Coherence>::deletePortRefs(Port *p)
+Cache<TagStore,Coherence>::CpuSidePort::
+getDeviceAddressRanges(AddrRangeList &resp, bool &snoop)
 {
-    if (cpuSidePort == p || memSidePort == p)
-        panic("Can only delete functional ports\n");
-
-    delete p;
+    // CPU side port doesn't snoop; it's a target only.
+    bool dummy;  // takes the peer's snoop flag, which is not used
+    otherPort->getPeerAddressRanges(resp, dummy);
+    snoop = false;
 }
 
 
@@ -1184,58 +1079,58 @@ template<class TagStore, class Coherence>
 bool
 Cache<TagStore,Coherence>::CpuSidePort::recvTiming(PacketPtr pkt)
 {
-    assert(pkt->result != Packet::Nacked);
-
-    if (!pkt->req->isUncacheable()
-        && pkt->isInvalidate()
-        && !pkt->isRead() && !pkt->isWrite()) {
-        //Upgrade or Invalidate
-        //Look into what happens if two slave caches on bus
-        DPRINTF(Cache, "%s %x ?\n", pkt->cmdString(), pkt->getAddr());
-
-        assert(!(pkt->flags & SATISFIED));
-        pkt->flags |= SATISFIED;
-        //Invalidates/Upgrades need no response if they get the bus
-        return true;
-    }
-
-    if (pkt->isRequest() && blocked)
-    {
+    if (pkt->isRequest() && blocked) {
         DPRINTF(Cache,"Scheduling a retry while blocked\n");
         mustSendRetry = true;
         return false;
     }
 
-    if (pkt->isWrite() && (pkt->req->isLocked())) {
-        pkt->req->setExtraData(1);
-    }
-    myCache()->access(pkt);
+    myCache()->timingAccess(pkt);  // not a blocked request: accept it
     return true;
 }
 
+// CPU-side atomic access: returns whatever Tick atomicAccess() reports.
 template<class TagStore, class Coherence>
 Tick
 Cache<TagStore,Coherence>::CpuSidePort::recvAtomic(PacketPtr pkt)
 {
-    myCache()->probe(pkt, true, NULL);
-    //TEMP ALWAYS SUCCES FOR NOW
-    pkt->result = Packet::Success;
-    //Fix this timing info
-    return myCache()->hitLatency;
+    return myCache()->atomicAccess(pkt);
 }
 
+// Functional access: try checkFunctional(), then fall back to the cache.
 template<class TagStore, class Coherence>
 void
 Cache<TagStore,Coherence>::CpuSidePort::recvFunctional(PacketPtr pkt)
 {
-    if (checkFunctional(pkt)) {
-        //TEMP USE CPU?THREAD 0 0
-        pkt->req->setThreadContext(0,0);
+    checkFunctional(pkt);
+    if (pkt->result != Packet::Success)
+        myCache()->functionalAccess(pkt, cache->memSidePort);
+}
 
-        myCache()->probe(pkt, false, cache->memSidePort);
-        //TEMP ALWAYS SUCCESFUL FOR NOW
-        pkt->result = Packet::Success;
-    }
+// Constructor: only forwards the name and owning cache to CachePort.
+template<class TagStore, class Coherence>
+Cache<TagStore,Coherence>::
+CpuSidePort::CpuSidePort(const std::string &_name,
+                         Cache<TagStore,Coherence> *_cache)
+    : BaseCache::CachePort(_name, _cache)
+{
+}
+
+///////////////
+//
+// MemSidePort
+//
+///////////////
+
+template<class TagStore, class Coherence>
+void
+Cache<TagStore,Coherence>::MemSidePort::
+getDeviceAddressRanges(AddrRangeList &resp, bool &snoop)
+{
+    otherPort->getPeerAddressRanges(resp, snoop);
+    // Memory-side port always snoops, so unconditionally set the flag
+    // for the caller, replacing any value the call above left in it.
+    snoop = true;
 }
 
 
@@ -1249,58 +1144,133 @@ Cache<TagStore,Coherence>::MemSidePort::recvTiming(PacketPtr pkt)
     if (pkt->result == Packet::Nacked)
         panic("Need to implement cache resending nacked packets!\n");
 
-    if (pkt->isRequest() && blocked)
-    {
+    if (pkt->isRequest() && blocked) {
         DPRINTF(Cache,"Scheduling a retry while blocked\n");
         mustSendRetry = true;
         return false;
     }
 
-    if (pkt->isResponse())
+    if (pkt->isResponse()) {
         myCache()->handleResponse(pkt);
-    else {
-        //Check if we should do the snoop
-        if (pkt->flags & SNOOP_COMMIT)
-            myCache()->snoop(pkt);
+    } else {
+        myCache()->snoopTiming(pkt);
     }
     return true;
 }
 
+
 template<class TagStore, class Coherence>
 Tick
 Cache<TagStore,Coherence>::MemSidePort::recvAtomic(PacketPtr pkt)
 {
-    if (pkt->isResponse())
-        myCache()->handleResponse(pkt);
-    else
-        return myCache()->snoopProbe(pkt);
-    //Fix this timing info
-    return myCache()->hitLatency;
+    // in atomic mode, responses go back to the sender via the
+    // function return from sendAtomic(), not via a separate
+    // sendAtomic() from the responder.  Thus we should never see a
+    // response packet in recvAtomic() (anywhere, not just here).
+    assert(!pkt->isResponse());
+    return myCache()->snoopAtomic(pkt);
 }
 
+
 template<class TagStore, class Coherence>
 void
 Cache<TagStore,Coherence>::MemSidePort::recvFunctional(PacketPtr pkt)
 {
-    myCache()->probe(pkt, false, cache->cpuSidePort);
+    checkFunctional(pkt);
     if (pkt->result != Packet::Success)
-        checkFunctional(pkt);
+        myCache()->functionalAccess(pkt, cache->cpuSidePort);
 }
 
 
+// Try to send one packet on the memory side: a ready deferred response
+// if there is one, otherwise the next request/writeback from the cache.
+// Afterwards reschedule the send event or, if idle, finish a drain.
 template<class TagStore, class Coherence>
-Cache<TagStore,Coherence>::
-CpuSidePort::CpuSidePort(const std::string &_name,
-                         Cache<TagStore,Coherence> *_cache)
-    : BaseCache::CachePort(_name, _cache, true)
+void
+Cache<TagStore,Coherence>::MemSidePort::sendPacket()
+{
+    // if we have responses that are ready, they take precedence
+    if (deferredPacketReady()) {
+        bool success = sendTiming(transmitList.front().pkt);
+
+        if (success) {
+            //send successful, remove packet
+            transmitList.pop_front();
+        }
+
+        waitingOnRetry = !success;
+    } else {
+        // check for non-response packets (requests & writebacks)
+        PacketPtr pkt = myCache()->getTimingPacket();
+        if (pkt == NULL) {
+            // getTimingPacket() returns NULL when getNextMSHR() finds
+            // nothing to service; in that case there is nothing to
+            // send and no retry to wait for
+            waitingOnRetry = false;
+        } else {
+            MSHR *mshr = dynamic_cast<MSHR*>(pkt->senderState);
+
+            bool success = sendTiming(pkt);
+            DPRINTF(Cache, "Address %x was %s in sending the timing request\n",
+                    pkt->getAddr(), success ? "successful" : "unsuccessful");
+
+            waitingOnRetry = !success;
+            if (waitingOnRetry) {
+                DPRINTF(CachePort, "%s now waiting on a retry\n", name());
+            } else {
+                myCache()->markInService(mshr);
+            }
+        }
+    }
+
+
+    // tried to send packet... if it was successful (no retry), see if
+    // we need to rerequest bus or not
+    if (!waitingOnRetry) {
+        if (isBusRequested()) {
+            // more requests/writebacks: rerequest ASAP
+            DPRINTF(CachePort, "%s still more MSHR requests to send\n",
+                    name());
+            sendEvent->schedule(curTick+1);
+        } else if (!transmitList.empty()) {
+            // deferred packets: rerequest bus, but possibly not until later
+            Tick time = transmitList.front().tick;
+            sendEvent->schedule(time <= curTick ? curTick+1 : time);
+        } else {
+            // no more to send right now: if we're draining, we may be done
+            if (drainEvent) {
+                drainEvent->process();
+                drainEvent = NULL;
+            }
+        }
+    }
+}
+
+template<class TagStore, class Coherence>
+void
+Cache<TagStore,Coherence>::MemSidePort::recvRetry()
 {
+    assert(waitingOnRetry);  // set by sendPacket() when sendTiming() fails
+    sendPacket();
 }
 
+// Runs sendPacket(); asserts that no retry is outstanding when called.
+template<class TagStore, class Coherence>
+void
+Cache<TagStore,Coherence>::MemSidePort::processSendEvent()
+{
+    assert(!waitingOnRetry);
+    sendPacket();
+}
+
+// Constructor: swaps the inherited send event for this port's SendEvent.
 template<class TagStore, class Coherence>
 Cache<TagStore,Coherence>::
 MemSidePort::MemSidePort(const std::string &_name,
                          Cache<TagStore,Coherence> *_cache)
-    : BaseCache::CachePort(_name, _cache, false)
+    : BaseCache::CachePort(_name, _cache)
 {
+    // override default send event from SimpleTimingPort
+    delete sendEvent;
+    sendEvent = new SendEvent(this);
 }
-