34 files changed, 644 insertions, 2743 deletions
diff --git a/src/mem/cache/BaseCache.py b/src/mem/cache/BaseCache.py
index f6d42b1ef..bef1b45d2 100644
--- a/src/mem/cache/BaseCache.py
+++ b/src/mem/cache/BaseCache.py
@@ -38,8 +38,6 @@ class BaseCache(MemObject):
     block_size = Param.Int("block size in bytes")
     latency = Param.Latency("Latency")
     hash_delay = Param.Int(1, "time in cycles of hash access")
-    lifo = Param.Bool(False,
-        "whether this NIC partition should use LIFO repl. policy")
     max_miss_count = Param.Counter(0,
         "number of misses to handle before calling exit")
     mshrs = Param.Int("number of MSHRs (max outstanding requests)")
@@ -47,9 +45,6 @@ class BaseCache(MemObject):
         "always service demand misses first")
     repl = Param.Repl(NULL, "replacement policy")
     size = Param.MemorySize("capacity in bytes")
-    split = Param.Bool(False, "whether or not this cache is split")
-    split_size = Param.Int(0,
-        "How many ways of the cache belong to CPU/LRU partition")
     subblock_size = Param.Int(0,
         "Size of subblock in IIC used for compression")
     tgts_per_mshr = Param.Int("max number of accesses per MSHR")
@@ -57,12 +52,10 @@ class BaseCache(MemObject):
     two_queue = Param.Bool(False,
         "whether the lifo should have two queue replacement")
     write_buffers = Param.Int(8, "number of write buffers")
-    prefetch_miss = Param.Bool(False,
-         "wheter you are using the hardware prefetcher from Miss stream")
-    prefetch_access = Param.Bool(False,
-         "wheter you are using the hardware prefetcher from Access stream")
+    prefetch_on_access = Param.Bool(False,
+         "notify the hardware prefetcher on every access (not just misses)")
     prefetcher_size = Param.Int(100,
-         "Number of entries in the harware prefetch queue")
+         "Number of entries in the hardware prefetch queue")
     prefetch_past_page = Param.Bool(False,
          "Allow prefetches to cross virtual page boundaries")
     prefetch_serial_squash = Param.Bool(False,
@@ -74,9 +67,9 @@ class BaseCache(MemObject):
     prefetch_policy = Param.Prefetch('none',
          "Type of prefetcher to use")
     prefetch_cache_check_push = Param.Bool(True,
-         "Check if in cash on push or pop of prefetch queue")
+         "Check if in cache on push or pop of prefetch queue")
     prefetch_use_cpu_id = Param.Bool(True,
-         "Use the CPU ID to seperate calculations of prefetches")
+         "Use the CPU ID to separate calculations of prefetches")
     prefetch_data_accesses_only = Param.Bool(False,
          "Only prefetch on data not on instruction accesses")
     cpu_side = Port("Port on side closer to CPU")
diff --git a/src/mem/cache/base.cc b/src/mem/cache/base.cc
index ac0d54bf6..956375530 100644
--- a/src/mem/cache/base.cc
+++ b/src/mem/cache/base.cc
@@ -122,7 +122,7 @@ BaseCache::CachePort::clearBlocked()
         mustSendRetry = false;
         SendRetryEvent *ev = new SendRetryEvent(this, true);
         // @TODO: need to find a better time (next bus cycle?)
-        ev->schedule(curTick + 1);
+        schedule(ev, curTick + 1);
     }
 }
 
diff --git a/src/mem/cache/base.hh b/src/mem/cache/base.hh
index d97021024..4319717e5 100644
--- a/src/mem/cache/base.hh
+++ b/src/mem/cache/base.hh
@@ -223,14 +223,14 @@ class BaseCache : public MemObject
      */
 
     /** Number of hits per thread for each type of command. @sa Packet::Command */
-    Stats::Vector<> hits[MemCmd::NUM_MEM_CMDS];
+    Stats::Vector hits[MemCmd::NUM_MEM_CMDS];
     /** Number of hits for demand accesses. */
     Stats::Formula demandHits;
     /** Number of hit for all accesses. */
     Stats::Formula overallHits;
 
     /** Number of misses per thread for each type of command. @sa Packet::Command */
-    Stats::Vector<> misses[MemCmd::NUM_MEM_CMDS];
+    Stats::Vector misses[MemCmd::NUM_MEM_CMDS];
     /** Number of misses for demand accesses. */
     Stats::Formula demandMisses;
     /** Number of misses for all accesses. */
@@ -240,7 +240,7 @@ class BaseCache : public MemObject
      * Total number of cycles per thread/command spent waiting for a miss.
      * Used to calculate the average miss latency.
      */
-    Stats::Vector<> missLatency[MemCmd::NUM_MEM_CMDS];
+    Stats::Vector missLatency[MemCmd::NUM_MEM_CMDS];
     /** Total number of cycles spent waiting for demand misses. */
     Stats::Formula demandMissLatency;
     /** Total number of cycles spent waiting for all misses. */
@@ -268,50 +268,50 @@ class BaseCache : public MemObject
     Stats::Formula overallAvgMissLatency;
 
     /** The total number of cycles blocked for each blocked cause. */
-    Stats::Vector<> blocked_cycles;
+    Stats::Vector blocked_cycles;
     /** The number of times this cache blocked for each blocked cause. */
-    Stats::Vector<> blocked_causes;
+    Stats::Vector blocked_causes;
 
     /** The average number of cycles blocked for each blocked cause. */
     Stats::Formula avg_blocked;
 
     /** The number of fast writes (WH64) performed. */
-    Stats::Scalar<> fastWrites;
+    Stats::Scalar fastWrites;
 
     /** The number of cache copies performed. */
-    Stats::Scalar<> cacheCopies;
+    Stats::Scalar cacheCopies;
 
     /** Number of blocks written back per thread. */
-    Stats::Vector<> writebacks;
+    Stats::Vector writebacks;
 
     /** Number of misses that hit in the MSHRs per command and thread. */
-    Stats::Vector<> mshr_hits[MemCmd::NUM_MEM_CMDS];
+    Stats::Vector mshr_hits[MemCmd::NUM_MEM_CMDS];
     /** Demand misses that hit in the MSHRs. */
     Stats::Formula demandMshrHits;
     /** Total number of misses that hit in the MSHRs. */
     Stats::Formula overallMshrHits;
 
     /** Number of misses that miss in the MSHRs, per command and thread. */
-    Stats::Vector<> mshr_misses[MemCmd::NUM_MEM_CMDS];
+    Stats::Vector mshr_misses[MemCmd::NUM_MEM_CMDS];
     /** Demand misses that miss in the MSHRs. */
     Stats::Formula demandMshrMisses;
     /** Total number of misses that miss in the MSHRs. */
     Stats::Formula overallMshrMisses;
 
     /** Number of misses that miss in the MSHRs, per command and thread. */
-    Stats::Vector<> mshr_uncacheable[MemCmd::NUM_MEM_CMDS];
+    Stats::Vector mshr_uncacheable[MemCmd::NUM_MEM_CMDS];
     /** Total number of misses that miss in the MSHRs. */
     Stats::Formula overallMshrUncacheable;
 
     /** Total cycle latency of each MSHR miss, per command and thread. */
-    Stats::Vector<> mshr_miss_latency[MemCmd::NUM_MEM_CMDS];
+    Stats::Vector mshr_miss_latency[MemCmd::NUM_MEM_CMDS];
     /** Total cycle latency of demand MSHR misses. */
     Stats::Formula demandMshrMissLatency;
     /** Total cycle latency of overall MSHR misses. */
     Stats::Formula overallMshrMissLatency;
 
     /** Total cycle latency of each MSHR miss, per command and thread. */
-    Stats::Vector<> mshr_uncacheable_lat[MemCmd::NUM_MEM_CMDS];
+    Stats::Vector mshr_uncacheable_lat[MemCmd::NUM_MEM_CMDS];
     /** Total cycle latency of overall MSHR misses. */
     Stats::Formula overallMshrUncacheableLatency;
 
@@ -342,11 +342,11 @@ class BaseCache : public MemObject
     Stats::Formula overallAvgMshrUncacheableLatency;
 
     /** The number of times a thread hit its MSHR cap. */
-    Stats::Vector<> mshr_cap_events;
+    Stats::Vector mshr_cap_events;
     /** The number of times software prefetches caused the MSHR to block. */
-    Stats::Vector<> soft_prefetch_mshr_full;
+    Stats::Vector soft_prefetch_mshr_full;
 
-    Stats::Scalar<> mshr_no_allocate_misses;
+    Stats::Scalar mshr_no_allocate_misses;
 
     /**
      * @}
@@ -445,12 +445,6 @@ class BaseCache : public MemObject
         }
     }
 
-    Tick nextMSHRReadyTime()
-    {
-        return std::min(mshrQueue.nextMSHRReadyTime(),
-                        writeBuffer.nextMSHRReadyTime());
-    }
-
     /**
      * Request the master bus for the given cause and time.
      * @param cause The reason for the request.
@@ -467,10 +461,11 @@ class BaseCache : public MemObject
      */
     void deassertMemSideBusRequest(RequestCause cause)
     {
-        // obsolete!!
-        assert(false);
-        // memSidePort->deassertBusRequest(cause);
-        // checkDrain();
+        // Obsolete... we no longer signal bus requests explicitly so
+        // we can't deassert them.  Leaving this in as a no-op since
+        // the prefetcher calls it to indicate that it no longer wants
+        // to request a prefetch, and someday that might be
+        // interesting again.
     }
 
     virtual unsigned int drain(Event *de);
@@ -481,7 +476,7 @@ class BaseCache : public MemObject
 
     void incMissCount(PacketPtr pkt)
     {
-        misses[pkt->cmdToIndex()][0/*pkt->req->getThreadNum()*/]++;
+        misses[pkt->cmdToIndex()][0/*pkt->req->threadId()*/]++;
 
         if (missCount) {
             --missCount;
diff --git a/src/mem/cache/blk.hh b/src/mem/cache/blk.hh
index 127c547ac..fe65672d6 100644
--- a/src/mem/cache/blk.hh
+++ b/src/mem/cache/blk.hh
@@ -38,8 +38,8 @@
 #include <list>
 
 #include "base/printable.hh"
-#include "sim/core.hh"		// for Tick
-#include "arch/isa_traits.hh"	// for Addr
+#include "sim/core.hh"          // for Tick
+#include "arch/isa_traits.hh"   // for Addr
 #include "mem/packet.hh"
 #include "mem/request.hh"
 
@@ -48,17 +48,17 @@
  */
 enum CacheBlkStatusBits {
     /** valid, readable */
-    BlkValid =		0x01,
+    BlkValid =          0x01,
     /** write permission */
-    BlkWritable =	0x02,
+    BlkWritable =       0x02,
     /** read permission (yes, block can be valid but not readable) */
-    BlkReadable =	0x04,
+    BlkReadable =       0x04,
     /** dirty (modified) */
-    BlkDirty =		0x08,
+    BlkDirty =          0x08,
     /** block was referenced */
-    BlkReferenced =	0x10,
+    BlkReferenced =     0x10,
     /** block was a hardware prefetch yet unaccessed*/
-    BlkHWPrefetched =	0x20
+    BlkHWPrefetched =   0x20
 };
 
 /**
@@ -108,18 +108,16 @@ class CacheBlk
      */
     class Lock {
       public:
-        int cpuNum;	// locking CPU
-        int threadNum;	// locking thread ID within CPU
+        int contextId;     // locking context
 
         // check for matching execution context
         bool matchesContext(Request *req)
         {
-            return (cpuNum == req->getCpuNum() &&
-                    threadNum == req->getThreadNum());
+            return (contextId == req->contextId());
         }
 
         Lock(Request *req)
-            : cpuNum(req->getCpuNum()), threadNum(req->getThreadNum())
+            : contextId(req->contextId())
         {
         }
     };
@@ -207,7 +205,7 @@ class CacheBlk
      * be touched.
      * @return True if the block was a hardware prefetch, unaccesed.
      */
-    bool isPrefetch() const
+    bool wasPrefetched() const
     {
         return (status & BlkHWPrefetched) != 0;
     }
diff --git a/src/mem/cache/builder.cc b/src/mem/cache/builder.cc
index db900c64c..599353b88 100644
--- a/src/mem/cache/builder.cc
+++ b/src/mem/cache/builder.cc
@@ -38,7 +38,6 @@
 // Must be included first to determine which caches we want
 #include "enums/Prefetch.hh"
 #include "mem/config/cache.hh"
-#include "mem/config/prefetch.hh"
 #include "mem/cache/base.hh"
 #include "mem/cache/cache.hh"
 #include "mem/bus.hh"
@@ -57,149 +56,78 @@
 #include "mem/cache/tags/iic.hh"
 #endif
 
-#if defined(USE_CACHE_SPLIT)
-#include "mem/cache/tags/split.hh"
-#endif
-
-#if defined(USE_CACHE_SPLIT_LIFO)
-#include "mem/cache/tags/split_lifo.hh"
-#endif
-
 //Prefetcher Headers
-#if defined(USE_GHB)
 #include "mem/cache/prefetch/ghb.hh"
-#endif
-#if defined(USE_TAGGED)
 #include "mem/cache/prefetch/tagged.hh"
-#endif
-#if defined(USE_STRIDED)
 #include "mem/cache/prefetch/stride.hh"
-#endif
 
 
 using namespace std;
 using namespace TheISA;
 
-#define BUILD_CACHE(TAGS, tags)                                      \
-    do {                                                                \
-        BasePrefetcher *pf;                                             \
-        if (prefetch_policy == Enums::tagged) {                         \
-            BUILD_TAGGED_PREFETCHER(TAGS);                              \
-        }                                                               \
-        else if (prefetch_policy == Enums::stride) {                    \
-            BUILD_STRIDED_PREFETCHER(TAGS);                             \
-        }                                                               \
-        else if (prefetch_policy == Enums::ghb) {                       \
-            BUILD_GHB_PREFETCHER(TAGS);                                 \
-        }                                                               \
-        else {                                                          \
-            BUILD_NULL_PREFETCHER(TAGS);                                \
-        }                                                               \
-        Cache<TAGS> *retval =                                           \
-            new Cache<TAGS>(this, tags, pf);                            \
-        return retval;                                                  \
+#define BUILD_CACHE(TAGS, tags)                         \
+    do {                                                \
+        BasePrefetcher *pf;                             \
+        if (prefetch_policy == Enums::tagged) {         \
+            pf = new TaggedPrefetcher(this);            \
+        }                                               \
+        else if (prefetch_policy == Enums::stride) {    \
+            pf = new StridePrefetcher(this);            \
+        }                                               \
+        else if (prefetch_policy == Enums::ghb) {       \
+            pf = new GHBPrefetcher(this);               \
+        }                                               \
+        else {                                          \
+            pf = NULL;                                  \
+        }                                               \
+        Cache<TAGS> *retval =                           \
+            new Cache<TAGS>(this, tags, pf);            \
+        return retval;                                  \
     } while (0)
 
-#define BUILD_CACHE_PANIC(x) do {			\
-        panic("%s not compiled into M5", x);		\
+#define BUILD_CACHE_PANIC(x) do {                       \
+        panic("%s not compiled into M5", x);            \
     } while (0)
 
 #if defined(USE_CACHE_FALRU)
-#define BUILD_FALRU_CACHE do {			    \
+#define BUILD_FALRU_CACHE do {                              \
         FALRU *tags = new FALRU(block_size, size, latency); \
-        BUILD_CACHE(FALRU, tags);		\
+        BUILD_CACHE(FALRU, tags);                           \
     } while (0)
 #else
 #define BUILD_FALRU_CACHE BUILD_CACHE_PANIC("falru cache")
 #endif
 
 #if defined(USE_CACHE_LRU)
-#define BUILD_LRU_CACHE do {				\
-        LRU *tags = new LRU(numSets, block_size, assoc, latency);	\
-        BUILD_CACHE(LRU, tags);			\
+#define BUILD_LRU_CACHE do {                                            \
+        LRU *tags = new LRU(numSets, block_size, assoc, latency);       \
+        BUILD_CACHE(LRU, tags);                                         \
     } while (0)
 #else
 #define BUILD_LRU_CACHE BUILD_CACHE_PANIC("lru cache")
 #endif
 
-#if defined(USE_CACHE_SPLIT)
-#define BUILD_SPLIT_CACHE do {					\
-        Split *tags = new Split(numSets, block_size, assoc, split_size, lifo, \
-                                two_queue, latency);		\
-        BUILD_CACHE(Split, tags);			\
-    } while (0)
-#else
-#define BUILD_SPLIT_CACHE BUILD_CACHE_PANIC("split cache")
-#endif
-
-#if defined(USE_CACHE_SPLIT_LIFO)
-#define BUILD_SPLIT_LIFO_CACHE do {				\
-        SplitLIFO *tags = new SplitLIFO(block_size, size, assoc,        \
-                                        latency, two_queue, -1);	\
-        BUILD_CACHE(SplitLIFO, tags);			\
-    } while (0)
-#else
-#define BUILD_SPLIT_LIFO_CACHE BUILD_CACHE_PANIC("lifo cache")
-#endif
-
 #if defined(USE_CACHE_IIC)
-#define BUILD_IIC_CACHE do {			\
-        IIC *tags = new IIC(iic_params);		\
-        BUILD_CACHE(IIC, tags);	\
+#define BUILD_IIC_CACHE do {                            \
+        IIC *tags = new IIC(iic_params);                \
+        BUILD_CACHE(IIC, tags);                         \
     } while (0)
 #else
 #define BUILD_IIC_CACHE BUILD_CACHE_PANIC("iic")
 #endif
 
-#define BUILD_CACHES do {				\
-        if (repl == NULL) {				\
-            if (numSets == 1) {				\
-                BUILD_FALRU_CACHE;		\
-            } else {					\
-                if (split == true) {			\
-                    BUILD_SPLIT_CACHE;		\
-                } else if (lifo == true) {		\
-                    BUILD_SPLIT_LIFO_CACHE;	\
-                } else {				\
-                    BUILD_LRU_CACHE;		\
-                }					\
-            }						\
-        } else {					\
-            BUILD_IIC_CACHE;			\
-        }						\
-    } while (0)
-
-#define BUILD_COHERENCE(b) do {						\
+#define BUILD_CACHES do {                               \
+        if (repl == NULL) {                             \
+            if (numSets == 1) {                         \
+                BUILD_FALRU_CACHE;                      \
+            } else {                                    \
+                BUILD_LRU_CACHE;                        \
+            }                                           \
+        } else {                                        \
+            BUILD_IIC_CACHE;                            \
+        }                                               \
     } while (0)
 
-#if defined(USE_TAGGED)
-#define BUILD_TAGGED_PREFETCHER(t)                              \
-    pf = new TaggedPrefetcher(this)
-#else
-#define BUILD_TAGGED_PREFETCHER(t) BUILD_CACHE_PANIC("Tagged Prefetcher")
-#endif
-
-#if defined(USE_STRIDED)
-#define BUILD_STRIDED_PREFETCHER(t)                             \
-    pf = new StridePrefetcher(this)
-#else
-#define BUILD_STRIDED_PREFETCHER(t) BUILD_CACHE_PANIC("Stride Prefetcher")
-#endif
-
-#if defined(USE_GHB)
-#define BUILD_GHB_PREFETCHER(t)                         \
-    pf = new GHBPrefetcher(this)
-#else
-#define BUILD_GHB_PREFETCHER(t) BUILD_CACHE_PANIC("GHB Prefetcher")
-#endif
-
-#if defined(USE_TAGGED)
-#define BUILD_NULL_PREFETCHER(t)                                \
-    pf = new TaggedPrefetcher(this)
-#else
-#define BUILD_NULL_PREFETCHER(t) BUILD_CACHE_PANIC("NULL Prefetcher (uses Tagged)")
-#endif
-
 BaseCache *
 BaseCacheParams::create()
 {
@@ -208,24 +136,6 @@ BaseCacheParams::create()
         subblock_size = block_size;
     }
 
-    //Warnings about prefetcher policy
-    if (prefetch_policy == Enums::none) {
-        if (prefetch_miss || prefetch_access)
-            panic("With no prefetcher, you shouldn't prefetch from"
-                  " either miss or access stream\n");
-    }
-
-    if (prefetch_policy == Enums::tagged || prefetch_policy == Enums::stride ||
-        prefetch_policy == Enums::ghb) {
-
-        if (!prefetch_miss && !prefetch_access)
-            warn("With this prefetcher you should chose a prefetch"
-                 " stream (miss or access)\nNo Prefetching will occur\n");
-
-        if (prefetch_miss && prefetch_access)
-            panic("Can't do prefetches from both miss and access stream");
-    }
-
 #if defined(USE_CACHE_IIC)
     // Build IIC params
     IIC::Params iic_params;
diff --git a/src/mem/cache/cache.cc b/src/mem/cache/cache.cc
index c640d4a60..d403535fc 100644
--- a/src/mem/cache/cache.cc
+++ b/src/mem/cache/cache.cc
@@ -50,14 +50,6 @@
 #include "mem/cache/tags/iic.hh"
 #endif
 
-#if defined(USE_CACHE_SPLIT)
-#include "mem/cache/tags/split.hh"
-#endif
-
-#if defined(USE_CACHE_SPLIT_LIFO)
-#include "mem/cache/tags/split_lifo.hh"
-#endif
-
 #include "mem/cache/cache_impl.hh"
 
 // Template Instantiations
@@ -76,12 +68,4 @@ template class Cache<IIC>;
 template class Cache<LRU>;
 #endif
 
-#if defined(USE_CACHE_SPLIT)
-template class Cache<Split>;
-#endif
-
-#if defined(USE_CACHE_SPLIT_LIFO)
-template class Cache<SplitLIFO>;
-#endif
-
 #endif //DOXYGEN_SHOULD_SKIP_THIS
diff --git a/src/mem/cache/cache.hh b/src/mem/cache/cache.hh
index f5f65d4dd..4570b067b 100644
--- a/src/mem/cache/cache.hh
+++ b/src/mem/cache/cache.hh
@@ -64,8 +64,6 @@ class Cache : public BaseCache
     /** A typedef for a list of BlkType pointers. */
     typedef typename TagStore::BlkList BlkList;
 
-    bool prefetchAccess;
-
   protected:
 
     class CpuSidePort : public CachePort
@@ -137,21 +135,14 @@ class Cache : public BaseCache
     BlkType *tempBlock;
 
     /**
-     * Can this cache should allocate a block on a line-sized write miss.
+     * This cache should allocate a block on a line-sized write miss.
      */
     const bool doFastWrites;
 
-    const bool prefetchMiss;
-
     /**
-     * Handle a replacement for the given request.
-     * @param blk A pointer to the block, usually NULL
-     * @param pkt The memory request to satisfy.
-     * @param new_state The new state of the block.
-     * @param writebacks A list to store any generated writebacks.
+     * Notify the prefetcher on every access, not just misses.
      */
-    BlkType* doReplacement(BlkType *blk, PacketPtr pkt,
-                           CacheBlk::State new_state, PacketList &writebacks);
+    const bool prefetchOnAccess;
 
     /**
      * Does all the processing necessary to perform the provided request.
@@ -159,10 +150,10 @@ class Cache : public BaseCache
      * @param lat The latency of the access.
      * @param writebacks List for any writebacks that need to be performed.
      * @param update True if the replacement data should be updated.
-     * @return Pointer to the cache block touched by the request. NULL if it
-     * was a miss.
+     * @return Boolean indicating whether the request was satisfied.
      */
-    bool access(PacketPtr pkt, BlkType *&blk, int &lat);
+    bool access(PacketPtr pkt, BlkType *&blk,
+                int &lat, PacketList &writebacks);
 
     /**
      *Handle doing the Compare and Swap function for SPARC.
@@ -181,7 +172,6 @@ class Cache : public BaseCache
      * Populates a cache block and handles all outstanding requests for the
      * satisfied fill request. This version takes two memory requests. One
      * contains the fill data, the other is an optional target to satisfy.
-     * Used for Cache::probe.
      * @param pkt The memory request with the fill data.
      * @param blk The cache block if it already exists.
      * @param writebacks List for any writebacks that need to be performed.
@@ -331,6 +321,11 @@ class Cache : public BaseCache
     bool inMissQueue(Addr addr) {
         return (mshrQueue.findMatch(addr) != 0);
     }
+
+    /**
+     * Find next request ready time from among possible sources.
+     */
+    Tick nextMSHRReadyTime();
 };
 
 #endif // __CACHE_HH__
diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh
index e546e2a9a..a78fd3637 100644
--- a/src/mem/cache/cache_impl.hh
+++ b/src/mem/cache/cache_impl.hh
@@ -38,6 +38,7 @@
  */
 
 #include "sim/host.hh"
+#include "base/fast_alloc.hh"
 #include "base/misc.hh"
 #include "base/range_ops.hh"
 
@@ -52,11 +53,10 @@
 template<class TagStore>
 Cache<TagStore>::Cache(const Params *p, TagStore *tags, BasePrefetcher *pf)
     : BaseCache(p),
-      prefetchAccess(p->prefetch_access),
       tags(tags),
       prefetcher(pf),
       doFastWrites(true),
-      prefetchMiss(p->prefetch_miss)
+      prefetchOnAccess(p->prefetch_on_access)
 {
     tempBlock = new BlkType();
     tempBlock->data = new uint8_t[blkSize];
@@ -71,7 +71,8 @@ Cache<TagStore>::Cache(const Params *p, TagStore *tags, BasePrefetcher *pf)
     memSidePort->setOtherPort(cpuSidePort);
 
     tags->setCache(this);
-    prefetcher->setCache(this);
+    if (prefetcher)
+        prefetcher->setCache(this);
 }
 
 template<class TagStore>
@@ -80,7 +81,8 @@ Cache<TagStore>::regStats()
 {
     BaseCache::regStats();
     tags->regStats(name());
-    prefetcher->regStats(name());
+    if (prefetcher)
+        prefetcher->regStats(name());
 }
 
 template<class TagStore>
@@ -147,8 +149,10 @@ Cache<TagStore>::cmpAndSwap(BlkType *blk, PacketPtr pkt)
             panic("Invalid size for conditional read/write\n");
     }
 
-    if (overwrite_mem)
+    if (overwrite_mem) {
         std::memcpy(blk_data, &overwrite_val, pkt->getSize());
+        blk->status |= BlkDirty;
+    }
 }
 
 
@@ -259,7 +263,8 @@ Cache<TagStore>::squash(int threadNum)
 
 template<class TagStore>
 bool
-Cache<TagStore>::access(PacketPtr pkt, BlkType *&blk, int &lat)
+Cache<TagStore>::access(PacketPtr pkt, BlkType *&blk,
+                        int &lat, PacketList &writebacks)
 {
     if (pkt->req->isUncacheable())  {
         blk = NULL;
@@ -267,34 +272,16 @@ Cache<TagStore>::access(PacketPtr pkt, BlkType *&blk, int &lat)
         return false;
     }
 
-    blk = tags->findBlock(pkt->getAddr(), lat);
-
-    if (prefetchAccess) {
-        //We are determining prefetches on access stream, call prefetcher
-        prefetcher->handleMiss(pkt, curTick);
-    }
+    blk = tags->accessBlock(pkt->getAddr(), lat);
 
     DPRINTF(Cache, "%s %x %s\n", pkt->cmdString(), pkt->getAddr(),
             (blk) ? "hit" : "miss");
 
     if (blk != NULL) {
 
-        if (blk->isPrefetch()) {
-            //Signal that this was a hit under prefetch (no need for
-            //use prefetch (only can get here if true)
-            DPRINTF(HWPrefetch, "Hit a block that was prefetched\n");
-            blk->status &= ~BlkHWPrefetched;
-            if (prefetchMiss) {
-                //If we are using the miss stream, signal the
-                //prefetcher otherwise the access stream would have
-                //already signaled this hit
-                prefetcher->handleMiss(pkt, curTick);
-            }
-        }
-
         if (pkt->needsExclusive() ? blk->isWritable() : blk->isReadable()) {
             // OK to satisfy access
-            hits[pkt->cmdToIndex()][0/*pkt->req->getThreadNum()*/]++;
+            hits[pkt->cmdToIndex()][0/*pkt->req->threadId()*/]++;
             satisfyCpuSideRequest(pkt, blk);
             return true;
         }
@@ -307,7 +294,6 @@ Cache<TagStore>::access(PacketPtr pkt, BlkType *&blk, int &lat)
     // into the cache without having a writeable copy (or any copy at
     // all).
     if (pkt->cmd == MemCmd::Writeback) {
-        PacketList writebacks;
         assert(blkSize == pkt->getSize());
         if (blk == NULL) {
             // need to do a replacement
@@ -318,19 +304,14 @@ Cache<TagStore>::access(PacketPtr pkt, BlkType *&blk, int &lat)
                 incMissCount(pkt);
                 return false;
             }
+            tags->insertBlock(pkt->getAddr(), blk);
             blk->status = BlkValid | BlkReadable;
         }
         std::memcpy(blk->data, pkt->getPtr<uint8_t>(), blkSize);
         blk->status |= BlkDirty;
-        // copy writebacks from replacement to write buffer
-        while (!writebacks.empty()) {
-            PacketPtr wbPkt = writebacks.front();
-            allocateWriteBuffer(wbPkt, curTick + hitLatency, true);
-            writebacks.pop_front();
-        }
         // nothing else to do; writeback doesn't expect response
         assert(!pkt->needsResponse());
-        hits[pkt->cmdToIndex()][0/*pkt->req->getThreadNum()*/]++;
+        hits[pkt->cmdToIndex()][0/*pkt->req->threadId()*/]++;
         return true;
     }
 
@@ -346,7 +327,7 @@ Cache<TagStore>::access(PacketPtr pkt, BlkType *&blk, int &lat)
 }
 
 
-class ForwardResponseRecord : public Packet::SenderState
+class ForwardResponseRecord : public Packet::SenderState, public FastAlloc
 {
     Packet::SenderState *prevSenderState;
     int prevSrc;
@@ -407,6 +388,9 @@ Cache<TagStore>::timingAccess(PacketPtr pkt)
             memSidePort->sendTiming(snoopPkt);
             // main memory will delete snoopPkt
         }
+        // since we're the official target but we aren't responding,
+        // delete the packet now.
+        delete pkt;
         return true;
     }
 
@@ -423,13 +407,13 @@ Cache<TagStore>::timingAccess(PacketPtr pkt)
 
     int lat = hitLatency;
     BlkType *blk = NULL;
-    bool satisfied = access(pkt, blk, lat);
+    PacketList writebacks;
+
+    bool satisfied = access(pkt, blk, lat, writebacks);
 
 #if 0
     /** @todo make the fast write alloc (wh64) work with coherence. */
 
-    PacketList writebacks;
-
     // If this is a block size write/hint (WH64) allocate the block here
     // if the coherence protocol allows it.
     if (!blk && pkt->getSize() >= blkSize && coherence->allowFastWrites() &&
@@ -447,15 +431,11 @@ Cache<TagStore>::timingAccess(PacketPtr pkt)
             ++fastWrites;
         }
     }
-
-    // copy writebacks to write buffer
-    while (!writebacks.empty()) {
-        PacketPtr wbPkt = writebacks.front();
-        allocateWriteBuffer(wbPkt, time, true);
-        writebacks.pop_front();
-    }
 #endif
 
+    // track time of availability of next prefetch, if any
+    Tick next_pf_time = 0;
+
     bool needsResponse = pkt->needsResponse();
 
     if (satisfied) {
@@ -465,10 +445,14 @@ Cache<TagStore>::timingAccess(PacketPtr pkt)
         } else {
             delete pkt;
         }
+
+        if (prefetcher && (prefetchOnAccess || (blk && blk->wasPrefetched()))) {
+            if (blk)
+                blk->status &= ~BlkHWPrefetched;
+            next_pf_time = prefetcher->notify(pkt, time);
+        }
     } else {
         // miss
-        if (prefetchMiss)
-            prefetcher->handleMiss(pkt, time);
 
         Addr blk_addr = pkt->getAddr() & ~(Addr(blkSize-1));
         MSHR *mshr = mshrQueue.findMatch(blk_addr);
@@ -476,8 +460,8 @@ Cache<TagStore>::timingAccess(PacketPtr pkt)
         if (mshr) {
             // MSHR hit
             //@todo remove hw_pf here
-            mshr_hits[pkt->cmdToIndex()][0/*pkt->req->getThreadNum()*/]++;
-            if (mshr->threadNum != 0/*pkt->req->getThreadNum()*/) {
+            mshr_hits[pkt->cmdToIndex()][0/*pkt->req->threadId()*/]++;
+            if (mshr->threadNum != 0/*pkt->req->threadId()*/) {
                 mshr->threadNum = -1;
             }
             mshr->allocateTarget(pkt, time, order++);
@@ -491,7 +475,7 @@ Cache<TagStore>::timingAccess(PacketPtr pkt)
             }
         } else {
             // no MSHR
-            mshr_misses[pkt->cmdToIndex()][0/*pkt->req->getThreadNum()*/]++;
+            mshr_misses[pkt->cmdToIndex()][0/*pkt->req->threadId()*/]++;
             // always mark as cache fill for now... if we implement
             // no-write-allocate or bypass accesses this will have to
             // be changed.
@@ -520,9 +504,23 @@ Cache<TagStore>::timingAccess(PacketPtr pkt)
 
                 allocateMissBuffer(pkt, time, true);
             }
+
+            if (prefetcher) {
+                next_pf_time = prefetcher->notify(pkt, time);
+            }
         }
     }
 
+    if (next_pf_time != 0)
+        requestMemSideBus(Request_PF, std::max(time, next_pf_time));
+
+    // copy writebacks to write buffer
+    while (!writebacks.empty()) {
+        PacketPtr wbPkt = writebacks.front();
+        allocateWriteBuffer(wbPkt, time, true);
+        writebacks.pop_front();
+    }
+
     return true;
 }
 
@@ -610,53 +608,79 @@ Cache<TagStore>::atomicAccess(PacketPtr pkt)
     // access in timing mode
 
     BlkType *blk = NULL;
+    PacketList writebacks;
 
-    if (!access(pkt, blk, lat)) {
+    if (!access(pkt, blk, lat, writebacks)) {
         // MISS
-        PacketPtr busPkt = getBusPacket(pkt, blk, pkt->needsExclusive());
+        PacketPtr bus_pkt = getBusPacket(pkt, blk, pkt->needsExclusive());
 
-        bool isCacheFill = (busPkt != NULL);
+        bool is_forward = (bus_pkt == NULL);
 
-        if (busPkt == NULL) {
+        if (is_forward) {
             // just forwarding the same request to the next level
             // no local cache operation involved
-            busPkt = pkt;
+            bus_pkt = pkt;
         }
 
         DPRINTF(Cache, "Sending an atomic %s for %x\n",
-                busPkt->cmdString(), busPkt->getAddr());
+                bus_pkt->cmdString(), bus_pkt->getAddr());
 
 #if TRACING_ON
         CacheBlk::State old_state = blk ? blk->status : 0;
 #endif
 
-        lat += memSidePort->sendAtomic(busPkt);
+        lat += memSidePort->sendAtomic(bus_pkt);
 
         DPRINTF(Cache, "Receive response: %s for addr %x in state %i\n",
-                busPkt->cmdString(), busPkt->getAddr(), old_state);
-
-        bool is_error = busPkt->isError();
-        assert(!busPkt->wasNacked());
-
-        if (is_error && pkt->needsResponse()) {
-            pkt->makeAtomicResponse();
-            pkt->copyError(busPkt);
-        } else if (isCacheFill && !is_error) {
-            PacketList writebacks;
-            blk = handleFill(busPkt, blk, writebacks);
-            satisfyCpuSideRequest(pkt, blk);
-            delete busPkt;
-
-            // Handle writebacks if needed
-            while (!writebacks.empty()){
-                PacketPtr wbPkt = writebacks.front();
-                memSidePort->sendAtomic(wbPkt);
-                writebacks.pop_front();
-                delete wbPkt;
+                bus_pkt->cmdString(), bus_pkt->getAddr(), old_state);
+
+        assert(!bus_pkt->wasNacked());
+
+        // If packet was a forward, the response (if any) is already
+        // in place in the bus_pkt == pkt structure, so we don't need
+        // to do anything.  Otherwise, use the separate bus_pkt to
+        // generate response to pkt and then delete it.
+        if (!is_forward) {
+            if (pkt->needsResponse()) {
+                assert(bus_pkt->isResponse());
+                if (bus_pkt->isError()) {
+                    pkt->makeAtomicResponse();
+                    pkt->copyError(bus_pkt);
+                } else if (bus_pkt->isRead() ||
+                           bus_pkt->cmd == MemCmd::UpgradeResp) {
+                    // we're updating cache state to allow us to
+                    // satisfy the upstream request from the cache
+                    blk = handleFill(bus_pkt, blk, writebacks);
+                    satisfyCpuSideRequest(pkt, blk);
+                } else {
+                    // we're satisfying the upstream request without
+                    // modifying cache state, e.g., a write-through
+                    pkt->makeAtomicResponse();
+                }
             }
+            delete bus_pkt;
         }
     }
 
+    // Note that we don't invoke the prefetcher at all in atomic mode.
+    // It's not clear how to do it properly, particularly for
+    // prefetchers that aggressively generate prefetch candidates and
+    // rely on bandwidth contention to throttle them; these will tend
+    // to pollute the cache in atomic mode since there is no bandwidth
+    // contention.  If we ever do want to enable prefetching in atomic
+    // mode, though, this is the place to do it... see timingAccess()
+    // for an example (though we'd want to issue the prefetch(es)
+    // immediately rather than calling requestMemSideBus() as we do
+    // there).
+
+    // Handle writebacks if needed
+    while (!writebacks.empty()){
+        PacketPtr wbPkt = writebacks.front();
+        memSidePort->sendAtomic(wbPkt);
+        writebacks.pop_front();
+        delete wbPkt;
+    }
+
     // We now have the block one way or another (hit or completed miss)
 
     if (pkt->needsResponse()) {
@@ -742,14 +766,17 @@ Cache<TagStore>::handleResponse(PacketPtr pkt)
     PacketList writebacks;
 
     if (pkt->req->isUncacheable()) {
-        mshr_uncacheable_lat[stats_cmd_idx][0/*pkt->req->getThreadNum()*/] +=
+        mshr_uncacheable_lat[stats_cmd_idx][0/*pkt->req->threadId()*/] +=
             miss_latency;
     } else {
-        mshr_miss_latency[stats_cmd_idx][0/*pkt->req->getThreadNum()*/] +=
+        mshr_miss_latency[stats_cmd_idx][0/*pkt->req->threadId()*/] +=
             miss_latency;
     }
 
-    if (mshr->isCacheFill && !is_error) {
+    bool is_fill = !mshr->isForward &&
+        (pkt->isRead() || pkt->cmd == MemCmd::UpgradeResp);
+
+    if (is_fill && !is_error) {
         DPRINTF(Cache, "Block for addr %x being updated in Cache\n",
                 pkt->getAddr());
 
@@ -770,9 +797,10 @@ Cache<TagStore>::handleResponse(PacketPtr pkt)
     while (mshr->hasTargets()) {
         MSHR::Target *target = mshr->getTarget();
 
-        if (target->isCpuSide()) {
+        switch (target->source) {
+          case MSHR::Target::FromCPU:
             Tick completion_time;
-            if (blk != NULL) {
+            if (is_fill) {
                 satisfyCpuSideRequest(target->pkt, blk);
                 // How many bytes past the first request is this one
                 int transfer_offset =
@@ -786,7 +814,7 @@ Cache<TagStore>::handleResponse(PacketPtr pkt)
                     (transfer_offset ? pkt->finishTime : pkt->firstWordTime);
 
                 assert(!target->pkt->req->isUncacheable());
-                missLatency[target->pkt->cmdToIndex()][0/*pkt->req->getThreadNum()*/] +=
+                missLatency[target->pkt->cmdToIndex()][0/*pkt->req->threadId()*/] +=
                     completion_time - target->recvTime;
             } else {
                 // not a cache fill, just forwarding response
@@ -808,13 +836,27 @@ Cache<TagStore>::handleResponse(PacketPtr pkt)
                 target->pkt->cmd = MemCmd::ReadRespWithInvalidate;
             }
             cpuSidePort->respond(target->pkt, completion_time);
-        } else {
+            break;
+
+          case MSHR::Target::FromPrefetcher:
+            assert(target->pkt->cmd == MemCmd::HardPFReq);
+            if (blk)
+                blk->status |= BlkHWPrefetched;
+            delete target->pkt->req;
+            delete target->pkt;
+            break;
+
+          case MSHR::Target::FromSnoop:
             // I don't believe that a snoop can be in an error state
             assert(!is_error);
             // response to snoop request
             DPRINTF(Cache, "processing deferred snoop...\n");
             handleSnoop(target->pkt, blk, true, true,
                         mshr->pendingInvalidate || pkt->isInvalidate());
+            break;
+
+          default:
+            panic("Illegal target->source enum %d\n", target->source);
         }
 
         mshr->popTarget();
@@ -825,6 +867,9 @@ Cache<TagStore>::handleResponse(PacketPtr pkt)
     }
 
     if (mshr->promoteDeferredTargets()) {
+        // avoid later read getting stale data while write miss is
+        // outstanding (see timingAccess()); blk may be NULL here
+        if (blk) blk->status &= ~BlkReadable;
         MSHRQueue *mq = mshr->queue;
         mq->markPending(mshr);
         requestMemSideBus((RequestCause)mq->index, pkt->finishTime);
@@ -861,7 +906,7 @@ Cache<TagStore>::writebackBlk(BlkType *blk)
 {
     assert(blk && blk->isValid() && blk->isDirty());
 
-    writebacks[0/*pkt->req->getThreadNum()*/]++;
+    writebacks[0/*pkt->req->threadId()*/]++;
 
     Request *writebackReq =
         new Request(tags->regenerateBlkAddr(blk->tag, blk->set), blkSize, 0);
@@ -878,7 +923,7 @@ template<class TagStore>
 typename Cache<TagStore>::BlkType*
 Cache<TagStore>::allocateBlock(Addr addr, PacketList &writebacks)
 {
-    BlkType *blk = tags->findReplacement(addr, writebacks);
+    BlkType *blk = tags->findVictim(addr, writebacks);
 
     if (blk->isValid()) {
         Addr repl_addr = tags->regenerateBlkAddr(blk->tag, blk->set);
@@ -889,6 +934,7 @@ Cache<TagStore>::allocateBlock(Addr addr, PacketList &writebacks)
             assert(!blk->isWritable());
             assert(repl_mshr->needsExclusive());
             // too hard to replace block with transient state
+            // allocation failed, block not inserted
             return NULL;
         } else {
             DPRINTF(Cache, "replacement: replacing %x with %x: %s\n",
@@ -902,8 +948,6 @@ Cache<TagStore>::allocateBlock(Addr addr, PacketList &writebacks)
         }
     }
 
-    // Set tag for new block.  Caller is responsible for setting status.
-    blk->tag = tags->extractTag(addr);
     return blk;
 }
 
@@ -934,7 +978,10 @@ Cache<TagStore>::handleFill(PacketPtr pkt, BlkType *blk,
             assert(!tempBlock->isValid());
             blk = tempBlock;
             tempBlock->set = tags->extractSet(addr);
+            tempBlock->tag = tags->extractTag(addr);
             DPRINTF(Cache, "using temp block for %x\n", addr);
+        } else {
+            tags->insertBlock(addr, blk);
         }
     } else {
         // existing block... probably an upgrade
@@ -1088,6 +1135,11 @@ Cache<TagStore>::handleSnoop(PacketPtr pkt, BlkType *blk,
             pkt->makeAtomicResponse();
             pkt->setDataFromBlock(blk->data, blkSize);
         }
+    } else if (is_timing && is_deferred) {
+        // if it's a deferred timing snoop then we've made a copy of
+        // the packet, and so if we're not using that copy to respond
+        // then we need to delete it here.
+        delete pkt;
     }
 
     // Do this last in case it deallocates block data or something
@@ -1156,6 +1208,7 @@ Cache<TagStore>::snoopTiming(PacketPtr pkt)
             if (pkt->isInvalidate()) {
                 // Invalidation trumps our writeback... discard here
                 markInService(mshr);
+                delete wb_pkt;
             }
 
             // If this was a shared writeback, there may still be
@@ -1253,7 +1306,7 @@ Cache<TagStore>::getNextMSHR()
         if (pkt) {
             // Update statistic on number of prefetches issued
             // (hwpf_mshr_misses)
-            mshr_misses[pkt->cmdToIndex()][0/*pkt->req->getThreadNum()*/]++;
+            mshr_misses[pkt->cmdToIndex()][0/*pkt->req->threadId()*/]++;
             // Don't request bus, since we already have it
             return allocateMissBuffer(pkt, curTick, false);
         }
@@ -1277,7 +1330,7 @@ Cache<TagStore>::getTimingPacket()
     PacketPtr tgt_pkt = mshr->getTarget()->pkt;
     PacketPtr pkt = NULL;
 
-    if (mshr->isSimpleForward()) {
+    if (mshr->isForwardNoResponse()) {
         // no response expected, just forward packet as it is
         assert(tags->findBlock(mshr->addr) == NULL);
         pkt = tgt_pkt;
@@ -1285,11 +1338,10 @@ Cache<TagStore>::getTimingPacket()
         BlkType *blk = tags->findBlock(mshr->addr);
         pkt = getBusPacket(tgt_pkt, blk, mshr->needsExclusive());
 
-        mshr->isCacheFill = (pkt != NULL);
+        mshr->isForward = (pkt == NULL);
 
-        if (pkt == NULL) {
+        if (mshr->isForward) {
             // not a cache block request, but a response is expected
-            assert(!mshr->isSimpleForward());
             // make copy of current packet to forward, keep current
             // copy for response handling
             pkt = new Packet(tgt_pkt);
@@ -1306,6 +1358,22 @@ Cache<TagStore>::getTimingPacket()
 }
 
 
+template<class TagStore>
+Tick
+Cache<TagStore>::nextMSHRReadyTime()
+{
+    Tick nextReady = std::min(mshrQueue.nextMSHRReadyTime(),
+                              writeBuffer.nextMSHRReadyTime());
+
+    if (prefetcher) {
+        nextReady = std::min(nextReady,
+                             prefetcher->nextPrefetchReadyTime());
+    }
+
+    return nextReady;
+}
+
+
 ///////////////
 //
 // CpuSidePort
@@ -1463,7 +1531,7 @@ Cache<TagStore>::MemSidePort::sendPacket()
             waitingOnRetry = !success;
             if (waitingOnRetry) {
                 DPRINTF(CachePort, "now waiting on a retry\n");
-                if (!mshr->isSimpleForward()) {
+                if (!mshr->isForwardNoResponse()) {
                     delete pkt;
                 }
             } else {
@@ -1481,7 +1549,7 @@ Cache<TagStore>::MemSidePort::sendPacket()
         // @TODO: need to facotr in prefetch requests here somehow
         if (nextReady != MaxTick) {
             DPRINTF(CachePort, "more packets to send @ %d\n", nextReady);
-            sendEvent->schedule(std::max(nextReady, curTick + 1));
+            schedule(sendEvent, std::max(nextReady, curTick + 1));
         } else {
             // no more to send right now: if we're draining, we may be done
             if (drainEvent) {
diff --git a/src/mem/cache/mshr.cc b/src/mem/cache/mshr.cc
index 6537f6343..9ec9c090c 100644
--- a/src/mem/cache/mshr.cc
+++ b/src/mem/cache/mshr.cc
@@ -64,9 +64,9 @@ MSHR::TargetList::TargetList()
 
 inline void
 MSHR::TargetList::add(PacketPtr pkt, Tick readyTime,
-                      Counter order, bool cpuSide, bool markPending)
+                      Counter order, Target::Source source, bool markPending)
 {
-    if (cpuSide) {
+    if (source != Target::FromSnoop) {
         if (pkt->needsExclusive()) {
             needsExclusive = true;
         }
@@ -84,7 +84,7 @@ MSHR::TargetList::add(PacketPtr pkt, Tick readyTime,
         }
     }
 
-    push_back(Target(pkt, readyTime, order, cpuSide, markPending));
+    push_back(Target(pkt, readyTime, order, source, markPending));
 }
 
 
@@ -141,7 +141,14 @@ print(std::ostream &os, int verbosity, const std::string &prefix) const
 {
     ConstIterator end_i = end();
     for (ConstIterator i = begin(); i != end_i; ++i) {
-        ccprintf(os, "%s%s: ", prefix, i->isCpuSide() ? "cpu" : "mem");
+        const char *s;  // label for where this target came from
+        switch (i->source) {
+          case Target::FromCPU: s = "FromCPU"; break;
+          case Target::FromSnoop: s = "FromSnoop"; break;
+          case Target::FromPrefetcher: s = "FromPrefetcher"; break;
+          default: s = ""; break;
+        }
+        ccprintf(os, "%s%s: ", prefix, s);
         i->pkt->print(os, verbosity, "");
     }
 }
@@ -156,16 +163,18 @@ MSHR::allocate(Addr _addr, int _size, PacketPtr target,
     readyTime = whenReady;
     order = _order;
     assert(target);
-    isCacheFill = false;
+    isForward = false;
     _isUncacheable = target->req->isUncacheable();
     inService = false;
     downstreamPending = false;
     threadNum = 0;
     ntargets = 1;
-    // Don't know of a case where we would allocate a new MSHR for a
-    // snoop (mem-side request), so set cpuSide to true here.
     assert(targets->isReset());
-    targets->add(target, whenReady, _order, true, true);
+    // Don't know of a case where we would allocate a new MSHR for a
+    // snoop (mem-side request), so set source according to request here
+    Target::Source source = (target->cmd == MemCmd::HardPFReq) ?
+        Target::FromPrefetcher : Target::FromCPU;
+    targets->add(target, whenReady, _order, source, true);
     assert(deferredTargets->isReset());
     pendingInvalidate = false;
     pendingShared = false;
@@ -187,7 +196,7 @@ bool
 MSHR::markInService()
 {
     assert(!inService);
-    if (isSimpleForward()) {
+    if (isForwardNoResponse()) {
         // we just forwarded the request packet & don't expect a
         // response, so get rid of it
         assert(getNumTargets() == 1);
@@ -230,17 +239,22 @@ MSHR::allocateTarget(PacketPtr pkt, Tick whenReady, Counter _order)
     //   comes back (but before this target is processed)
     // - the outstanding request is for a non-exclusive block and this
     //   target requires an exclusive block
+
+    // assume we'd never issue a prefetch when we've got an
+    // outstanding miss
+    assert(pkt->cmd != MemCmd::HardPFReq);
+
     if (inService &&
         (!deferredTargets->empty() || pendingInvalidate ||
          (!targets->needsExclusive && pkt->needsExclusive()))) {
         // need to put on deferred list
-        deferredTargets->add(pkt, whenReady, _order, true, true);
+        deferredTargets->add(pkt, whenReady, _order, Target::FromCPU, true);
     } else {
         // No request outstanding, or still OK to append to
         // outstanding request: append to regular target list.  Only
         // mark pending if current request hasn't been issued yet
         // (isn't in service).
-        targets->add(pkt, whenReady, _order, true, !inService);
+        targets->add(pkt, whenReady, _order, Target::FromCPU, !inService);
     }
 
     ++ntargets;
@@ -291,7 +305,7 @@ MSHR::handleSnoop(PacketPtr pkt, Counter _order)
         // actual target device (typ. PhysicalMemory) will delete the
         // packet on reception, so we need to save a copy here
         PacketPtr cp_pkt = new Packet(pkt, true);
-        targets->add(cp_pkt, curTick, _order, false,
+        targets->add(cp_pkt, curTick, _order, Target::FromSnoop,
                      downstreamPending && targets->needsExclusive);
         ++ntargets;
 
@@ -403,7 +417,8 @@ MSHR::print(std::ostream &os, int verbosity, const std::string &prefix) const
 {
     ccprintf(os, "%s[%x:%x] %s %s %s state: %s %s %s %s\n",
              prefix, addr, addr+size-1,
-             isCacheFill ? "Fill" : "",
+             isForward ? "Forward" : "",
+             isForwardNoResponse() ? "ForwNoResp" : "",
              needsExclusive() ? "Excl" : "",
              _isUncacheable ? "Unc" : "",
              inService ? "InSvc" : "",
diff --git a/src/mem/cache/mshr.hh b/src/mem/cache/mshr.hh
index fdb0485cb..bed7012b0 100644
--- a/src/mem/cache/mshr.hh
+++ b/src/mem/cache/mshr.hh
@@ -55,20 +55,25 @@ class MSHR : public Packet::SenderState, public Printable
 
     class Target {
       public:
+
+        enum Source {
+            FromCPU,
+            FromSnoop,
+            FromPrefetcher
+        };
+
         Tick recvTime;  //!< Time when request was received (for stats)
         Tick readyTime; //!< Time when request is ready to be serviced
         Counter order;  //!< Global order (for memory consistency mgmt)
         PacketPtr pkt;  //!< Pending request packet.
-        bool cpuSide;   //!< Did request come from cpu side or mem side?
+        Source source;  //!< Did request come from cpu, memory, or prefetcher?
         bool markedPending; //!< Did we mark upstream MSHR
                             //!<  as downstreamPending?
 
-        bool isCpuSide() const { return cpuSide; }
-
         Target(PacketPtr _pkt, Tick _readyTime, Counter _order,
-               bool _cpuSide, bool _markedPending)
+               Source _source, bool _markedPending)
             : recvTime(curTick), readyTime(_readyTime), order(_order),
-              pkt(_pkt), cpuSide(_cpuSide), markedPending(_markedPending)
+              pkt(_pkt), source(_source), markedPending(_markedPending)
         {}
     };
 
@@ -85,7 +90,7 @@ class MSHR : public Packet::SenderState, public Printable
         void resetFlags() { needsExclusive = hasUpgrade = false; }
         bool isReset()    { return !needsExclusive && !hasUpgrade; }
         void add(PacketPtr pkt, Tick readyTime, Counter order,
-                 bool cpuSide, bool markPending);
+                 Target::Source source, bool markPending);
         void replaceUpgrades();
         void clearDownstreamPending();
         bool checkFunctional(PacketPtr pkt);
@@ -118,8 +123,8 @@ class MSHR : public Packet::SenderState, public Printable
     /** True if the request has been sent to the bus. */
     bool inService;
 
-    /** True if we will be putting the returned block in the cache */
-    bool isCacheFill;
+    /** True if the request is just a simple forward from an upper level */
+    bool isForward;
 
     /** True if we need to get an exclusive copy of the block. */
     bool needsExclusive() const { return targets->needsExclusive; }
@@ -200,7 +205,7 @@ public:
      * Returns the current number of allocated targets.
      * @return The current number of allocated targets.
      */
-    int getNumTargets() { return ntargets; }
+    int getNumTargets() const { return ntargets; }
 
     /**
      * Returns a pointer to the target list.
@@ -212,13 +217,17 @@ public:
      * Returns true if there are targets left.
      * @return true if there are targets
      */
-    bool hasTargets() { return !targets->empty(); }
+    bool hasTargets() const { return !targets->empty(); }
 
     /**
      * Returns a reference to the first target.
      * @return A pointer to the first target.
      */
-    Target *getTarget() { assert(hasTargets());  return &targets->front(); }
+    Target *getTarget() const
+    {
+        assert(hasTargets());
+        return &targets->front();
+    }
 
     /**
      * Pop first target.
@@ -229,12 +238,12 @@ public:
         targets->pop_front();
     }
 
-    bool isSimpleForward()
+    bool isForwardNoResponse() const
     {
         if (getNumTargets() != 1)
             return false;
         Target *tgt = getTarget();
-        return tgt->isCpuSide() && !tgt->pkt->needsResponse();
+        return tgt->source == Target::FromCPU && !tgt->pkt->needsResponse();
     }
 
     bool promoteDeferredTargets();
diff --git a/src/mem/cache/mshr_queue.cc b/src/mem/cache/mshr_queue.cc
index 45331c33d..b5c6cc7b8 100644
--- a/src/mem/cache/mshr_queue.cc
+++ b/src/mem/cache/mshr_queue.cc
@@ -230,7 +230,7 @@ MSHRQueue::squash(int threadNum)
         if (mshr->threadNum == threadNum) {
             while (mshr->hasTargets()) {
                 mshr->popTarget();
-                assert(0/*target->req->getThreadNum()*/ == threadNum);
+                assert(0/*target->req->threadId()*/ == threadNum);
             }
             assert(!mshr->hasTargets());
             assert(mshr->ntargets==0);
diff --git a/src/mem/cache/prefetch/base.cc b/src/mem/cache/prefetch/base.cc
index fcc02ff28..365ce6727 100644
--- a/src/mem/cache/prefetch/base.cc
+++ b/src/mem/cache/prefetch/base.cc
@@ -33,6 +33,7 @@
  * Hardware Prefetcher Definition.
  */
 
+#include "arch/isa_traits.hh"
 #include "base/trace.hh"
 #include "mem/cache/base.hh"
 #include "mem/cache/prefetch/base.hh"
@@ -43,7 +44,7 @@ BasePrefetcher::BasePrefetcher(const BaseCacheParams *p)
     : size(p->prefetcher_size), pageStop(!p->prefetch_past_page),
       serialSquash(p->prefetch_serial_squash),
       cacheCheckPush(p->prefetch_cache_check_push),
-      only_data(p->prefetch_data_accesses_only)
+      onlyData(p->prefetch_data_accesses_only)
 {
 }
 
@@ -52,6 +53,7 @@ BasePrefetcher::setCache(BaseCache *_cache)
 {
     cache = _cache;
     blkSize = cache->getBlockSize();
+    _name = cache->name() + "-pf";
 }
 
 void
@@ -99,7 +101,8 @@ BasePrefetcher::regStats(const std::string &name)
 
     pfSquashed
         .name(name + ".prefetcher.num_hwpf_squashed_from_miss")
-        .desc("number of hwpf that got squashed due to a miss aborting calculation time")
+        .desc("number of hwpf that got squashed due to a miss "
+              "aborting calculation time")
         ;
 }
 
@@ -126,60 +129,79 @@ BasePrefetcher::inMissQueue(Addr addr)
 PacketPtr
 BasePrefetcher::getPacket()
 {
-    DPRINTF(HWPrefetch, "%s:Requesting a hw_pf to issue\n", cache->name());
+    DPRINTF(HWPrefetch, "Requesting a hw_pf to issue\n");
 
     if (pf.empty()) {
-        DPRINTF(HWPrefetch, "%s:No HW_PF found\n", cache->name());
+        DPRINTF(HWPrefetch, "No HW_PF found\n");
         return NULL;
     }
 
     PacketPtr pkt;
-    bool keepTrying = false;
+    bool keep_trying = false;
     do {
         pkt = *pf.begin();
         pf.pop_front();
         if (!cacheCheckPush) {
-            keepTrying = cache->inCache(pkt->getAddr());
+            keep_trying = cache->inCache(pkt->getAddr());
         }
+
+        if (keep_trying) {
+            DPRINTF(HWPrefetch, "addr 0x%x in cache, skipping\n",
+                    pkt->getAddr());
+            delete pkt->req;
+            delete pkt;
+        }
+
         if (pf.empty()) {
             cache->deassertMemSideBusRequest(BaseCache::Request_PF);
-            if (keepTrying) return NULL; //None left, all were in cache
+            if (keep_trying) {
+                return NULL; // None left, all were in cache
+            }
         }
-    } while (keepTrying);
+    } while (keep_trying);
 
     pfIssued++;
+    assert(pkt != NULL);
+    DPRINTF(HWPrefetch, "returning 0x%x\n", pkt->getAddr());
     return pkt;
 }
 
-void
-BasePrefetcher::handleMiss(PacketPtr &pkt, Tick time)
+
+Tick
+BasePrefetcher::notify(PacketPtr &pkt, Tick time)
 {
-    if (!pkt->req->isUncacheable() && !(pkt->req->isInstRead() && only_data))
-    {
-        //Calculate the blk address
-        Addr blkAddr = pkt->getAddr() & ~(Addr)(blkSize-1);
+    if (!pkt->req->isUncacheable() && !(pkt->req->isInstRead() && onlyData)) {
+        // Calculate the blk address
+        Addr blk_addr = pkt->getAddr() & ~(Addr)(blkSize-1);
 
-        //Check if miss is in pfq, if so remove it
-        std::list<PacketPtr>::iterator iter = inPrefetch(blkAddr);
+        // Check if miss is in pfq, if so remove it
+        std::list<PacketPtr>::iterator iter = inPrefetch(blk_addr);
         if (iter != pf.end()) {
-            DPRINTF(HWPrefetch, "%s:Saw a miss to a queued prefetch, removing it\n", cache->name());
+            DPRINTF(HWPrefetch, "Saw a miss to a queued prefetch addr: "
+                    "0x%x, removing it\n", blk_addr);
             pfRemovedMSHR++;
+            delete (*iter)->req;
+            delete (*iter);
             pf.erase(iter);
             if (pf.empty())
                 cache->deassertMemSideBusRequest(BaseCache::Request_PF);
         }
 
-        //Remove anything in queue with delay older than time
-        //since everything is inserted in time order, start from end
-        //and work until pf.empty() or time is earlier
-        //This is done to emulate Aborting the previous work on a new miss
-        //Needed for serial calculators like GHB
+        // Remove anything in queue with delay older than time
+        // since everything is inserted in time order, start from end
+        // and work until pf.empty() or time is earlier
+        // This is done to emulate Aborting the previous work on a new miss
+        // Needed for serial calculators like GHB
         if (serialSquash) {
             iter = pf.end();
             iter--;
             while (!pf.empty() && ((*iter)->time >= time)) {
                 pfSquashed++;
-                pf.pop_back();
+                DPRINTF(HWPrefetch, "Squashing old prefetch addr: 0x%x\n",
+                        (*iter)->getAddr());
+                delete (*iter)->req;
+                delete (*iter);
+                iter = pf.erase(iter); // erase() invalidates iter
                 iter--;
             }
             if (pf.empty())
@@ -191,74 +213,70 @@ BasePrefetcher::handleMiss(PacketPtr &pkt, Tick time)
         std::list<Tick> delays;
         calculatePrefetch(pkt, addresses, delays);
 
-        std::list<Addr>::iterator addr = addresses.begin();
-        std::list<Tick>::iterator delay = delays.begin();
-        while (addr != addresses.end())
-        {
-            DPRINTF(HWPrefetch, "%s:Found a pf canidate, inserting into prefetch queue\n", cache->name());
-            //temp calc this here...
+        std::list<Addr>::iterator addrIter = addresses.begin();
+        std::list<Tick>::iterator delayIter = delays.begin();
+        for (; addrIter != addresses.end(); ++addrIter, ++delayIter) {
+            Addr addr = *addrIter;
+
             pfIdentified++;
-            //create a prefetch memreq
-            Request * prefetchReq = new Request(*addr, blkSize, 0);
-            PacketPtr prefetch;
-            prefetch = new Packet(prefetchReq, MemCmd::HardPFReq, -1);
-            prefetch->allocate();
-            prefetch->req->setThreadContext(pkt->req->getCpuNum(),
-                                            pkt->req->getThreadNum());
-
-            prefetch->time = time + (*delay); //@todo ADD LATENCY HERE
-            //... initialize
-
-            //Check if it is already in the cache
-            if (cacheCheckPush) {
-                if (cache->inCache(prefetch->getAddr())) {
-                    addr++;
-                    delay++;
-                    continue;
-                }
+
+            DPRINTF(HWPrefetch, "Found a pf candidate addr: 0x%x, "
+                    "inserting into prefetch queue with delay %d time %d\n",
+                    addr, *delayIter, time);
+
+            // Check if it is already in the cache
+            if (cacheCheckPush && cache->inCache(addr)) {
+                DPRINTF(HWPrefetch, "Prefetch addr already in cache\n");
+                continue;
             }
 
-            //Check if it is already in the miss_queue
-            if (cache->inMissQueue(prefetch->getAddr())) {
-                addr++;
-                delay++;
+            // Check if it is already in the miss_queue
+            if (cache->inMissQueue(addr)) {
+                DPRINTF(HWPrefetch, "Prefetch addr already in miss queue\n");
                 continue;
             }
 
-            //Check if it is already in the pf buffer
-            if (inPrefetch(prefetch->getAddr()) != pf.end()) {
+            // Check if it is already in the pf buffer
+            if (inPrefetch(addr) != pf.end()) {
                 pfBufferHit++;
-                addr++;
-                delay++;
+                DPRINTF(HWPrefetch, "Prefetch addr already in pf buffer\n");
                 continue;
             }
 
-            //We just remove the head if we are full
-            if (pf.size() == size)
-            {
-                DPRINTF(HWPrefetch, "%s:Inserting into prefetch queue, it was full removing oldest\n", cache->name());
+            // create a prefetch memreq
+            Request *prefetchReq = new Request(*addrIter, blkSize, 0);
+            PacketPtr prefetch =
+                new Packet(prefetchReq, MemCmd::HardPFReq, Packet::Broadcast);
+            prefetch->allocate();
+            prefetch->req->setThreadContext(pkt->req->contextId(),
+                                            pkt->req->threadId());
+
+            prefetch->time = time + (*delayIter); // @todo ADD LATENCY HERE
+
+            // We just remove the head if we are full
+            if (pf.size() == size) {
                 pfRemovedFull++;
+                PacketPtr old_pkt = *pf.begin();
+                DPRINTF(HWPrefetch, "Prefetch queue full, "
+                        "removing oldest 0x%x\n", old_pkt->getAddr());
+                delete old_pkt->req;
+                delete old_pkt;
                 pf.pop_front();
             }
 
             pf.push_back(prefetch);
-
-            //Make sure to request the bus, with proper delay
-            cache->requestMemSideBus(BaseCache::Request_PF, prefetch->time);
-
-            //Increment through the list
-            addr++;
-            delay++;
         }
     }
+
+    return pf.empty() ? 0 : pf.front()->time;
 }
 
 std::list<PacketPtr>::iterator
 BasePrefetcher::inPrefetch(Addr address)
 {
-    //Guaranteed to only be one match, we always check before inserting
+    // Guaranteed to only be one match, we always check before inserting
     std::list<PacketPtr>::iterator iter;
-    for (iter=pf.begin(); iter != pf.end(); iter++) {
+    for (iter = pf.begin(); iter != pf.end(); iter++) {
         if (((*iter)->getAddr() & ~(Addr)(blkSize-1)) == address) {
             return iter;
         }
@@ -266,4 +284,8 @@ BasePrefetcher::inPrefetch(Addr address)
     return pf.end();
 }
 
-
+bool
+BasePrefetcher::samePage(Addr a, Addr b)
+{
+    return roundDown(a, TheISA::VMPageSize) == roundDown(b, TheISA::VMPageSize);
+}
diff --git a/src/mem/cache/prefetch/base.hh b/src/mem/cache/prefetch/base.hh
index 1515d8a93..fc027cb3b 100644
--- a/src/mem/cache/prefetch/base.hh
+++ b/src/mem/cache/prefetch/base.hh
@@ -73,30 +73,40 @@ class BasePrefetcher
     bool cacheCheckPush;
 
     /** Do we prefetch on only data reads, or on inst reads as well. */
-    bool only_data;
+    bool onlyData;
+
+    std::string _name;
 
   public:
 
-    Stats::Scalar<> pfIdentified;
-    Stats::Scalar<> pfMSHRHit;
-    Stats::Scalar<> pfCacheHit;
-    Stats::Scalar<> pfBufferHit;
-    Stats::Scalar<> pfRemovedFull;
-    Stats::Scalar<> pfRemovedMSHR;
-    Stats::Scalar<> pfIssued;
-    Stats::Scalar<> pfSpanPage;
-    Stats::Scalar<> pfSquashed;
+    Stats::Scalar pfIdentified;
+    Stats::Scalar pfMSHRHit;
+    Stats::Scalar pfCacheHit;
+    Stats::Scalar pfBufferHit;
+    Stats::Scalar pfRemovedFull;
+    Stats::Scalar pfRemovedMSHR;
+    Stats::Scalar pfIssued;
+    Stats::Scalar pfSpanPage;
+    Stats::Scalar pfSquashed;
 
     void regStats(const std::string &name);
 
   public:
+
     BasePrefetcher(const BaseCacheParams *p);
 
     virtual ~BasePrefetcher() {}
 
+    const std::string name() const { return _name; }
+
     void setCache(BaseCache *_cache);
 
-    void handleMiss(PacketPtr &pkt, Tick time);
+    /**
+     * Notify prefetcher of cache access (may be any access or just
+     * misses, depending on cache parameters.)
+     * @retval Time of next prefetch availability, or 0 if none.
+     */
+    Tick notify(PacketPtr &pkt, Tick time);
 
     bool inCache(Addr addr);
 
@@ -109,11 +119,21 @@ class BasePrefetcher
         return !pf.empty();
     }
 
+    Tick nextPrefetchReadyTime()
+    {
+        return pf.empty() ? MaxTick : pf.front()->time;
+    }
+
     virtual void calculatePrefetch(PacketPtr &pkt,
                                    std::list<Addr> &addresses,
                                    std::list<Tick> &delays) = 0;
 
     std::list<PacketPtr>::iterator inPrefetch(Addr address);
+
+    /**
+     * Utility function: are addresses a and b on the same VM page?
+     */
+    bool samePage(Addr a, Addr b);
 };
 
 
diff --git a/src/mem/cache/prefetch/ghb.cc b/src/mem/cache/prefetch/ghb.cc
index f5b88e1a6..f8f7de1db 100644
--- a/src/mem/cache/prefetch/ghb.cc
+++ b/src/mem/cache/prefetch/ghb.cc
@@ -34,39 +34,37 @@
  * GHB Prefetcher implementation.
  */
 
+#include "base/trace.hh"
 #include "mem/cache/prefetch/ghb.hh"
-#include "arch/isa_traits.hh"
 
 void
 GHBPrefetcher::calculatePrefetch(PacketPtr &pkt, std::list<Addr> &addresses,
                                  std::list<Tick> &delays)
 {
-    Addr blkAddr = pkt->getAddr() & ~(Addr)(this->blkSize-1);
-    int cpuID = pkt->req->getCpuNum();
-    if (!useCPUId) cpuID = 0;
+    if (useContextId && !pkt->req->hasContextId()) {
+        DPRINTF(HWPrefetch, "ignoring request with no context ID\n");
+        return;
+    }
 
+    Addr blk_addr = pkt->getAddr() & ~(Addr)(blkSize-1);
+    int ctx_id = useContextId ? pkt->req->contextId() : 0;
+    assert(ctx_id < Max_Contexts);
 
-    int new_stride = blkAddr - last_miss_addr[cpuID];
-    int old_stride = last_miss_addr[cpuID] -
-        second_last_miss_addr[cpuID];
+    int new_stride = blk_addr - lastMissAddr[ctx_id];
+    int old_stride = lastMissAddr[ctx_id] - secondLastMissAddr[ctx_id];
 
-    second_last_miss_addr[cpuID] = last_miss_addr[cpuID];
-    last_miss_addr[cpuID] = blkAddr;
+    secondLastMissAddr[ctx_id] = lastMissAddr[ctx_id];
+    lastMissAddr[ctx_id] = blk_addr;
 
     if (new_stride == old_stride) {
-        for (int d=1; d <= degree; d++) {
-            Addr newAddr = blkAddr + d * new_stride;
-            if (this->pageStop &&
-                (blkAddr & ~(TheISA::VMPageSize - 1)) !=
-                (newAddr & ~(TheISA::VMPageSize - 1)))
-            {
-                //Spanned the page, so now stop
-                this->pfSpanPage += degree - d + 1;
+        for (int d = 1; d <= degree; d++) {
+            Addr new_addr = blk_addr + d * new_stride;
+            if (pageStop && !samePage(blk_addr, new_addr)) {
+                // Spanned the page, so now stop
+                pfSpanPage += degree - d + 1;
                 return;
-            }
-            else
-            {
-                addresses.push_back(newAddr);
+            } else {
+                addresses.push_back(new_addr);
                 delays.push_back(latency);
             }
         }
diff --git a/src/mem/cache/prefetch/ghb.hh b/src/mem/cache/prefetch/ghb.hh
index 4fb692016..c85221a39 100644
--- a/src/mem/cache/prefetch/ghb.hh
+++ b/src/mem/cache/prefetch/ghb.hh
@@ -42,18 +42,20 @@ class GHBPrefetcher : public BasePrefetcher
 {
   protected:
 
-    Addr second_last_miss_addr[64/*MAX_CPUS*/];
-    Addr last_miss_addr[64/*MAX_CPUS*/];
+    static const int Max_Contexts = 64;
+
+    Addr secondLastMissAddr[Max_Contexts];
+    Addr lastMissAddr[Max_Contexts];
 
     Tick latency;
     int degree;
-    bool useCPUId;
+    bool useContextId;
 
   public:
 
     GHBPrefetcher(const BaseCacheParams *p)
         : BasePrefetcher(p), latency(p->prefetch_latency),
-          degree(p->prefetch_degree), useCPUId(p->prefetch_use_cpu_id)
+          degree(p->prefetch_degree), useContextId(p->prefetch_use_cpu_id)
     {
     }
 
diff --git a/src/mem/cache/prefetch/stride.cc b/src/mem/cache/prefetch/stride.cc
index b116b66c7..8af4e615e 100644
--- a/src/mem/cache/prefetch/stride.cc
+++ b/src/mem/cache/prefetch/stride.cc
@@ -34,59 +34,97 @@
  * Stride Prefetcher template instantiations.
  */
 
+#include "base/trace.hh"
 #include "mem/cache/prefetch/stride.hh"
 
 void
 StridePrefetcher::calculatePrefetch(PacketPtr &pkt, std::list<Addr> &addresses,
                                     std::list<Tick> &delays)
 {
-//	Addr blkAddr = pkt->paddr & ~(Addr)(this->blkSize-1);
-    int cpuID = pkt->req->getCpuNum();
-    if (!useCPUId) cpuID = 0;
-
-    /* Scan Table for IAddr Match */
-/*	std::list<strideEntry*>::iterator iter;
-  for (iter=table[cpuID].begin();
-  iter !=table[cpuID].end();
-  iter++) {
-  if ((*iter)->IAddr == pkt->pc) break;
-  }
-
-  if (iter != table[cpuID].end()) {
-  //Hit in table
-
-  int newStride = blkAddr - (*iter)->MAddr;
-  if (newStride == (*iter)->stride) {
-  (*iter)->confidence++;
-  }
-  else {
-  (*iter)->stride = newStride;
-  (*iter)->confidence--;
-  }
-
-  (*iter)->MAddr = blkAddr;
-
-  for (int d=1; d <= degree; d++) {
-  Addr newAddr = blkAddr + d * newStride;
-  if (this->pageStop &&
-  (blkAddr & ~(TheISA::VMPageSize - 1)) !=
-  (newAddr & ~(TheISA::VMPageSize - 1)))
-  {
-  //Spanned the page, so now stop
-  this->pfSpanPage += degree - d + 1;
-  return;
-  }
-  else
-  {
-  addresses.push_back(newAddr);
-  delays.push_back(latency);
-  }
-  }
-  }
-  else {
-  //Miss in table
-  //Find lowest confidence and replace
-
-  }
-*/
+    if (!pkt->req->hasPC()) {
+        DPRINTF(HWPrefetch, "ignoring request with no PC\n");
+        return;
+    }
+
+    if (useContextId && !pkt->req->hasContextId()) {
+        DPRINTF(HWPrefetch, "ignoring request with no context ID\n");
+        return;
+    }
+
+    Addr blk_addr = pkt->getAddr() & ~(Addr)(blkSize-1);
+    int ctx_id = useContextId ? pkt->req->contextId() : 0;
+    Addr pc = pkt->req->getPC();
+    assert(ctx_id < Max_Contexts);
+    std::list<StrideEntry*> &tab = table[ctx_id];
+
+    /* Scan Table for instAddr Match */
+    std::list<StrideEntry*>::iterator iter;
+    for (iter = tab.begin(); iter != tab.end(); iter++) {
+        if ((*iter)->instAddr == pc)
+            break;
+    }
+
+    if (iter != tab.end()) {
+        // Hit in table
+
+        int new_stride = blk_addr - (*iter)->missAddr;
+        bool stride_match = (new_stride == (*iter)->stride);
+
+        if (stride_match && new_stride != 0) {
+            if ((*iter)->confidence < Max_Conf)
+                (*iter)->confidence++;
+        } else {
+            (*iter)->stride = new_stride;
+            if ((*iter)->confidence > Min_Conf)
+                (*iter)->confidence = 0;
+        }
+
+        DPRINTF(HWPrefetch, "hit: PC %x blk_addr %x stride %d (%s), conf %d\n",
+                pc, blk_addr, new_stride, stride_match ? "match" : "change",
+                (*iter)->confidence);
+
+        (*iter)->missAddr = blk_addr;
+
+        if ((*iter)->confidence <= 0)
+            return;
+
+        for (int d = 1; d <= degree; d++) {
+            Addr new_addr = blk_addr + d * new_stride;
+            if (pageStop && !samePage(blk_addr, new_addr)) {
+                // Spanned the page, so now stop
+                pfSpanPage += degree - d + 1;
+                return;
+            } else {
+                DPRINTF(HWPrefetch, "  queuing prefetch to %x @ %d\n",
+                        new_addr, latency);
+                addresses.push_back(new_addr);
+                delays.push_back(latency);
+            }
+        }
+    } else {
+        // Miss in table
+        // Find lowest confidence and replace
+
+        DPRINTF(HWPrefetch, "miss: PC %x blk_addr %x\n", pc, blk_addr);
+
+        if (tab.size() >= 256) { // table is capped at 256 entries
+            // Evict the first entry with the lowest confidence.
+            std::list<StrideEntry*>::iterator min_pos = tab.begin();
+            for (iter = min_pos, ++iter; iter != tab.end(); ++iter) {
+                if ((*iter)->confidence < (*min_pos)->confidence)
+                    min_pos = iter;
+            }
+            DPRINTF(HWPrefetch, "  replacing PC %x\n", (*min_pos)->instAddr);
+            // Entries are heap-allocated; free before dropping the pointer.
+            delete *min_pos;
+            tab.erase(min_pos);
+        }
+
+        StrideEntry *new_entry = new StrideEntry;
+        new_entry->instAddr = pc;
+        new_entry->missAddr = blk_addr;
+        new_entry->stride = 0;
+        new_entry->confidence = 0;
+        tab.push_back(new_entry);
+    }
 }
diff --git a/src/mem/cache/prefetch/stride.hh b/src/mem/cache/prefetch/stride.hh
index f6bdbc424..6ccd32b91 100644
--- a/src/mem/cache/prefetch/stride.hh
+++ b/src/mem/cache/prefetch/stride.hh
@@ -36,41 +36,41 @@
 #ifndef __MEM_CACHE_PREFETCH_STRIDE_PREFETCHER_HH__
 #define __MEM_CACHE_PREFETCH_STRIDE_PREFETCHER_HH__
 
+#include <limits.h>
 #include "mem/cache/prefetch/base.hh"
 
 class StridePrefetcher : public BasePrefetcher
 {
   protected:
 
-    class strideEntry
+    static const int Max_Contexts = 64;
+
+    // These constants need to be changed with the type of the
+    // 'confidence' field below.
+    static const int Max_Conf = INT_MAX;
+    static const int Min_Conf = INT_MIN;
+
+    class StrideEntry
     {
       public:
-        Addr IAddr;
-        Addr MAddr;
+        Addr instAddr;
+        Addr missAddr;
         int stride;
-        int64_t confidence;
-
-/*	bool operator < (strideEntry a,strideEntry b)
-        {
-            if (a.confidence == b.confidence) {
-                return true; //??????
-            }
-            else return a.confidence < b.confidence;
-            }*/
+        int confidence;
     };
-    Addr* lastMissAddr[64/*MAX_CPUS*/];
 
-    std::list<strideEntry*> table[64/*MAX_CPUS*/];
+    Addr *lastMissAddr[Max_Contexts];
+
+    std::list<StrideEntry*> table[Max_Contexts];
     Tick latency;
     int degree;
-    bool useCPUId;
-
+    bool useContextId;
 
   public:
 
     StridePrefetcher(const BaseCacheParams *p)
         : BasePrefetcher(p), latency(p->prefetch_latency),
-          degree(p->prefetch_degree), useCPUId(p->prefetch_use_cpu_id)
+          degree(p->prefetch_degree), useContextId(p->prefetch_use_cpu_id)
     {
     }
 
diff --git a/src/mem/cache/prefetch/tagged.cc b/src/mem/cache/prefetch/tagged.cc
index 6afe1c6c2..a6c2403ba 100644
--- a/src/mem/cache/prefetch/tagged.cc
+++ b/src/mem/cache/prefetch/tagged.cc
@@ -47,20 +47,15 @@ TaggedPrefetcher::
 calculatePrefetch(PacketPtr &pkt, std::list<Addr> &addresses,
                   std::list<Tick> &delays)
 {
-    Addr blkAddr = pkt->getAddr() & ~(Addr)(this->blkSize-1);
+    Addr blkAddr = pkt->getAddr() & ~(Addr)(blkSize-1);
 
-    for (int d=1; d <= degree; d++) {
-        Addr newAddr = blkAddr + d*(this->blkSize);
-        if (this->pageStop &&
-            (blkAddr & ~(TheISA::VMPageSize - 1)) !=
-            (newAddr & ~(TheISA::VMPageSize - 1)))
-        {
-            //Spanned the page, so now stop
-            this->pfSpanPage += degree - d + 1;
+    for (int d = 1; d <= degree; d++) {
+        Addr newAddr = blkAddr + d*(blkSize);
+        if (pageStop &&  !samePage(blkAddr, newAddr)) {
+            // Spanned the page, so now stop
+            pfSpanPage += degree - d + 1;
             return;
-        }
-        else
-        {
+        } else {
             addresses.push_back(newAddr);
             delays.push_back(latency);
         }
diff --git a/src/mem/cache/tags/SConscript b/src/mem/cache/tags/SConscript
index 9153d97e7..7255e0b7e 100644
--- a/src/mem/cache/tags/SConscript
+++ b/src/mem/cache/tags/SConscript
@@ -34,13 +34,9 @@ Source('base.cc')
 Source('fa_lru.cc')
 Source('iic.cc')
 Source('lru.cc')
-Source('split.cc')
-Source('split_lifo.cc')
-Source('split_lru.cc')
 
 SimObject('iic_repl/Repl.py')
 Source('iic_repl/gen.cc')
 
 TraceFlag('IIC')
 TraceFlag('IICMore')
-TraceFlag('Split')
diff --git a/src/mem/cache/tags/base.hh b/src/mem/cache/tags/base.hh
index b7b0c7ef0..46c7186b1 100644
--- a/src/mem/cache/tags/base.hh
+++ b/src/mem/cache/tags/base.hh
@@ -70,19 +70,19 @@ class BaseTags
      */
 
     /** Number of replacements of valid blocks per thread. */
-    Stats::Vector<> replacements;
+    Stats::Vector replacements;
     /** Per cycle average of the number of tags that hold valid data. */
-    Stats::Average<> tagsInUse;
+    Stats::Average tagsInUse;
 
     /** The total number of references to a block before it is replaced. */
-    Stats::Scalar<> totalRefs;
+    Stats::Scalar totalRefs;
 
     /**
      * The number of reference counts sampled. This is different from
      * replacements because we sample all the valid blocks when the simulator
      * exits.
      */
-    Stats::Scalar<> sampledRefs;
+    Stats::Scalar sampledRefs;
 
     /**
      * Average number of references to a block before is was replaced.
@@ -91,7 +91,7 @@ class BaseTags
     Stats::Formula avgRefs;
 
     /** The cycle that the warmup percentage was hit. */
-    Stats::Scalar<> warmupCycle;
+    Stats::Scalar warmupCycle;
     /**
      * @}
      */
diff --git a/src/mem/cache/tags/fa_lru.cc b/src/mem/cache/tags/fa_lru.cc
index 607e89a75..f92d4cb37 100644
--- a/src/mem/cache/tags/fa_lru.cc
+++ b/src/mem/cache/tags/fa_lru.cc
@@ -144,14 +144,6 @@ FALRU::hashLookup(Addr addr) const
     return NULL;
 }
 
-bool
-FALRU::probe(Addr addr) const
-{
-    Addr blkAddr = blkAlign(addr);
-    FALRUBlk* blk = hashLookup(blkAddr);
-    return blk && blk->tag == blkAddr && blk->isValid();
-}
-
 void
 FALRU::invalidateBlk(FALRU::BlkType *blk)
 {
@@ -163,7 +155,7 @@ FALRU::invalidateBlk(FALRU::BlkType *blk)
 }
 
 FALRUBlk*
-FALRU::findBlock(Addr addr, int &lat, int *inCache)
+FALRU::accessBlock(Addr addr, int &lat, int *inCache)
 {
     accesses++;
     int tmp_in_cache = 0;
@@ -215,7 +207,7 @@ FALRU::findBlock(Addr addr) const
 }
 
 FALRUBlk*
-FALRU::findReplacement(Addr addr, PacketList &writebacks)
+FALRU::findVictim(Addr addr, PacketList &writebacks)
 {
     FALRUBlk * blk = tail;
     assert(blk->inCache == 0);
@@ -237,6 +229,11 @@ FALRU::findReplacement(Addr addr, PacketList &writebacks)
 }
 
 void
+FALRU::insertBlock(Addr addr, FALRU::BlkType *blk)
+{
+}
+
+void
 FALRU::moveToHead(FALRUBlk *blk)
 {
     int updateMask = blk->inCache ^ cacheMask;
diff --git a/src/mem/cache/tags/fa_lru.hh b/src/mem/cache/tags/fa_lru.hh
index cabcf18b4..4eab10c49 100644
--- a/src/mem/cache/tags/fa_lru.hh
+++ b/src/mem/cache/tags/fa_lru.hh
@@ -139,11 +139,11 @@ class FALRU : public BaseTags
      */
 
     /** Hits in each cache size >= 128K. */
-    Stats::Vector<> hits;
+    Stats::Vector hits;
     /** Misses in each cache size >= 128K. */
-    Stats::Vector<> misses;
+    Stats::Vector misses;
     /** Total number of accesses. */
-    Stats::Scalar<> accesses;
+    Stats::Scalar accesses;
 
     /**
      * @}
@@ -165,29 +165,23 @@ public:
     void regStats(const std::string &name);
 
     /**
-     * Return true if the address is found in the cache.
-     * @param asid The address space ID.
-     * @param addr The address to look for.
-     * @return True if the address is in the cache.
-     */
-    bool probe(Addr addr) const;
-
-    /**
      * Invalidate a cache block.
      * @param blk The block to invalidate.
      */
     void invalidateBlk(BlkType *blk);
 
     /**
-     * Find the block in the cache and update the replacement data. Returns
-     * the access latency and the in cache flags as a side effect
+     * Access block and update replacement data.  May not succeed, in which case
+     * NULL pointer is returned.  This has all the implications of a cache
+     * access and should only be used as such.
+     * Returns the access latency and inCache flags as a side effect.
      * @param addr The address to look for.
      * @param asid The address space ID.
      * @param lat The latency of the access.
      * @param inCache The FALRUBlk::inCache flags.
      * @return Pointer to the cache block.
      */
-    FALRUBlk* findBlock(Addr addr, int &lat, int *inCache = 0);
+    FALRUBlk* accessBlock(Addr addr, int &lat, int *inCache = 0);
 
     /**
      * Find the block in the cache, do not update the replacement data.
@@ -203,7 +197,9 @@ public:
      * @param writebacks List for any writebacks to be performed.
      * @return The block to place the replacement in.
      */
-    FALRUBlk* findReplacement(Addr addr, PacketList & writebacks);
+    FALRUBlk* findVictim(Addr addr, PacketList & writebacks);
+
+    void insertBlock(Addr addr, BlkType *blk);
 
     /**
      * Return the hit latency of this cache.
@@ -283,31 +279,6 @@ public:
     {
         return (tag);
     }
-
-    /**
-     * Read the data out of the internal storage of a cache block. FALRU
-     * currently doesn't support data storage.
-     * @param blk The cache block to read.
-     * @param data The buffer to read the data into.
-     * @return The data from the cache block.
-     */
-    void readData(FALRUBlk *blk, uint8_t *data)
-    {
-    }
-
-    /**
-     * Write data into the internal storage of a cache block. FALRU
-     * currently doesn't support data storage.
-     * @param blk The cache block to be written.
-     * @param data The data to write.
-     * @param size The number of bytes to write.
-     * @param writebacks A list for any writebacks to be performed. May be
-     * needed when writing to a compressed block.
-     */
-    void writeData(FALRUBlk *blk, uint8_t *data, int size,
-                   PacketList &writebacks)
-    {
-    }
 };
 
 #endif
diff --git a/src/mem/cache/tags/iic.cc b/src/mem/cache/tags/iic.cc
index 2825599f6..7bc2543c5 100644
--- a/src/mem/cache/tags/iic.cc
+++ b/src/mem/cache/tags/iic.cc
@@ -219,15 +219,9 @@ IIC::regStats(const string &name)
         ;
 }
 
-// probe cache for presence of given block.
-bool
-IIC::probe(Addr addr) const
-{
-    return (findBlock(addr) != NULL);
-}
 
 IICTag*
-IIC::findBlock(Addr addr, int &lat)
+IIC::accessBlock(Addr addr, int &lat)
 {
     Addr tag = extractTag(addr);
     unsigned set = hash(addr);
@@ -303,7 +297,7 @@ IIC::findBlock(Addr addr) const
 
 
 IICTag*
-IIC::findReplacement(Addr addr, PacketList &writebacks)
+IIC::findVictim(Addr addr, PacketList &writebacks)
 {
     DPRINTF(IIC, "Finding Replacement for %x\n", addr);
     unsigned set = hash(addr);
@@ -346,6 +340,11 @@ IIC::findReplacement(Addr addr, PacketList &writebacks)
 }
 
 void
+IIC::insertBlock(Addr addr, BlkType* blk)
+{
+}
+
+void
 IIC::freeReplacementBlock(PacketList & writebacks)
 {
     IICTag *tag_ptr;
@@ -365,7 +364,7 @@ IIC::freeReplacementBlock(PacketList & writebacks)
         tag_ptr->refCount = 0;
 
         if (tag_ptr->isDirty()) {
-/*	    PacketPtr writeback =
+/*          PacketPtr writeback =
                 buildWritebackReq(regenerateBlkAddr(tag_ptr->tag, 0),
                                   tag_ptr->req->asid, tag_ptr->xc, blkSize,
                                   tag_ptr->data,
@@ -635,66 +634,6 @@ IIC::invalidateBlk(IIC::BlkType *tag_ptr)
 }
 
 void
-IIC::readData(IICTag *blk, uint8_t *data)
-{
-    assert(blk->size <= trivialSize || blk->numData > 0);
-    int data_size = blk->size;
-    if (data_size > trivialSize) {
-        for (int i = 0; i < blk->numData; ++i){
-            memcpy(data+i*subSize,
-                   &(dataBlks[blk->data_ptr[i]][0]),
-                   (data_size>subSize)?subSize:data_size);
-            data_size -= subSize;
-        }
-    } else {
-        memcpy(data,blk->trivialData,data_size);
-    }
-}
-
-void
-IIC::writeData(IICTag *blk, uint8_t *write_data, int size,
-               PacketList & writebacks)
-{
-    DPRINTF(IIC, "Writing %d bytes to %x\n", size,
-            blk->tag<<tagShift);
-    // Find the number of subblocks needed, (round up)
-    int num_subs = (size + (subSize -1))/subSize;
-    if (size <= trivialSize) {
-        num_subs = 0;
-    }
-    assert(num_subs <= numSub);
-    if (num_subs > blk->numData) {
-        // need to allocate more data blocks
-        for (int i = blk->numData; i < num_subs; ++i){
-            blk->data_ptr[i] = getFreeDataBlock(writebacks);
-            dataReferenceCount[blk->data_ptr[i]] += 1;
-        }
-    } else if (num_subs < blk->numData){
-        // can free data blocks
-        for (int i=num_subs; i < blk->numData; ++i){
-            // decrement reference count and compare to zero
-            if (--dataReferenceCount[blk->data_ptr[i]] == 0) {
-                freeDataBlock(blk->data_ptr[i]);
-            }
-        }
-    }
-
-    blk->numData = num_subs;
-    blk->size = size;
-    assert(size <= trivialSize || blk->numData > 0);
-    if (size > trivialSize){
-        for (int i = 0; i < blk->numData; ++i){
-            memcpy(&dataBlks[blk->data_ptr[i]][0], write_data + i*subSize,
-                   (size>subSize)?subSize:size);
-            size -= subSize;
-        }
-    } else {
-        memcpy(blk->trivialData,write_data,size);
-    }
-}
-
-
-void
 IIC::cleanupRefs()
 {
     for (int i = 0; i < numTags; ++i) {
diff --git a/src/mem/cache/tags/iic.hh b/src/mem/cache/tags/iic.hh
index c9d080683..45c8ee801 100644
--- a/src/mem/cache/tags/iic.hh
+++ b/src/mem/cache/tags/iic.hh
@@ -248,20 +248,20 @@ class IIC : public BaseTags
      */
 
     /** Hash hit depth of cache hits. */
-    Stats::Distribution<> hitHashDepth;
+    Stats::Distribution hitHashDepth;
     /** Hash depth for cache misses. */
-    Stats::Distribution<> missHashDepth;
+    Stats::Distribution missHashDepth;
     /** Count of accesses to each hash set. */
-    Stats::Distribution<> setAccess;
+    Stats::Distribution setAccess;
 
     /** The total hash depth for every miss. */
-    Stats::Scalar<> missDepthTotal;
+    Stats::Scalar missDepthTotal;
     /** The total hash depth for all hits. */
-    Stats::Scalar<> hitDepthTotal;
+    Stats::Scalar hitDepthTotal;
     /** The number of hash misses. */
-    Stats::Scalar<> hashMiss;
+    Stats::Scalar hashMiss;
     /** The number of hash hits. */
-    Stats::Scalar<> hashHit;
+    Stats::Scalar hashHit;
     /** @} */
 
   public:
@@ -385,14 +385,6 @@ class IIC : public BaseTags
     }
 
     /**
-     * Check for the address in the tagstore.
-     * @param asid The address space ID.
-     * @param addr The address to find.
-     * @return true if it is found.
-     */
-    bool probe(Addr addr) const;
-
-    /**
      * Swap the position of two tags.
      * @param index1 The first tag location.
      * @param index2 The second tag location.
@@ -418,14 +410,16 @@ class IIC : public BaseTags
     void invalidateBlk(BlkType *blk);
 
     /**
-     * Find the block and update the replacement data. This call also returns
-     * the access latency as a side effect.
+     * Access block and update replacement data.  May not succeed, in which case
+     * NULL pointer is returned.  This has all the implications of a cache
+     * access and should only be used as such.
+     * Returns the access latency as a side effect.
      * @param addr The address to find.
      * @param asid The address space ID.
      * @param lat The access latency.
      * @return A pointer to the block found, if any.
      */
-    IICTag* findBlock(Addr addr, int &lat);
+    IICTag* accessBlock(Addr addr, int &lat);
 
     /**
      * Find the block, do not update the replacement data.
@@ -441,31 +435,15 @@ class IIC : public BaseTags
      * @param writebacks List for any writebacks to be performed.
      * @return The block to place the replacement in.
      */
-    IICTag* findReplacement(Addr addr, PacketList &writebacks);
+    IICTag* findVictim(Addr addr, PacketList &writebacks);
 
-    /**
-     * Read the data from the internal storage of the given cache block.
-     * @param blk The block to read the data from.
-     * @param data The buffer to read the data into.
-     * @return The cache block's data.
-     */
-    void readData(IICTag *blk, uint8_t *data);
-
-    /**
-     * Write the data into the internal storage of the given cache block.
-     * @param blk The block to write to.
-     * @param data The data to write.
-     * @param size The number of bytes to write.
-     * @param writebacks A list for any writebacks to be performed. May be
-     * needed when writing to a compressed block.
-     */
-    void writeData(IICTag *blk, uint8_t *data, int size,
-                   PacketList & writebacks);
+    void insertBlock(Addr addr, BlkType *blk);
 
     /**
      * Called at end of simulation to complete average block reference stats.
      */
     virtual void cleanupRefs();
+
 private:
     /**
      * Return the hash of the address.
diff --git a/src/mem/cache/tags/iic_repl/gen.hh b/src/mem/cache/tags/iic_repl/gen.hh
index 22436b384..fe105d95a 100644
--- a/src/mem/cache/tags/iic_repl/gen.hh
+++ b/src/mem/cache/tags/iic_repl/gen.hh
@@ -162,11 +162,11 @@ class GenRepl : public Repl
      * @{
      */
     /** The number of replacements from each pool. */
-    Stats::Distribution<> repl_pool;
+    Stats::Distribution repl_pool;
     /** The number of advances out of each pool. */
-    Stats::Distribution<> advance_pool;
+    Stats::Distribution advance_pool;
     /** The number of demotions from each pool. */
-    Stats::Distribution<> demote_pool;
+    Stats::Distribution demote_pool;
     /**
      * @}
      */
diff --git a/src/mem/cache/tags/lru.cc b/src/mem/cache/tags/lru.cc
index 7f352e9c4..ff353ff6a 100644
--- a/src/mem/cache/tags/lru.cc
+++ b/src/mem/cache/tags/lru.cc
@@ -113,7 +113,7 @@ LRU::LRU(int _numSets, int _blkSize, int _assoc, int _hit_latency) :
     // allocate data storage in one big chunk
     dataBlks = new uint8_t[numSets*assoc*blkSize];
 
-    blkIndex = 0;	// index into blks array
+    blkIndex = 0;       // index into blks array
     for (i = 0; i < numSets; ++i) {
         sets[i].assoc = assoc;
 
@@ -150,21 +150,8 @@ LRU::~LRU()
     delete [] sets;
 }
 
-// probe cache for presence of given block.
-bool
-LRU::probe(Addr addr) const
-{
-    //  return(findBlock(Read, addr, asid) != 0);
-    Addr tag = extractTag(addr);
-    unsigned myset = extractSet(addr);
-
-    LRUBlk *blk = sets[myset].findBlk(tag);
-
-    return (blk != NULL);	// true if in cache
-}
-
 LRUBlk*
-LRU::findBlock(Addr addr, int &lat)
+LRU::accessBlock(Addr addr, int &lat)
 {
     Addr tag = extractTag(addr);
     unsigned set = extractSet(addr);
@@ -196,12 +183,11 @@ LRU::findBlock(Addr addr) const
 }
 
 LRUBlk*
-LRU::findReplacement(Addr addr, PacketList &writebacks)
+LRU::findVictim(Addr addr, PacketList &writebacks)
 {
     unsigned set = extractSet(addr);
     // grab a replacement candidate
     LRUBlk *blk = sets[set].blks[assoc-1];
-    sets[set].moveToHead(blk);
     if (blk->isValid()) {
         replacements[0]++;
         totalRefs += blk->refCount;
@@ -210,7 +196,14 @@ LRU::findReplacement(Addr addr, PacketList &writebacks)
 
         DPRINTF(CacheRepl, "set %x: selecting blk %x for replacement\n",
                 set, regenerateBlkAddr(blk->tag, set));
-    } else if (!blk->isTouched) {
+    }
+    return blk;
+}
+
+void
+LRU::insertBlock(Addr addr, LRU::BlkType *blk)
+{
+    if (!blk->isTouched) {
         tagsInUse++;
         blk->isTouched = true;
         if (!warmedUp && tagsInUse.value() >= warmupBound) {
@@ -219,7 +212,11 @@ LRU::findReplacement(Addr addr, PacketList &writebacks)
         }
     }
 
-    return blk;
+    // Set tag for new block.  Caller is responsible for setting status.
+    blk->tag = extractTag(addr);
+
+    unsigned set = extractSet(addr);
+    sets[set].moveToHead(blk);
 }
 
 void
diff --git a/src/mem/cache/tags/lru.hh b/src/mem/cache/tags/lru.hh
index ea5606cde..7b6e95e84 100644
--- a/src/mem/cache/tags/lru.hh
+++ b/src/mem/cache/tags/lru.hh
@@ -127,7 +127,7 @@ public:
      * @param _assoc The associativity of the cache.
      * @param _hit_latency The latency in cycles for a hit.
      */
-    LRU(int _numSets, int _blkSize,	int _assoc, int _hit_latency);
+    LRU(int _numSets, int _blkSize,     int _assoc, int _hit_latency);
 
     /**
      * Destructor
@@ -154,31 +154,25 @@ public:
     }
 
     /**
-     * Search for the address in the cache.
-     * @param asid The address space ID.
-     * @param addr The address to find.
-     * @return True if the address is in the cache.
-     */
-    bool probe(Addr addr) const;
-
-    /**
      * Invalidate the given block.
      * @param blk The block to invalidate.
      */
     void invalidateBlk(BlkType *blk);
 
     /**
-     * Finds the given address in the cache and update replacement data.
-     * Returns the access latency as a side effect.
+     * Access block and update replacement data.  May not succeed, in which case
+     * NULL pointer is returned.  This has all the implications of a cache
+     * access and should only be used as such. Returns the access latency as a side effect.
      * @param addr The address to find.
      * @param asid The address space ID.
      * @param lat The access latency.
      * @return Pointer to the cache block if found.
      */
-    LRUBlk* findBlock(Addr addr, int &lat);
+    LRUBlk* accessBlock(Addr addr, int &lat);
 
     /**
      * Finds the given address in the cache, do not update replacement data.
+     * i.e. This is a no-side-effect find of a block.
      * @param addr The address to find.
      * @param asid The address space ID.
      * @return Pointer to the cache block if found.
@@ -186,12 +180,20 @@ public:
     LRUBlk* findBlock(Addr addr) const;
 
     /**
-     * Find a replacement block for the address provided.
-     * @param pkt The request to a find a replacement candidate for.
+     * Find a block to evict for the address provided.
+     * @param addr The address to find a replacement candidate for.
      * @param writebacks List for any writebacks to be performed.
-     * @return The block to place the replacement in.
+     * @return The candidate block.
      */
-    LRUBlk* findReplacement(Addr addr, PacketList &writebacks);
+    LRUBlk* findVictim(Addr addr, PacketList &writebacks);
+
+    /**
+     * Insert the new block into the cache.  For LRU this means inserting into
+     * the MRU position of the set.
+     * @param addr The address to update.
+     * @param blk The block to update.
+     */
+     void insertBlock(Addr addr, BlkType *blk);
 
     /**
      * Generate the tag from the given address.
@@ -254,33 +256,6 @@ public:
     }
 
     /**
-     * Read the data out of the internal storage of the given cache block.
-     * @param blk The cache block to read.
-     * @param data The buffer to read the data into.
-     * @return The cache block's data.
-     */
-    void readData(LRUBlk *blk, uint8_t *data)
-    {
-        std::memcpy(data, blk->data, blk->size);
-    }
-
-    /**
-     * Write data into the internal storage of the given cache block. Since in
-     * LRU does not store data differently this just needs to update the size.
-     * @param blk The cache block to write.
-     * @param data The data to write.
-     * @param size The number of bytes to write.
-     * @param writebacks A list for any writebacks to be performed. May be
-     * needed when writing to a compressed block.
-     */
-    void writeData(LRUBlk *blk, uint8_t *data, int size,
-                   PacketList & writebacks)
-    {
-        assert(size <= blkSize);
-        blk->size = size;
-    }
-
-    /**
      * Called at end of simulation to complete average block reference stats.
      */
     virtual void cleanupRefs();
diff --git a/src/mem/cache/tags/split.cc b/src/mem/cache/tags/split.cc
deleted file mode 100644
index 0df85cc92..000000000
--- a/src/mem/cache/tags/split.cc
+++ /dev/null
@@ -1,420 +0,0 @@
-/*
- * Copyright (c) 2004-2005 The Regents of The University of Michigan
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met: redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer;
- * redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution;
- * neither the name of the copyright holders nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * Authors: Lisa Hsu
- */
-
-/**
- * @file
- * Definitions of split cache tag store.
- */
-
-#include <string>
-#include <iostream>
-#include <fstream>
-
-#include "base/cprintf.hh"
-#include "base/intmath.hh"
-#include "base/output.hh"
-#include "base/trace.hh"
-#include "mem/cache/base.hh"
-#include "mem/cache/tags/split.hh"
-#include "mem/cache/tags/split_lifo.hh"
-#include "mem/cache/tags/split_lru.hh"
-
-
-using namespace std;
-using namespace TheISA;
-
-// create and initialize a partitioned cache structure
-Split::Split(int _numSets, int _blkSize, int total_ways, int LRU1_assoc,
-             bool _lifo, bool _two_queue, int _hit_latency) :
-    numSets(_numSets), blkSize(_blkSize), lifo(_lifo), hitLatency(_hit_latency)
-{
-    DPRINTF(Split, "new split cache!!\n");
-
-    DPRINTF(Split, "lru has %d numSets, %d blkSize, %d assoc, and %d hit_latency\n",
-            numSets, blkSize, LRU1_assoc, hitLatency);
-
-    lru = new SplitLRU(_numSets, _blkSize, LRU1_assoc, _hit_latency, 1);
-
-    if (total_ways - LRU1_assoc == 0) {
-        lifo_net = NULL;
-        lru_net = NULL;
-    } else {
-        if (lifo) {
-            DPRINTF(Split, "Other partition is a LIFO with size %d in bytes. it gets %d ways\n",
-                    (total_ways - LRU1_assoc)*_numSets*_blkSize, (total_ways - LRU1_assoc));
-            lifo_net = new SplitLIFO(_blkSize, (total_ways - LRU1_assoc)*_numSets*_blkSize,
-                                     (total_ways - LRU1_assoc), _hit_latency, _two_queue, 2);
-            lru_net = NULL;
-        }
-        else {
-            DPRINTF(Split, "other LRU gets %d ways\n", total_ways - LRU1_assoc);
-            lru_net = new SplitLRU(_numSets, _blkSize, total_ways - LRU1_assoc, _hit_latency, 2);
-            lifo_net = NULL;
-        }
-    }
-
-    blkMask = blkSize - 1;
-
-    if (!isPowerOf2(total_ways))
-        warn("total cache ways/columns %d should be power of 2",
-             total_ways);
-
-    warmedUp = false;
-    /** @todo Make warmup percentage a parameter. */
-    warmupBound = numSets * total_ways;
-
-}
-
-Split::~Split()
-{
-    delete lru;
-    if (lifo)
-        delete lifo_net;
-    else
-        delete lru_net;
-}
-
-void
-Split::regStats(const string &name)
-{
-    using namespace Stats;
-
-    BaseTags::regStats(name);
-
-    usedEvictDist.init(0,3000,40);
-    unusedEvictDist.init(0,3000,40);
-    useByCPUCycleDist.init(0,35,1);
-
-    nic_repl
-        .name(name + ".nic_repl")
-        .desc("number of replacements in the nic partition")
-        .precision(0)
-        ;
-
-    cpu_repl
-        .name(name + ".cpu_repl")
-        .desc("number of replacements in the cpu partition")
-        .precision(0)
-        ;
-
-    lru->regStats(name + ".lru");
-
-    if (lifo && lifo_net) {
-        lifo_net->regStats(name + ".lifo_net");
-    } else if (lru_net) {
-        lru_net->regStats(name + ".lru_net");
-    }
-
-    nicUsedWhenEvicted
-        .name(name + ".nicUsedWhenEvicted")
-        .desc("number of NIC blks that were used before evicted")
-        ;
-
-    nicUsedTotLatency
-        .name(name + ".nicUsedTotLatency")
-        .desc("total cycles before eviction of used NIC blks")
-        ;
-
-    nicUsedTotEvicted
-        .name(name + ".nicUsedTotEvicted")
-        .desc("total number of used NIC blks evicted")
-        ;
-
-    nicUsedAvgLatency
-        .name(name + ".nicUsedAvgLatency")
-        .desc("avg number of cycles a used NIC blk is in cache")
-        .precision(0)
-        ;
-    nicUsedAvgLatency = nicUsedTotLatency / nicUsedTotEvicted;
-
-    usedEvictDist
-        .name(name + ".usedEvictDist")
-        .desc("distribution of used NIC blk eviction times")
-        .flags(pdf | cdf)
-        ;
-
-    nicUnusedWhenEvicted
-        .name(name + ".nicUnusedWhenEvicted")
-        .desc("number of NIC blks that were unused when evicted")
-        ;
-
-    nicUnusedTotLatency
-        .name(name + ".nicUnusedTotLatency")
-        .desc("total cycles before eviction of unused NIC blks")
-        ;
-
-    nicUnusedTotEvicted
-        .name(name + ".nicUnusedTotEvicted")
-        .desc("total number of unused NIC blks evicted")
-        ;
-
-    nicUnusedAvgLatency
-        .name(name + ".nicUnusedAvgLatency")
-        .desc("avg number of cycles an unused NIC blk is in cache")
-        .precision(0)
-        ;
-    nicUnusedAvgLatency = nicUnusedTotLatency / nicUnusedTotEvicted;
-
-    unusedEvictDist
-        .name(name + ".unusedEvictDist")
-        .desc("distribution of unused NIC blk eviction times")
-        .flags(pdf | cdf)
-        ;
-
-    nicUseByCPUCycleTotal
-        .name(name + ".nicUseByCPUCycleTotal")
-        .desc("total latency of NIC blks til usage time")
-        ;
-
-    nicBlksUsedByCPU
-        .name(name + ".nicBlksUsedByCPU")
-        .desc("total number of NIC blks used")
-        ;
-
-    nicAvgUsageByCPULatency
-        .name(name + ".nicAvgUsageByCPULatency")
-        .desc("average number of cycles before a NIC blk that is used gets used")
-        .precision(0)
-        ;
-    nicAvgUsageByCPULatency = nicUseByCPUCycleTotal / nicBlksUsedByCPU;
-
-    useByCPUCycleDist
-        .name(name + ".useByCPUCycleDist")
-        .desc("the distribution of cycle time in cache before NIC blk is used")
-        .flags(pdf | cdf)
-        ;
-
-    cpuUsedBlks
-        .name(name + ".cpuUsedBlks")
-        .desc("number of cpu blks that were used before evicted")
-        ;
-
-    cpuUnusedBlks
-        .name(name + ".cpuUnusedBlks")
-        .desc("number of cpu blks that were unused before evicted")
-        ;
-
-    nicAvgLatency
-        .name(name + ".nicAvgLatency")
-        .desc("avg number of cycles a NIC blk is in cache before evicted")
-        .precision(0)
-        ;
-    nicAvgLatency = (nicUnusedTotLatency + nicUsedTotLatency) /
-        (nicUnusedTotEvicted + nicUsedTotEvicted);
-
-    NR_CP_hits
-        .name(name + ".NR_CP_hits")
-        .desc("NIC requests hitting in CPU Partition")
-        ;
-
-    NR_NP_hits
-        .name(name + ".NR_NP_hits")
-        .desc("NIC requests hitting in NIC Partition")
-        ;
-
-    CR_CP_hits
-        .name(name + ".CR_CP_hits")
-        .desc("CPU requests hitting in CPU partition")
-        ;
-
-    CR_NP_hits
-        .name(name + ".CR_NP_hits")
-        .desc("CPU requests hitting in NIC partition")
-        ;
-
-}
-
-// probe cache for presence of given block.
-bool
-Split::probe(Addr addr) const
-{
-    bool success = lru->probe(addr);
-    if (!success) {
-        if (lifo && lifo_net)
-            success = lifo_net->probe(addr);
-        else if (lru_net)
-            success = lru_net->probe(addr);
-    }
-
-    return success;
-}
-
-
-SplitBlk*
-Split::findBlock(Addr addr, int &lat)
-{
-    SplitBlk *blk = lru->findBlock(addr, lat);
-    if (!blk) {
-        if (lifo && lifo_net) {
-            blk = lifo_net->findBlock(addr, lat);
-        } else if (lru_net) {
-            blk = lru_net->findBlock(addr, lat);
-        }
-    }
-
-    return blk;
-}
-
-SplitBlk*
-Split::findBlock(Addr addr) const
-{
-    SplitBlk *blk = lru->findBlock(addr);
-    if (!blk) {
-        if (lifo && lifo_net) {
-            blk = lifo_net->findBlock(addr);
-        } else if (lru_net) {
-            blk = lru_net->findBlock(addr);
-        }
-    }
-
-    return blk;
-}
-
-SplitBlk*
-Split::findReplacement(Addr addr, PacketList &writebacks)
-{
-    SplitBlk *blk = NULL;
-
-    assert(0);
-#if 0
-    if (pkt->nic_pkt()) {
-        DPRINTF(Split, "finding a replacement for nic_req\n");
-        nic_repl++;
-        if (lifo && lifo_net)
-            blk = lifo_net->findReplacement(addr, writebacks);
-        else if (lru_net)
-            blk = lru_net->findReplacement(addr, writebacks);
-        // in this case, this is an LRU only cache, it's non partitioned
-        else
-            blk = lru->findReplacement(addr, writebacks);
-    } else {
-        DPRINTF(Split, "finding replacement for cpu_req\n");
-        blk = lru->findReplacement(addr, writebacks);
-        cpu_repl++;
-    }
-
-    Tick latency = curTick - blk->ts;
-    if (blk->isNIC) {
-        if (blk->isUsed) {
-            nicUsedWhenEvicted++;
-            usedEvictDist.sample(latency);
-            nicUsedTotLatency += latency;
-            nicUsedTotEvicted++;
-        } else {
-            nicUnusedWhenEvicted++;
-            unusedEvictDist.sample(latency);
-            nicUnusedTotLatency += latency;
-            nicUnusedTotEvicted++;
-        }
-    } else {
-        if (blk->isUsed) {
-            cpuUsedBlks++;
-        } else {
-            cpuUnusedBlks++;
-        }
-    }
-
-    // blk attributes for the new blk coming IN
-    blk->ts = curTick;
-    blk->isNIC = (pkt->nic_pkt()) ? true : false;
-#endif
-
-    return blk;
-}
-
-void
-Split::invalidateBlk(Split::BlkType *blk)
-{
-    if (!blk) {
-        fatal("FIXME!\n");
-#if 0
-        if (lifo && lifo_net)
-            blk = lifo_net->findBlock(addr);
-        else if (lru_net)
-            blk = lru_net->findBlock(addr);
-#endif
-
-        if (!blk)
-            return;
-    }
-
-    blk->status = 0;
-    blk->isTouched = false;
-    tagsInUse--;
-}
-
-void
-Split::cleanupRefs()
-{
-    lru->cleanupRefs();
-    if (lifo && lifo_net)
-        lifo_net->cleanupRefs();
-    else if (lru_net)
-        lru_net->cleanupRefs();
-
-    ofstream memPrint(simout.resolve("memory_footprint.txt").c_str(),
-                      ios::trunc);
-
-    // this shouldn't be here but it happens at the end, which is what i want
-    memIter end = memHash.end();
-    for (memIter iter = memHash.begin(); iter != end; ++iter) {
-        ccprintf(memPrint, "%8x\t%d\n", (*iter).first, (*iter).second);
-    }
-}
-
-Addr
-Split::regenerateBlkAddr(Addr tag, int set) const
-{
-    if (lifo_net)
-        return lifo_net->regenerateBlkAddr(tag, set);
-    else
-        return lru->regenerateBlkAddr(tag, set);
-}
-
-Addr
-Split::extractTag(Addr addr) const
-{
-    // need to fix this if we want to use it... old interface of
-    // passing in blk was too weird
-    assert(0);
-    return 0;
-/*
-    if (blk->part == 2) {
-        if (lifo_net)
-            return lifo_net->extractTag(addr);
-        else if (lru_net)
-            return lru_net->extractTag(addr);
-        else
-            panic("this shouldn't happen");
-    } else
-        return lru->extractTag(addr);
-*/
-}
-
diff --git a/src/mem/cache/tags/split.hh b/src/mem/cache/tags/split.hh
deleted file mode 100644
index e8954f791..000000000
--- a/src/mem/cache/tags/split.hh
+++ /dev/null
@@ -1,308 +0,0 @@
-/*
- * Copyright (c) 2004-2005 The Regents of The University of Michigan
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met: redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer;
- * redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution;
- * neither the name of the copyright holders nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * Authors: Lisa Hsu
- */
-
-/**
- * @file
- * Declaration of a split/partitioned tag store.
- */
-
-#ifndef __SPLIT_HH__
-#define __SPLIT_HH__
-
-#include <cstring>
-#include <list>
-
-#include "mem/cache/blk.hh" // base class
-#include "mem/cache/tags/split_blk.hh"
-#include "mem/packet.hh" // for inlined functions
-#include <assert.h>
-#include "mem/cache/tags/base.hh"
-#include "base/hashmap.hh"
-
-class BaseCache;
-class SplitLRU;
-class SplitLIFO;
-
-/**
- * A  cache tag store.
- */
-class Split : public BaseTags
-{
-  public:
-    /** Typedef the block type used in this tag store. */
-    typedef SplitBlk BlkType;
-    /** Typedef for a list of pointers to the local block class. */
-    typedef std::list<SplitBlk*> BlkList;
-  protected:
-    /** The number of sets in the cache. */
-    const int numSets;
-    /** The number of bytes in a block. */
-    const int blkSize;
-    /** Whether the 2nd partition (for the nic) is LIFO or not */
-    const bool lifo;
-    /** The hit latency. */
-    const int hitLatency;
-
-    Addr blkMask;
-
-    /** Number of NIC requests that hit in the NIC partition */
-    Stats::Scalar<> NR_NP_hits;
-    /** Number of NIC requests that hit in the CPU partition */
-    Stats::Scalar<> NR_CP_hits;
-    /** Number of CPU requests that hit in the NIC partition */
-    Stats::Scalar<> CR_NP_hits;
-    /** Number of CPU requests that hit in the CPU partition */
-    Stats::Scalar<> CR_CP_hits;
-    /** The number of nic replacements (i.e. misses) */
-    Stats::Scalar<> nic_repl;
-    /** The number of cpu replacements (i.e. misses) */
-    Stats::Scalar<> cpu_repl;
-
-    //For latency studies
-    /** the number of NIC blks that were used before evicted */
-    Stats::Scalar<> nicUsedWhenEvicted;
-    /** the total latency of used NIC blocks in the cache */
-    Stats::Scalar<> nicUsedTotLatency;
-    /** the total number of used NIC blocks evicted */
-    Stats::Scalar<> nicUsedTotEvicted;
-    /** the average number of cycles a used NIC blk is in the cache */
-    Stats::Formula nicUsedAvgLatency;
-    /** the Distribution of used NIC blk eviction times */
-    Stats::Distribution<> usedEvictDist;
-
-    /** the number of NIC blks that were unused before evicted */
-    Stats::Scalar<> nicUnusedWhenEvicted;
-    /** the total latency of unused NIC blks in the cache */
-    Stats::Scalar<> nicUnusedTotLatency;
-    /** the total number of unused NIC blocks evicted */
-    Stats::Scalar<> nicUnusedTotEvicted;
-    /** the average number of cycles an unused NIC blk is in the cache */
-    Stats::Formula nicUnusedAvgLatency;
-    /** the Distribution of unused NIC blk eviction times */
-    Stats::Distribution<> unusedEvictDist;
-
-    /** The total latency of NIC blocks to 1st usage time by CPU */
-    Stats::Scalar<> nicUseByCPUCycleTotal;
-    /** The total number of NIC blocks used */
-    Stats::Scalar<> nicBlksUsedByCPU;
-    /** the average number of cycles before a NIC blk that is used gets used by CPU */
-    Stats::Formula nicAvgUsageByCPULatency;
-    /** the Distribution of cycles time before a NIC blk is used by CPU*/
-    Stats::Distribution<> useByCPUCycleDist;
-
-    /** the number of CPU blks that were used before evicted */
-    Stats::Scalar<> cpuUsedBlks;
-    /** the number of CPU blks that were unused before evicted */
-    Stats::Scalar<> cpuUnusedBlks;
-
-    /** the avg number of cycles before a NIC blk is evicted */
-    Stats::Formula nicAvgLatency;
-
-    typedef m5::hash_map<Addr, int, m5::hash<Addr> > hash_t;
-    typedef hash_t::const_iterator memIter;
-    hash_t memHash;
-
-
-  private:
-    SplitLRU *lru;
-    SplitLRU *lru_net;
-    SplitLIFO *lifo_net;
-
-  public:
-    /**
-     * Construct and initialize this tag store.
-     * @param _numSets The number of sets in the cache.
-     * @param _blkSize The number of bytes in a block.
-     * @param _assoc The associativity of the cache.
-     * @param _hit_latency The latency in cycles for a hit.
-     */
-    Split(int _numSets, int _blkSize, int total_ways, int LRU1_assoc,
-          bool _lifo, bool _two_queue, int _hit_latency);
-
-    /**
-     * Destructor
-     */
-    virtual ~Split();
-
-    /**
-     * Register the stats for this object
-     * @param name The name to prepend to the stats name.
-     */
-    void regStats(const std::string &name);
-
-    /**
-     * Return the block size.
-     * @return the block size.
-     */
-    int getBlockSize()
-    {
-        return blkSize;
-    }
-
-    /**
-     * Return the subblock size. In the case of Split it is always the block
-     * size.
-     * @return The block size.
-     */
-    int getSubBlockSize()
-    {
-        return blkSize;
-    }
-
-    /**
-     * Search for the address in the cache.
-     * @param asid The address space ID.
-     * @param addr The address to find.
-     * @return True if the address is in the cache.
-     */
-    bool probe(Addr addr) const;
-
-    /**
-     * Invalidate the given block.
-     * @param blk The block to invalidate.
-     */
-    void invalidateBlk(BlkType *blk);
-
-    /**
-     * Finds the given address in the cache and update replacement data.
-     * Returns the access latency as a side effect.
-     * @param addr The address to find.
-     * @param asid The address space ID.
-     * @param lat The access latency.
-     * @return Pointer to the cache block if found.
-     */
-    SplitBlk* findBlock(Addr addr, int &lat);
-
-    /**
-     * Finds the given address in the cache, do not update replacement data.
-     * @param addr The address to find.
-     * @param asid The address space ID.
-     * @return Pointer to the cache block if found.
-     */
-    SplitBlk* findBlock(Addr addr) const;
-
-    /**
-     * Find a replacement block for the address provided.
-     * @param pkt The request to a find a replacement candidate for.
-     * @param writebacks List for any writebacks to be performed.
-     * @return The block to place the replacement in.
-     */
-    SplitBlk* findReplacement(Addr addr, PacketList &writebacks);
-
-
-    /**
-     * Generate the tag from the given address.
-     * @param addr The address to get the tag from.
-     * @return The tag of the address.
-     */
-    Addr extractTag(Addr addr) const;
-
-    /**
-     * Calculate the set index from the address.
-     * @param addr The address to get the set from.
-     * @return The set index of the address.
-     */
-    int extractSet(Addr addr) const
-    {
-        panic("should never call this!\n");
-        M5_DUMMY_RETURN
-    }
-
-    /**
-     * Get the block offset from an address.
-     * @param addr The address to get the offset of.
-     * @return The block offset.
-     */
-    int extractBlkOffset(Addr addr) const
-    {
-        return (addr & blkMask);
-    }
-
-    /**
-     * Align an address to the block size.
-     * @param addr the address to align.
-     * @return The block address.
-     */
-    Addr blkAlign(Addr addr) const
-    {
-        return (addr & ~(Addr) (blkMask));
-    }
-
-    /**
-     * Regenerate the block address from the tag.
-     * @param tag The tag of the block.
-     * @param set The set of the block.
-     * @return The block address.
-     */
-    Addr regenerateBlkAddr(Addr tag, int set) const;
-
-    /**
-     * Return the hit latency.
-     * @return the hit latency.
-     */
-    int getHitLatency() const
-    {
-        return hitLatency;
-    }
-
-    /**
-     * Read the data out of the internal storage of the given cache block.
-     * @param blk The cache block to read.
-     * @param data The buffer to read the data into.
-     * @return The cache block's data.
-     */
-    void readData(SplitBlk *blk, uint8_t *data)
-    {
-        std::memcpy(data, blk->data, blk->size);
-    }
-
-    /**
-     * Write data into the internal storage of the given cache block. Since in
-     * Split does not store data differently this just needs to update the size.
-     * @param blk The cache block to write.
-     * @param data The data to write.
-     * @param size The number of bytes to write.
-     * @param writebacks A list for any writebacks to be performed. May be
-     * needed when writing to a compressed block.
-     */
-    void writeData(SplitBlk *blk, uint8_t *data, int size,
-                   PacketList & writebacks)
-    {
-        assert(size <= blkSize);
-        blk->size = size;
-    }
-
-    /**
-     * Called at end of simulation to complete average block reference stats.
-     */
-    virtual void cleanupRefs();
-};
-
-#endif
diff --git a/src/mem/cache/tags/split_blk.hh b/src/mem/cache/tags/split_blk.hh
deleted file mode 100644
index d2efe08df..000000000
--- a/src/mem/cache/tags/split_blk.hh
+++ /dev/null
@@ -1,68 +0,0 @@
-/*
- * Copyright (c) 2004-2005 The Regents of The University of Michigan
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met: redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer;
- * redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution;
- * neither the name of the copyright holders nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * Authors: Lisa Hsu
- */
-
-/**
- * @file
- * Declaration of partitioned tag store cache block class.
- */
-
-#ifndef __SPLIT_BLK_HH__
-#define __SPLIT_BLK_HH__
-
-#include "mem/cache/blk.hh" // base class
-
-/**
- * Split cache block.
- */
-class SplitBlk : public CacheBlk {
-  public:
-    /** Has this block been touched? Used to aid calculation of warmup time. */
-    bool isTouched;
-    /** Has this block been used after being brought in? (for LIFO partition) */
-    bool isUsed;
-    /** is this blk a NIC block? (i.e. requested by the NIC) */
-    bool isNIC;
-    /** timestamp of the arrival of this block into the cache */
-    Tick ts;
-    /** the previous block in the LIFO partition (brought in before than me) */
-    SplitBlk *prev;
-    /** the next block in the LIFO partition (brought in later than me) */
-    SplitBlk *next;
-    /** which partition this block is in */
-    int part;
-
-    SplitBlk()
-        : isTouched(false), isUsed(false), isNIC(false), ts(0), prev(NULL), next(NULL),
-          part(0)
-    {}
-};
-
-#endif
-
diff --git a/src/mem/cache/tags/split_lifo.cc b/src/mem/cache/tags/split_lifo.cc
deleted file mode 100644
index 3bdc7cae9..000000000
--- a/src/mem/cache/tags/split_lifo.cc
+++ /dev/null
@@ -1,331 +0,0 @@
-/*
- * Copyright (c) 2004-2005 The Regents of The University of Michigan
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met: redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer;
- * redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution;
- * neither the name of the copyright holders nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * Authors: Lisa Hsu
- */
-
-/**
- * @file
- * Definitions of LIFO tag store usable in a partitioned cache.
- */
-
-#include <string>
-
-#include "mem/cache/base.hh"
-#include "base/intmath.hh"
-#include "mem/cache/tags/split_lifo.hh"
-#include "sim/core.hh"
-#include "base/trace.hh"
-
-using namespace std;
-
-SplitBlk*
-LIFOSet::findBlk(Addr tag) const
-{
-    for (SplitBlk *blk = firstIn; blk != NULL; blk = blk->next) {
-        if (blk->tag == tag && blk->isValid()) {
-            return blk;
-        }
-    }
-    return NULL;
-}
-
-void
-LIFOSet::moveToLastIn(SplitBlk *blk)
-{
-    if (blk == lastIn)
-        return;
-
-    if (blk == firstIn) {
-        blk->next->prev = NULL;
-    } else {
-        blk->prev->next = blk->next;
-        blk->next->prev = blk->prev;
-    }
-    blk->next = NULL;
-    blk->prev = lastIn;
-    lastIn->next = blk;
-
-    lastIn = blk;
-}
-
-void
-LIFOSet::moveToFirstIn(SplitBlk *blk)
-{
-    if (blk == firstIn)
-        return;
-
-    if (blk == lastIn) {
-        blk->prev->next = NULL;
-    } else {
-        blk->next->prev = blk->prev;
-        blk->prev->next = blk->next;
-    }
-
-    blk->prev = NULL;
-    blk->next = firstIn;
-    firstIn->prev = blk;
-
-    firstIn = blk;
-}
-
-// create and initialize a LIFO cache structure
-SplitLIFO::SplitLIFO(int _blkSize, int _size, int _ways, int _hit_latency, bool two_Queue, int _part) :
-    blkSize(_blkSize), size(_size), numBlks(_size/_blkSize), numSets((_size/_ways)/_blkSize), ways(_ways),
-    hitLatency(_hit_latency), twoQueue(two_Queue), part(_part)
-{
-    if (!isPowerOf2(blkSize))
-        fatal("cache block size (in bytes) must be a power of 2");
-    if (!(hitLatency > 0))
-        fatal("access latency in cycles must be at least on cycle");
-    if (_ways == 0)
-        fatal("if instantiating a splitLIFO, needs non-zero size!");
-
-
-    SplitBlk  *blk;
-    int i, j, blkIndex;
-
-    setShift = floorLog2(blkSize);
-    blkMask = blkSize - 1;
-    setMask = numSets - 1;
-    tagShift = setShift + floorLog2(numSets);
-
-    warmedUp = false;
-    /** @todo Make warmup percentage a parameter. */
-    warmupBound = size/blkSize;
-
-    // allocate data blocks
-    blks = new SplitBlk[numBlks];
-    sets = new LIFOSet[numSets];
-    dataBlks = new uint8_t[size];
-
-/*
-    // these start off point to same blk
-    top = &(blks[0]);
-    head = top;
-*/
-
-    blkIndex = 0;
-    for (i=0; i < numSets; ++i) {
-        sets[i].ways = ways;
-        sets[i].lastIn = &blks[blkIndex];
-        sets[i].firstIn = &blks[blkIndex + ways - 1];
-
-        /* 3 cases:  if there is 1 way, if there are 2 ways, or if there are 3+.
-           in the case of 1 way, last in and first out point to the same blocks,
-           and the next and prev pointers need to be assigned specially.  and so on
-        */
-        /* deal with the first way */
-        blk = &blks[blkIndex];
-        blk->prev = &blks[blkIndex + 1];
-        blk->next = NULL;
-        blk->data = &dataBlks[blkSize*blkIndex];
-        blk->size = blkSize;
-        blk->part = part;
-        blk->set = i;
-        ++blkIndex;
-
-        /* if there are "middle" ways, do them here */
-        if (ways > 2) {
-            for (j=1; j < ways-1; ++j) {
-                blk = &blks[blkIndex];
-                blk->data = &dataBlks[blkSize*blkIndex];
-                blk->prev = &blks[blkIndex+1];
-                blk->next = &blks[blkIndex-1];
-                blk->data = &(dataBlks[blkSize*blkIndex]);
-                blk->size = blkSize;
-                blk->part = part;
-                blk->set = i;
-                ++blkIndex;
-            }
-        }
-
-        /* do the final way here, depending on whether the final way is the only
-           way or not
-        */
-        if (ways > 1) {
-            blk =  &blks[blkIndex];
-            blk->prev = NULL;
-            blk->next = &blks[blkIndex - 1];
-            blk->data = &dataBlks[blkSize*blkIndex];
-            blk->size = blkSize;
-            blk->part = part;
-            blk->set = i;
-            ++blkIndex;
-        } else {
-            blk->prev = NULL;
-        }
-    }
-    assert(blkIndex == numBlks);
-}
-
-SplitLIFO::~SplitLIFO()
-{
-    delete [] blks;
-    delete [] sets;
-    delete [] dataBlks;
-}
-
-void
-SplitLIFO::regStats(const std::string &name)
-{
-    BaseTags::regStats(name);
-
-    hits
-        .name(name + ".hits")
-        .desc("number of hits on this partition")
-        .precision(0)
-        ;
-
-    misses
-        .name(name + ".misses")
-        .desc("number of misses in this partition")
-        .precision(0)
-        ;
-
-    invalidations
-        .name(name + ".invalidations")
-        .desc("number of invalidations in this partition")
-        .precision(0)
-        ;
-}
-
-// probe cache for presence of given block.
-bool
-SplitLIFO::probe(Addr addr) const
-{
-    Addr tag = extractTag(addr);
-    unsigned myset = extractSet(addr);
-
-    SplitBlk* blk = sets[myset].findBlk(tag);
-    return (blk != NULL);
-}
-
-SplitBlk*
-SplitLIFO::findBlock(Addr addr, int &lat)
-{
-    Addr tag = extractTag(addr);
-    unsigned set = extractSet(addr);
-    SplitBlk *blk = sets[set].findBlk(tag);
-
-    lat = hitLatency;
-
-    if (blk) {
-        DPRINTF(Split, "Found LIFO blk %#x in set %d, with tag %#x\n",
-                addr, set, tag);
-        hits++;
-
-        if (blk->whenReady > curTick && blk->whenReady - curTick > hitLatency)
-            lat = blk->whenReady - curTick;
-        blk->refCount +=1;
-
-        if (twoQueue) {
-            blk->isUsed = true;
-            sets[set].moveToFirstIn(blk);
-        } else {
-            sets[set].moveToLastIn(blk);
-        }
-    }
-
-    return blk;
-}
-
-
-SplitBlk*
-SplitLIFO::findBlock(Addr addr) const
-{
-    Addr tag = extractTag(addr);
-    unsigned set = extractSet(addr);
-    SplitBlk *blk = sets[set].findBlk(tag);
-
-    return blk;
-}
-
-SplitBlk*
-SplitLIFO::findReplacement(Addr addr, PacketList &writebacks)
-{
-    unsigned set = extractSet(addr);
-
-    SplitBlk *firstIn = sets[set].firstIn;
-    SplitBlk *lastIn = sets[set].lastIn;
-
-    SplitBlk *blk;
-    if (twoQueue && firstIn->isUsed) {
-        blk = firstIn;
-        blk->isUsed = false;
-        sets[set].moveToLastIn(blk);
-    } else {
-        int withValue = sets[set].withValue;
-        if (withValue == ways) {
-            blk = lastIn;
-        } else {
-            blk = &(sets[set].firstIn[ways - ++withValue]);
-        }
-    }
-
-    DPRINTF(Split, "just assigned %#x addr into LIFO, replacing %#x status %#x\n",
-            addr, regenerateBlkAddr(blk->tag, set), blk->status);
-    if (blk->isValid()) {
-        replacements[0]++;
-        totalRefs += blk->refCount;
-        ++sampledRefs;
-        blk->refCount = 0;
-    } else {
-        tagsInUse++;
-        blk->isTouched = true;
-        if (!warmedUp && tagsInUse.value() >= warmupBound) {
-            warmedUp = true;
-            warmupCycle = curTick;
-        }
-    }
-
-    misses++;
-
-    return blk;
-}
-
-void
-SplitLIFO::invalidateBlk(SplitLIFO::BlkType *blk)
-{
-    if (blk) {
-        blk->status = 0;
-        blk->isTouched = false;
-        tagsInUse--;
-        invalidations++;
-    }
-}
-
-void
-SplitLIFO::cleanupRefs()
-{
-    for (int i = 0; i < numBlks; ++i) {
-        if (blks[i].isValid()) {
-            totalRefs += blks[i].refCount;
-            ++sampledRefs;
-        }
-    }
-}
diff --git a/src/mem/cache/tags/split_lifo.hh b/src/mem/cache/tags/split_lifo.hh
deleted file mode 100644
index 0fd5f5c3c..000000000
--- a/src/mem/cache/tags/split_lifo.hh
+++ /dev/null
@@ -1,312 +0,0 @@
-/*
- * Copyright (c) 2004-2005 The Regents of The University of Michigan
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met: redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer;
- * redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution;
- * neither the name of the copyright holders nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * Authors: Lisa Hsu
- */
-
-/**
- * @file
- * Declaration of a LIFO tag store usable in a partitioned cache.
- */
-
-#ifndef __SPLIT_LIFO_HH__
-#define __SPLIT_LIFO_HH__
-
-#include <cstring>
-#include <list>
-
-#include "mem/cache/blk.hh" // base class
-#include "mem/cache/tags/split_blk.hh"
-#include "mem/packet.hh" // for inlined functions
-#include "base/hashmap.hh"
-#include <assert.h>
-#include "mem/cache/tags/base.hh"
-
-class BaseCache;
-
-/**
- * A LIFO set of cache blks
- */
-class LIFOSet {
-  public:
-    /** the number of blocks in this set */
-    int ways;
-
-    /** Cache blocks in this set, maintained in LIFO order where
-        0 = Last in (head) */
-    SplitBlk *lastIn;
-    SplitBlk *firstIn;
-
-    /** has the initial "filling" of this set finished? i.e., have you had
-     * 'ways' number of compulsory misses in this set yet? if withValue == ways,
-     * then yes.  withValue is meant to be the number of blocks in the set that have
-     * gone through their first compulsory miss.
-     */
-    int withValue;
-
-    /**
-     * Find a block matching the tag in this set.
-     * @param asid The address space ID.
-     * @param tag the Tag you are looking for
-     * @return Pointer to the block, if found, NULL otherwise
-     */
-    SplitBlk* findBlk(Addr tag) const;
-
-    void moveToLastIn(SplitBlk *blk);
-    void moveToFirstIn(SplitBlk *blk);
-
-    LIFOSet()
-        : ways(-1), lastIn(NULL), firstIn(NULL), withValue(0)
-    {}
-};
-
-/**
- * A LIFO cache tag store.
- */
-class SplitLIFO : public BaseTags
-{
-  public:
-    /** Typedef the block type used in this tag store. */
-    typedef SplitBlk BlkType;
-    /** Typedef for a list of pointers to the local block class. */
-    typedef std::list<SplitBlk*> BlkList;
-  protected:
-    /** The number of bytes in a block. */
-    const int blkSize;
-    /** the size of the cache in bytes */
-    const int size;
-    /** the number of blocks in the cache */
-    const int numBlks;
-    /** the number of sets in the cache */
-    const int numSets;
-    /** the number of ways in the cache */
-    const int ways;
-    /** The hit latency. */
-    const int hitLatency;
-    /** whether this is a "2 queue" replacement @sa moveToLastIn @sa moveToFirstIn */
-    const bool twoQueue;
-    /** indicator for which partition this is */
-    const int part;
-
-    /** The cache blocks. */
-    SplitBlk *blks;
-    /** The Cache sets */
-    LIFOSet *sets;
-    /** The data blocks, 1 per cache block. */
-    uint8_t *dataBlks;
-
-    /** The amount to shift the address to get the set. */
-    int setShift;
-    /** The amount to shift the address to get the tag. */
-    int tagShift;
-    /** Mask out all bits that aren't part of the set index. */
-    unsigned setMask;
-    /** Mask out all bits that aren't part of the block offset. */
-    unsigned blkMask;
-
-
-    /** the number of hit in this partition */
-    Stats::Scalar<> hits;
-    /** the number of blocks brought into this partition (i.e. misses) */
-    Stats::Scalar<> misses;
-    /** the number of invalidations in this partition */
-    Stats::Scalar<> invalidations;
-
-public:
-    /**
-     * Construct and initialize this tag store.
-     * @param _numSets The number of sets in the cache.
-     * @param _blkSize The number of bytes in a block.
-     * @param _assoc The associativity of the cache.
-     * @param _hit_latency The latency in cycles for a hit.
-     */
-    SplitLIFO(int _blkSize, int _size, int _ways, int _hit_latency, bool twoQueue, int _part);
-
-    /**
-     * Destructor
-     */
-    virtual ~SplitLIFO();
-
-    /**
-     * Register the statistics for this object
-     * @param name The name to precede the stat
-     */
-    void regStats(const std::string &name);
-
-    /**
-     * Return the block size.
-     * @return the block size.
-     */
-    int getBlockSize()
-    {
-        return blkSize;
-    }
-
-    /**
-     * Return the subblock size. In the case of LIFO it is always the block
-     * size.
-     * @return The block size.
-     */
-    int getSubBlockSize()
-    {
-        return blkSize;
-    }
-
-    /**
-     * Search for the address in the cache.
-     * @param asid The address space ID.
-     * @param addr The address to find.
-     * @return True if the address is in the cache.
-     */
-    bool probe( Addr addr) const;
-
-    /**
-     * Invalidate the given block.
-     * @param blk The block to invalidate.
-     */
-    void invalidateBlk(BlkType *blk);
-
-    /**
-     * Finds the given address in the cache and update replacement data.
-     * Returns the access latency as a side effect.
-     * @param addr The address to find.
-     * @param asid The address space ID.
-     * @param lat The access latency.
-     * @return Pointer to the cache block if found.
-     */
-    SplitBlk* findBlock(Addr addr, int &lat);
-
-    /**
-     * Finds the given address in the cache, do not update replacement data.
-     * @param addr The address to find.
-     * @param asid The address space ID.
-     * @return Pointer to the cache block if found.
-     */
-    SplitBlk* findBlock(Addr addr) const;
-
-    /**
-     * Find a replacement block for the address provided.
-     * @param pkt The request to a find a replacement candidate for.
-     * @param writebacks List for any writebacks to be performed.
-     * @return The block to place the replacement in.
-     */
-    SplitBlk* findReplacement(Addr addr, PacketList &writebacks);
-
-    /**
-     * Generate the tag from the given address.
-     * @param addr The address to get the tag from.
-     * @return The tag of the address.
-     */
-    Addr extractTag(Addr addr) const
-    {
-        return (addr >> tagShift);
-    }
-
-   /**
-     * Calculate the set index from the address.
-     * @param addr The address to get the set from.
-     * @return The set index of the address.
-     */
-    int extractSet(Addr addr) const
-    {
-        return ((addr >> setShift) & setMask);
-    }
-
-    /**
-     * Get the block offset from an address.
-     * @param addr The address to get the offset of.
-     * @return The block offset.
-     */
-    int extractBlkOffset(Addr addr) const
-    {
-        return (addr & blkMask);
-    }
-
-    /**
-     * Align an address to the block size.
-     * @param addr the address to align.
-     * @return The block address.
-     */
-    Addr blkAlign(Addr addr) const
-    {
-        return (addr & ~(Addr)blkMask);
-    }
-
-    /**
-     * Regenerate the block address from the tag.
-     * @param tag The tag of the block.
-     * @param set The set of the block.
-     * @return The block address.
-     */
-    Addr regenerateBlkAddr(Addr tag, unsigned set) const
-    {
-        return ((tag << tagShift) | ((Addr)set << setShift));
-    }
-
-    /**
-     * Return the hit latency.
-     * @return the hit latency.
-     */
-    int getHitLatency() const
-    {
-        return hitLatency;
-    }
-
-    /**
-     * Read the data out of the internal storage of the given cache block.
-     * @param blk The cache block to read.
-     * @param data The buffer to read the data into.
-     * @return The cache block's data.
-     */
-    void readData(SplitBlk *blk, uint8_t *data)
-    {
-        std::memcpy(data, blk->data, blk->size);
-    }
-
-    /**
-     * Write data into the internal storage of the given cache block. Since in
-     * LIFO does not store data differently this just needs to update the size.
-     * @param blk The cache block to write.
-     * @param data The data to write.
-     * @param size The number of bytes to write.
-     * @param writebacks A list for any writebacks to be performed. May be
-     * needed when writing to a compressed block.
-     */
-    void writeData(SplitBlk *blk, uint8_t *data, int size,
-                   PacketList & writebacks)
-    {
-        assert(size <= blkSize);
-        blk->size = size;
-    }
-
-    /**
-     * Called at end of simulation to complete average block reference stats.
-     */
-    virtual void cleanupRefs();
-};
-
-#endif
diff --git a/src/mem/cache/tags/split_lru.cc b/src/mem/cache/tags/split_lru.cc
deleted file mode 100644
index bcccdcb30..000000000
--- a/src/mem/cache/tags/split_lru.cc
+++ /dev/null
@@ -1,260 +0,0 @@
-/*
- * Copyright (c) 2004-2005 The Regents of The University of Michigan
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met: redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer;
- * redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution;
- * neither the name of the copyright holders nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * Authors: Lisa Hsu
- */
-
-/**
- * @file
- * Definitions of LRU tag store for a partitioned cache.
- */
-
-#include <string>
-
-#include "mem/cache/base.hh"
-#include "base/intmath.hh"
-#include "mem/cache/tags/split_lru.hh"
-#include "sim/core.hh"
-
-using namespace std;
-
-SplitBlk*
-SplitCacheSet::findBlk(Addr tag) const
-{
-    for (int i = 0; i < assoc; ++i) {
-        if (blks[i]->tag == tag && blks[i]->isValid()) {
-            return blks[i];
-        }
-    }
-    return 0;
-}
-
-
-void
-SplitCacheSet::moveToHead(SplitBlk *blk)
-{
-    // nothing to do if blk is already head
-    if (blks[0] == blk)
-        return;
-
-    // write 'next' block into blks[i], moving up from MRU toward LRU
-    // until we overwrite the block we moved to head.
-
-    // start by setting up to write 'blk' into blks[0]
-    int i = 0;
-    SplitBlk *next = blk;
-
-    do {
-        assert(i < assoc);
-        // swap blks[i] and next
-        SplitBlk *tmp = blks[i];
-        blks[i] = next;
-        next = tmp;
-        ++i;
-    } while (next != blk);
-}
-
-
-// create and initialize a LRU/MRU cache structure
-SplitLRU::SplitLRU(int _numSets, int _blkSize, int _assoc, int _hit_latency, int _part) :
-    numSets(_numSets), blkSize(_blkSize), assoc(_assoc), hitLatency(_hit_latency), part(_part)
-{
-    // Check parameters
-    if (blkSize < 4 || !isPowerOf2(blkSize)) {
-        fatal("Block size must be at least 4 and a power of 2");
-    }
-    if (numSets <= 0 || !isPowerOf2(numSets)) {
-        fatal("# of sets must be non-zero and a power of 2");
-    }
-    if (assoc <= 0) {
-        fatal("associativity must be greater than zero");
-    }
-    if (hitLatency <= 0) {
-        fatal("access latency must be greater than zero");
-    }
-
-    SplitBlk  *blk;
-    int i, j, blkIndex;
-
-    blkMask = blkSize - 1;
-    setShift = floorLog2(blkSize);
-    setMask = numSets - 1;
-    tagShift = setShift + floorLog2(numSets);
-    warmedUp = false;
-    /** @todo Make warmup percentage a parameter. */
-    warmupBound = numSets * assoc;
-
-    sets = new SplitCacheSet[numSets];
-    blks = new SplitBlk[numSets * assoc];
-    // allocate data storage in one big chunk
-    dataBlks = new uint8_t[numSets*assoc*blkSize];
-
-    blkIndex = 0;	// index into blks array
-    for (i = 0; i < numSets; ++i) {
-        sets[i].assoc = assoc;
-
-        sets[i].blks = new SplitBlk*[assoc];
-
-        // link in the data blocks
-        for (j = 0; j < assoc; ++j) {
-            // locate next cache block
-            blk = &blks[blkIndex];
-            blk->data = &dataBlks[blkSize*blkIndex];
-            ++blkIndex;
-
-            // invalidate new cache block
-            blk->status = 0;
-
-            //EGH Fix Me : do we need to initialize blk?
-
-            // Setting the tag to j is just to prevent long chains in the hash
-            // table; won't matter because the block is invalid
-            blk->tag = j;
-            blk->whenReady = 0;
-            blk->isTouched = false;
-            blk->size = blkSize;
-            sets[i].blks[j]=blk;
-            blk->set = i;
-            blk->part = part;
-        }
-    }
-}
-
-SplitLRU::~SplitLRU()
-{
-    delete [] dataBlks;
-    delete [] blks;
-    delete [] sets;
-}
-
-void
-SplitLRU::regStats(const std::string &name)
-{
-    BaseTags::regStats(name);
-
-    hits
-        .name(name + ".hits")
-        .desc("number of hits on this partition")
-        .precision(0)
-        ;
-
-    misses
-        .name(name + ".misses")
-        .desc("number of misses in this partition")
-        .precision(0)
-        ;
-}
-
-// probe cache for presence of given block.
-bool
-SplitLRU::probe(Addr addr) const
-{
-    //  return(findBlock(Read, addr, asid) != 0);
-    Addr tag = extractTag(addr);
-    unsigned myset = extractSet(addr);
-
-    SplitBlk *blk = sets[myset].findBlk(tag);
-
-    return (blk != NULL);	// true if in cache
-}
-
-SplitBlk*
-SplitLRU::findBlock(Addr addr, int &lat)
-{
-    Addr tag = extractTag(addr);
-    unsigned set = extractSet(addr);
-    SplitBlk *blk = sets[set].findBlk(tag);
-    lat = hitLatency;
-    if (blk != NULL) {
-        // move this block to head of the MRU list
-        sets[set].moveToHead(blk);
-        if (blk->whenReady > curTick && blk->whenReady - curTick > hitLatency){
-            lat = blk->whenReady - curTick;
-        }
-        blk->refCount += 1;
-        hits++;
-    }
-
-    return blk;
-}
-
-
-SplitBlk*
-SplitLRU::findBlock(Addr addr) const
-{
-    Addr tag = extractTag(addr);
-    unsigned set = extractSet(addr);
-    SplitBlk *blk = sets[set].findBlk(tag);
-    return blk;
-}
-
-SplitBlk*
-SplitLRU::findReplacement(Addr addr, PacketList &writebacks)
-{
-    unsigned set = extractSet(addr);
-    // grab a replacement candidate
-    SplitBlk *blk = sets[set].blks[assoc-1];
-    sets[set].moveToHead(blk);
-    if (blk->isValid()) {
-        replacements[0]++;
-        totalRefs += blk->refCount;
-        ++sampledRefs;
-        blk->refCount = 0;
-    } else if (!blk->isTouched) {
-        tagsInUse++;
-        blk->isTouched = true;
-        if (!warmedUp && tagsInUse.value() >= warmupBound) {
-            warmedUp = true;
-            warmupCycle = curTick;
-        }
-    }
-
-    misses++;
-
-    return blk;
-}
-
-void
-SplitLRU::invalidateBlk(SplitLRU::BlkType *blk)
-{
-    if (blk) {
-        blk->status = 0;
-        blk->isTouched = false;
-        tagsInUse--;
-    }
-}
-
-void
-SplitLRU::cleanupRefs()
-{
-    for (int i = 0; i < numSets*assoc; ++i) {
-        if (blks[i].isValid()) {
-            totalRefs += blks[i].refCount;
-            ++sampledRefs;
-        }
-    }
-}
diff --git a/src/mem/cache/tags/split_lru.hh b/src/mem/cache/tags/split_lru.hh
deleted file mode 100644
index d41b6efa7..000000000
--- a/src/mem/cache/tags/split_lru.hh
+++ /dev/null
@@ -1,295 +0,0 @@
-/*
- * Copyright (c) 2004-2005 The Regents of The University of Michigan
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met: redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer;
- * redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution;
- * neither the name of the copyright holders nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * Authors: Lisa Hsu
- */
-
-/**
- * @file
- * Declaration of a LRU tag store for a partitioned cache.
- */
-
-#ifndef __SPLIT_LRU_HH__
-#define __SPLIT_LRU_HH__
-
-#include <cstring>
-#include <list>
-
-#include "mem/cache/blk.hh" // base class
-#include "mem/cache/tags/split_blk.hh"
-#include "mem/packet.hh" // for inlined functions
-#include <assert.h>
-#include "mem/cache/tags/base.hh"
-
-class BaseCache;
-
-/**
- * An associative set of cache blocks.
- */
-
-class SplitCacheSet
-{
-  public:
-    /** The associativity of this set. */
-    int assoc;
-
-    /** Cache blocks in this set, maintained in LRU order 0 = MRU. */
-    SplitBlk **blks;
-
-    /**
-     * Find a block matching the tag in this set.
-     * @param asid The address space ID.
-     * @param tag The Tag to find.
-     * @return Pointer to the block if found.
-     */
-    SplitBlk* findBlk(Addr tag) const;
-
-    /**
-     * Move the given block to the head of the list.
-     * @param blk The block to move.
-     */
-    void moveToHead(SplitBlk *blk);
-};
-
-/**
- * A LRU cache tag store.
- */
-class SplitLRU : public BaseTags
-{
-  public:
-    /** Typedef the block type used in this tag store. */
-    typedef SplitBlk BlkType;
-    /** Typedef for a list of pointers to the local block class. */
-    typedef std::list<SplitBlk*> BlkList;
-  protected:
-    /** The number of sets in the cache. */
-    const int numSets;
-    /** The number of bytes in a block. */
-    const int blkSize;
-    /** The associativity of the cache. */
-    const int assoc;
-    /** The hit latency. */
-    const int hitLatency;
-    /** indicator for which partition this is */
-    const int part;
-
-    /** The cache sets. */
-    SplitCacheSet *sets;
-
-    /** The cache blocks. */
-    SplitBlk *blks;
-    /** The data blocks, 1 per cache block. */
-    uint8_t *dataBlks;
-
-    /** The amount to shift the address to get the set. */
-    int setShift;
-    /** The amount to shift the address to get the tag. */
-    int tagShift;
-    /** Mask out all bits that aren't part of the set index. */
-    unsigned setMask;
-    /** Mask out all bits that aren't part of the block offset. */
-    unsigned blkMask;
-
-    /** number of hits in this partition */
-    Stats::Scalar<> hits;
-    /** number of blocks brought into this partition (i.e. misses) */
-    Stats::Scalar<> misses;
-
-public:
-    /**
-     * Construct and initialize this tag store.
-     * @param _numSets The number of sets in the cache.
-     * @param _blkSize The number of bytes in a block.
-     * @param _assoc The associativity of the cache.
-     * @param _hit_latency The latency in cycles for a hit.
-     */
-    SplitLRU(int _numSets, int _blkSize, int _assoc, int _hit_latency, int _part);
-
-    /**
-     * Destructor
-     */
-    virtual ~SplitLRU();
-
-    /**
-     * Register the statistics for this object
-     * @param name The name to precede the stat
-     */
-    void regStats(const std::string &name);
-
-    /**
-     * Return the block size.
-     * @return the block size.
-     */
-    int getBlockSize()
-    {
-        return blkSize;
-    }
-
-    /**
-     * Return the subblock size. In the case of LRU it is always the block
-     * size.
-     * @return The block size.
-     */
-    int getSubBlockSize()
-    {
-        return blkSize;
-    }
-
-    /**
-     * Search for the address in the cache.
-     * @param asid The address space ID.
-     * @param addr The address to find.
-     * @return True if the address is in the cache.
-     */
-    bool probe(Addr addr) const;
-
-    /**
-     * Invalidate the given block.
-     * @param blk The block to invalidate.
-     */
-    void invalidateBlk(BlkType *blk);
-
-    /**
-     * Finds the given address in the cache and update replacement data.
-     * Returns the access latency as a side effect.
-     * @param addr The address to find.
-     * @param asid The address space ID.
-     * @param lat The access latency.
-     * @return Pointer to the cache block if found.
-     */
-    SplitBlk* findBlock(Addr addr, int &lat);
-
-    /**
-     * Finds the given address in the cache, do not update replacement data.
-     * @param addr The address to find.
-     * @param asid The address space ID.
-     * @return Pointer to the cache block if found.
-     */
-    SplitBlk* findBlock(Addr addr) const;
-
-    /**
-     * Find a replacement block for the address provided.
-     * @param pkt The request to a find a replacement candidate for.
-     * @param writebacks List for any writebacks to be performed.
-     * @return The block to place the replacement in.
-     */
-    SplitBlk* findReplacement(Addr addr, PacketList &writebacks);
-
-    /**
-     * Generate the tag from the given address.
-     * @param addr The address to get the tag from.
-     * @return The tag of the address.
-     */
-    Addr extractTag(Addr addr) const
-    {
-        return (addr >> tagShift);
-    }
-
-    /**
-     * Calculate the set index from the address.
-     * @param addr The address to get the set from.
-     * @return The set index of the address.
-     */
-    int extractSet(Addr addr) const
-    {
-        return ((addr >> setShift) & setMask);
-    }
-
-    /**
-     * Get the block offset from an address.
-     * @param addr The address to get the offset of.
-     * @return The block offset.
-     */
-    int extractBlkOffset(Addr addr) const
-    {
-        return (addr & blkMask);
-    }
-
-    /**
-     * Align an address to the block size.
-     * @param addr the address to align.
-     * @return The block address.
-     */
-    Addr blkAlign(Addr addr) const
-    {
-        return (addr & ~(Addr)blkMask);
-    }
-
-    /**
-     * Regenerate the block address from the tag.
-     * @param tag The tag of the block.
-     * @param set The set of the block.
-     * @return The block address.
-     */
-    Addr regenerateBlkAddr(Addr tag, unsigned set) const
-    {
-        return ((tag << tagShift) | ((Addr)set << setShift));
-    }
-
-    /**
-     * Return the hit latency.
-     * @return the hit latency.
-     */
-    int getHitLatency() const
-    {
-        return hitLatency;
-    }
-
-    /**
-     * Read the data out of the internal storage of the given cache block.
-     * @param blk The cache block to read.
-     * @param data The buffer to read the data into.
-     * @return The cache block's data.
-     */
-    void readData(SplitBlk *blk, uint8_t *data)
-    {
-        std::memcpy(data, blk->data, blk->size);
-    }
-
-    /**
-     * Write data into the internal storage of the given cache block. Since in
-     * LRU does not store data differently this just needs to update the size.
-     * @param blk The cache block to write.
-     * @param data The data to write.
-     * @param size The number of bytes to write.
-     * @param writebacks A list for any writebacks to be performed. May be
-     * needed when writing to a compressed block.
-     */
-    void writeData(SplitBlk *blk, uint8_t *data, int size,
-                   PacketList & writebacks)
-    {
-        assert(size <= blkSize);
-        blk->size = size;
-    }
-
-    /**
-     * Called at end of simulation to complete average block reference stats.
-     */
-    virtual void cleanupRefs();
-};
-
-#endif