20 files changed, 126 insertions, 23 deletions
diff --git a/configs/example/se.py b/configs/example/se.py
index c490ed6b6..7c09bcc5c 100644
--- a/configs/example/se.py
+++ b/configs/example/se.py
@@ -151,6 +151,7 @@ if options.l2cache:
     system.tol2bus = Bus()
     system.l2.cpu_side = system.tol2bus.port
     system.l2.mem_side = system.membus.port
+    system.l2.num_cpus = np
 
 for i in xrange(np):
     if options.caches:
diff --git a/src/mem/cache/BaseCache.py b/src/mem/cache/BaseCache.py
index 5ded05400..dffac2234 100644
--- a/src/mem/cache/BaseCache.py
+++ b/src/mem/cache/BaseCache.py
@@ -44,6 +44,7 @@ class BaseCache(MemObject):
     prioritizeRequests = Param.Bool(False,
         "always service demand misses first")
     repl = Param.Repl(NULL, "replacement policy")
+    num_cpus =  Param.Int(1, "number of cpus sharing this cache")
     size = Param.MemorySize("capacity in bytes")
     forward_snoops = Param.Bool(True,
         "forward snoops from mem side to cpu side")
diff --git a/src/mem/cache/base.cc b/src/mem/cache/base.cc
index fe1f580bd..70bc51cda 100644
--- a/src/mem/cache/base.cc
+++ b/src/mem/cache/base.cc
@@ -62,7 +62,8 @@ BaseCache::BaseCache(const Params *p)
       noTargetMSHR(NULL),
       missCount(p->max_miss_count),
       drainEvent(NULL),
-      addrRange(p->addr_range)
+      addrRange(p->addr_range),
+      _numCpus(p->num_cpus)
 {
 }
 
@@ -148,7 +149,11 @@ BaseCache::regStats()
         const string &cstr = cmd.toString();
 
         hits[access_idx]
-            .init(maxThreadsPerCPU)
+#if FULL_SYSTEM
+            .init(_numCpus + 1)
+#else
+            .init(_numCpus)
+#endif
             .name(name() + "." + cstr + "_hits")
             .desc("number of " + cstr + " hits")
             .flags(total | nozero | nonan)
@@ -185,7 +190,11 @@ BaseCache::regStats()
         const string &cstr = cmd.toString();
 
         misses[access_idx]
-            .init(maxThreadsPerCPU)
+#if FULL_SYSTEM
+            .init(_numCpus + 1)
+#else
+            .init(_numCpus)
+#endif
             .name(name() + "." + cstr + "_misses")
             .desc("number of " + cstr + " misses")
             .flags(total | nozero | nonan)
diff --git a/src/mem/cache/base.hh b/src/mem/cache/base.hh
index c245fecd2..62e8ae126 100644
--- a/src/mem/cache/base.hh
+++ b/src/mem/cache/base.hh
@@ -47,6 +47,7 @@
 #include "base/statistics.hh"
 #include "base/trace.hh"
 #include "base/types.hh"
+#include "config/full_system.hh"
 #include "mem/cache/mshr_queue.hh"
 #include "mem/mem_object.hh"
 #include "mem/packet.hh"
@@ -219,7 +220,11 @@ class BaseCache : public MemObject
      * Normally this is all possible memory addresses. */
     Range<Addr> addrRange;
 
+    /** number of cpus sharing this cache - from config file */
+    int _numCpus;
+
   public:
+    int numCpus() { return _numCpus; }
     // Statistics
     /**
      * @addtogroup CacheStatistics
@@ -481,9 +486,25 @@ class BaseCache : public MemObject
 
     virtual bool inMissQueue(Addr addr) = 0;
 
-    void incMissCount(PacketPtr pkt)
+    void incMissCount(PacketPtr pkt, int id)
     {
-        misses[pkt->cmdToIndex()][0/*pkt->req->threadId()*/]++;
+        // Count one miss for pkt's command, bucketed by requestor id.
+        if (pkt->cmd == MemCmd::Writeback) {
+            assert(id == -1);
+            misses[pkt->cmdToIndex()][0]++;
+            /* Writebacks have no context id (asserted above), so every
+             * writeback miss is lumped into bucket 0.  Per the original
+             * author, writeback hit/miss stats are not used in any
+             * aggregate hit/miss calculation. */
+#if FULL_SYSTEM
+        } else if (id == -1) {
+            // Non-writeback packet with id -1: treated as a device access
+            // and counted in the extra bucket at index _numCpus.
+            misses[pkt->cmdToIndex()][_numCpus]++;
+#endif
+        } else {
+            misses[pkt->cmdToIndex()][id % _numCpus]++;
+        }
 
         if (missCount) {
             --missCount;
@@ -491,6 +512,29 @@ class BaseCache : public MemObject
                 exitSimLoop("A cache reached the maximum miss count");
         }
     }
+    void incHitCount(PacketPtr pkt, int id)
+    {
+        /* Count one hit for pkt's command, bucketed by requestor id.
+         * Writebacks carry no context id (id is asserted to be -1), so all
+         * writeback hits go to bucket 0; per the original author the demand
+         * and non-demand hit totals exclude writebacks, so nothing else is
+         * skewed.  NOTE(review): when FULL_SYSTEM is not set, a
+         * non-writeback packet with id -1 would reach the final else and
+         * index with -1 % _numCpus -- confirm callers never do that. */
+        if (pkt->cmd == MemCmd::Writeback) {
+            assert(id == -1);
+            hits[pkt->cmdToIndex()][0]++;
+#if FULL_SYSTEM
+        } else if (id == -1) {
+            // Non-writeback packet with id -1: treated as a device access
+            // and counted in the extra bucket at index _numCpus.
+            hits[pkt->cmdToIndex()][_numCpus]++;
+#endif
+        } else {
+            // % folds ids >= _numCpus (e.g. switch cpus) onto valid buckets
+            hits[pkt->cmdToIndex()][id % _numCpus]++;
+        }
+    }
 
 };
 
diff --git a/src/mem/cache/blk.hh b/src/mem/cache/blk.hh
index 4f023e848..bf78a2268 100644
--- a/src/mem/cache/blk.hh
+++ b/src/mem/cache/blk.hh
@@ -104,6 +104,9 @@ class CacheBlk
     /** Number of references to this block since it was brought in. */
     int refCount;
 
+    /** holds the context source ID of the requestor for this block. */
+    int contextSrc;
+
   protected:
     /**
      * Represents that the indicated thread context has a "lock" on
@@ -133,7 +136,7 @@ class CacheBlk
 
     CacheBlk()
         : asid(-1), tag(0), data(0) ,size(0), status(0), whenReady(0),
-          set(-1), isTouched(false), refCount(0)
+          set(-1), isTouched(false), refCount(0), contextSrc(-1)
     {}
 
     /**
diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh
index 2397a17c5..206361f88 100644
--- a/src/mem/cache/cache_impl.hh
+++ b/src/mem/cache/cache_impl.hh
@@ -277,7 +277,7 @@ Cache<TagStore>::access(PacketPtr pkt, BlkType *&blk,
 
         if (pkt->needsExclusive() ? blk->isWritable() : blk->isReadable()) {
             // OK to satisfy access
-            hits[pkt->cmdToIndex()][0/*pkt->req->threadId()*/]++;
+            incHitCount(pkt, id);
             satisfyCpuSideRequest(pkt, blk);
             return true;
         }
@@ -297,7 +297,7 @@ Cache<TagStore>::access(PacketPtr pkt, BlkType *&blk,
             if (blk == NULL) {
                 // no replaceable block available, give up.
                 // writeback will be forwarded to next level.
-                incMissCount(pkt);
+                incMissCount(pkt, id);
                 return false;
             }
             int id = pkt->req->hasContextId() ? pkt->req->contextId() : -1;
@@ -308,11 +308,11 @@ Cache<TagStore>::access(PacketPtr pkt, BlkType *&blk,
         blk->status |= BlkDirty;
         // nothing else to do; writeback doesn't expect response
         assert(!pkt->needsResponse());
-        hits[pkt->cmdToIndex()][0/*pkt->req->threadId()*/]++;
+        incHitCount(pkt, id);
         return true;
     }
 
-    incMissCount(pkt);
+    incMissCount(pkt, id);
 
     if (blk == NULL && pkt->isLLSC() && pkt->isWrite()) {
         // complete miss on store conditional... just give up now
diff --git a/src/mem/cache/tags/base.cc b/src/mem/cache/tags/base.cc
index e18026a21..8c6c145ca 100644
--- a/src/mem/cache/tags/base.cc
+++ b/src/mem/cache/tags/base.cc
@@ -87,5 +87,20 @@ BaseTags::regStats(const string &name)
         .desc("Cycle when the warmup percentage was hit.")
         ;
 
+    occupancies
+        .init(cache->numCpus())
+        .name(name + ".occ_blocks")
+        .desc("Average occupied blocks per context")
+        .flags(nozero | nonan)
+        ;
+
+    avgOccs
+        .name(name + ".occ_%")
+        .desc("Average percentage of cache occupancy")
+        .flags(nozero)
+        ;
+
+    avgOccs = occupancies / Stats::constant(numBlocks);
+
     registerExitCallback(new BaseTagsCallback(this));
 }
diff --git a/src/mem/cache/tags/base.hh b/src/mem/cache/tags/base.hh
index 46c7186b1..fc8470290 100644
--- a/src/mem/cache/tags/base.hh
+++ b/src/mem/cache/tags/base.hh
@@ -63,6 +63,9 @@ class BaseTags
     /** Marked true when the cache is warmed up. */
     bool warmedUp;
 
+    /** the number of blocks in the cache */
+    unsigned numBlocks;
+
     // Statistics
     /**
      * @addtogroup CacheStatistics
@@ -92,6 +95,13 @@ class BaseTags
 
     /** The cycle that the warmup percentage was hit. */
     Stats::Scalar warmupCycle;
+
+    /** Average occupancy of each context/cpu using the cache */
+    Stats::AverageVector occupancies;
+
+    /** Average fraction of the cache's blocks held by each context/cpu */
+    Stats::Formula avgOccs;
+
     /**
      * @}
      */
diff --git a/src/mem/cache/tags/fa_lru.cc b/src/mem/cache/tags/fa_lru.cc
index 808f9e25a..d13ba4973 100644
--- a/src/mem/cache/tags/fa_lru.cc
+++ b/src/mem/cache/tags/fa_lru.cc
@@ -43,8 +43,7 @@
 using namespace std;
 
 FALRU::FALRU(unsigned _blkSize, unsigned _size, unsigned hit_latency)
-    : blkSize(_blkSize), size(_size),
-      numBlks(size/blkSize), hitLatency(hit_latency)
+    : blkSize(_blkSize), size(_size), hitLatency(hit_latency)
 {
     if (!isPowerOf2(blkSize))
         fatal("cache block size (in bytes) `%d' must be a power of two",
@@ -65,23 +64,24 @@ FALRU::FALRU(unsigned _blkSize, unsigned _size, unsigned hit_latency)
 
     warmedUp = false;
     warmupBound = size/blkSize;
+    numBlocks = size/blkSize;
 
-    blks = new FALRUBlk[numBlks];
+    blks = new FALRUBlk[numBlocks];
     head = &(blks[0]);
-    tail = &(blks[numBlks-1]);
+    tail = &(blks[numBlocks-1]);
 
     head->prev = NULL;
     head->next = &(blks[1]);
     head->inCache = cacheMask;
 
-    tail->prev = &(blks[numBlks-2]);
+    tail->prev = &(blks[numBlocks-2]);
     tail->next = NULL;
     tail->inCache = 0;
 
     unsigned index = (1 << 17) / blkSize;
     unsigned j = 0;
     int flags = cacheMask;
-    for (unsigned i = 1; i < numBlks - 1; i++) {
+    for (unsigned i = 1; i < numBlocks - 1; i++) {
         blks[i].inCache = flags;
         if (i == index - 1){
             cacheBoundaries[j] = &(blks[i]);
@@ -94,7 +94,7 @@ FALRU::FALRU(unsigned _blkSize, unsigned _size, unsigned hit_latency)
         blks[i].isTouched = false;
     }
     assert(j == numCaches);
-    assert(index == numBlks);
+    assert(index == numBlocks);
     //assert(check());
 }
 
diff --git a/src/mem/cache/tags/fa_lru.hh b/src/mem/cache/tags/fa_lru.hh
index b20d25d2b..5047da12a 100644
--- a/src/mem/cache/tags/fa_lru.hh
+++ b/src/mem/cache/tags/fa_lru.hh
@@ -84,8 +84,6 @@ class FALRU : public BaseTags
     const unsigned blkSize;
     /** The size of the cache. */
     const unsigned size;
-    /** The number of blocks in the cache. */
-    const unsigned numBlks; // calculated internally
     /** The hit latency of the cache. */
     const unsigned hitLatency;
 
diff --git a/src/mem/cache/tags/iic.cc b/src/mem/cache/tags/iic.cc
index a8ef4e6fb..f9afa5839 100644
--- a/src/mem/cache/tags/iic.cc
+++ b/src/mem/cache/tags/iic.cc
@@ -60,7 +60,6 @@ IIC::IIC(IIC::Params &params) :
     tagShift(floorLog2(blkSize)), blkMask(blkSize - 1),
     subShift(floorLog2(subSize)), subMask(numSub - 1),
     hashDelay(params.hashDelay),
-    numBlocks(params.size/subSize),
     numTags(hashSets * assoc + params.size/blkSize -1),
     numSecondary(params.size/blkSize),
     tagNull(numTags),
@@ -88,6 +87,7 @@ IIC::IIC(IIC::Params &params) :
 
     warmedUp = false;
     warmupBound = params.size/blkSize;
+    numBlocks = params.size/subSize;
 
     // Replacement Policy Initialization
     repl = params.rp;
diff --git a/src/mem/cache/tags/iic.hh b/src/mem/cache/tags/iic.hh
index c96cdaf3e..5b12128c6 100644
--- a/src/mem/cache/tags/iic.hh
+++ b/src/mem/cache/tags/iic.hh
@@ -197,8 +197,6 @@ class IIC : public BaseTags
 
     /** The latency of a hash lookup. */
     const unsigned hashDelay;
-    /** The number of data blocks. */
-    const unsigned numBlocks;
     /** The total number of tags in primary and secondary. */
     const unsigned numTags;
     /** The number of tags in the secondary tag store. */
diff --git a/src/mem/cache/tags/lru.cc b/src/mem/cache/tags/lru.cc
index 81d44b0c0..0da2a72e9 100644
--- a/src/mem/cache/tags/lru.cc
+++ b/src/mem/cache/tags/lru.cc
@@ -74,7 +74,8 @@ LRU::LRU(unsigned _numSets, unsigned _blkSize, unsigned _assoc,
     sets = new CacheSet[numSets];
     blks = new BlkType[numSets * assoc];
     // allocate data storage in one big chunk
-    dataBlks = new uint8_t[numSets*assoc*blkSize];
+    numBlocks = numSets * assoc;
+    dataBlks = new uint8_t[numBlocks * blkSize];
 
     unsigned blkIndex = 0;       // index into blks array
     for (unsigned i = 0; i < numSets; ++i) {
@@ -157,6 +158,12 @@ LRU::findVictim(Addr addr, PacketList &writebacks)
         ++sampledRefs;
         blk->refCount = 0;
 
+        // deal with evicted block
+        if (blk->contextSrc != -1) {
+            occupancies[blk->contextSrc % cache->numCpus()]--;
+            blk->contextSrc = -1;
+        }
+
         DPRINTF(CacheRepl, "set %x: selecting blk %x for replacement\n",
                 set, regenerateBlkAddr(blk->tag, set));
     }
@@ -178,6 +185,12 @@ LRU::insertBlock(Addr addr, BlkType *blk, int context_src)
     // Set tag for new block.  Caller is responsible for setting status.
     blk->tag = extractTag(addr);
 
+    // deal with what we are bringing in
+    if (context_src != -1) {
+        occupancies[context_src % cache->numCpus()]++;
+        blk->contextSrc = context_src;
+    }
+
     unsigned set = extractSet(addr);
     sets[set].moveToHead(blk);
 }
@@ -190,6 +203,10 @@ LRU::invalidateBlk(BlkType *blk)
         blk->isTouched = false;
         blk->clearLoadLocks();
         tagsInUse--;
+        if (blk->contextSrc != -1) {
+            occupancies[blk->contextSrc % cache->numCpus()]--;
+            blk->contextSrc = -1;
+        }
     }
 }
 
diff --git a/tests/configs/memtest.py b/tests/configs/memtest.py
index 93ea4cc0e..d75bd3d8c 100644
--- a/tests/configs/memtest.py
+++ b/tests/configs/memtest.py
@@ -63,6 +63,7 @@ system = System(cpu = cpus, funcmem = PhysicalMemory(),
 system.toL2Bus = Bus(clock="500GHz", width=16)
 system.l2c = L2(size='64kB', assoc=8)
 system.l2c.cpu_side = system.toL2Bus.port
+system.l2c.num_cpus = nb_cores
 
 # connect l2c to membus
 system.l2c.mem_side = system.membus.port
diff --git a/tests/configs/o3-timing-mp.py b/tests/configs/o3-timing-mp.py
index 59776d5c3..b5c720dda 100644
--- a/tests/configs/o3-timing-mp.py
+++ b/tests/configs/o3-timing-mp.py
@@ -62,6 +62,7 @@ Bus())
 system.toL2Bus = Bus()
 system.l2c = L2(size='4MB', assoc=8)
 system.l2c.cpu_side = system.toL2Bus.port
+system.l2c.num_cpus = nb_cores
 
 # connect l2c to membus
 system.l2c.mem_side = system.membus.port
diff --git a/tests/configs/simple-atomic-mp.py b/tests/configs/simple-atomic-mp.py
index bc0ced250..75ffefd08 100644
--- a/tests/configs/simple-atomic-mp.py
+++ b/tests/configs/simple-atomic-mp.py
@@ -61,6 +61,7 @@ Bus())
 system.toL2Bus = Bus()
 system.l2c = L2(size='4MB', assoc=8)
 system.l2c.cpu_side = system.toL2Bus.port
+system.l2c.num_cpus = nb_cores
 
 # connect l2c to membus
 system.l2c.mem_side = system.membus.port
diff --git a/tests/configs/simple-timing-mp.py b/tests/configs/simple-timing-mp.py
index 0b400e6b7..7a8da70bb 100644
--- a/tests/configs/simple-timing-mp.py
+++ b/tests/configs/simple-timing-mp.py
@@ -61,6 +61,7 @@ Bus())
 system.toL2Bus = Bus()
 system.l2c = L2(size='4MB', assoc=8)
 system.l2c.cpu_side = system.toL2Bus.port
+system.l2c.num_cpus = nb_cores
 
 # connect l2c to membus
 system.l2c.mem_side = system.membus.port
diff --git a/tests/configs/tsunami-o3-dual.py b/tests/configs/tsunami-o3-dual.py
index 76aca3806..d19dc9c26 100644
--- a/tests/configs/tsunami-o3-dual.py
+++ b/tests/configs/tsunami-o3-dual.py
@@ -85,6 +85,7 @@ system.iocache.mem_side = system.membus.port
 system.l2c = L2(size='4MB', assoc=8)
 system.l2c.cpu_side = system.toL2Bus.port
 system.l2c.mem_side = system.membus.port
+system.l2c.num_cpus = 2
 
 #connect up the cpu and l1s
 for c in cpus:
diff --git a/tests/configs/tsunami-simple-atomic-dual.py b/tests/configs/tsunami-simple-atomic-dual.py
index dfbdd101d..d78a09db4 100644
--- a/tests/configs/tsunami-simple-atomic-dual.py
+++ b/tests/configs/tsunami-simple-atomic-dual.py
@@ -83,6 +83,7 @@ system.toL2Bus = Bus()
 system.l2c = L2(size='4MB', assoc=8)
 system.l2c.cpu_side = system.toL2Bus.port
 system.l2c.mem_side = system.membus.port
+system.l2c.num_cpus = 2
 
 #connect up the cpu and l1s
 for c in cpus:
diff --git a/tests/configs/tsunami-simple-timing-dual.py b/tests/configs/tsunami-simple-timing-dual.py
index ce17475e3..13b7bf32e 100644
--- a/tests/configs/tsunami-simple-timing-dual.py
+++ b/tests/configs/tsunami-simple-timing-dual.py
@@ -83,6 +83,7 @@ system.toL2Bus = Bus()
 system.l2c = L2(size='4MB', assoc=8)
 system.l2c.cpu_side = system.toL2Bus.port
 system.l2c.mem_side = system.membus.port
+system.l2c.num_cpus = 2
 
 #connect up the cpu and l1s
 for c in cpus: