summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--configs/example/se.py1
-rw-r--r--src/mem/cache/BaseCache.py1
-rw-r--r--src/mem/cache/base.cc15
-rw-r--r--src/mem/cache/base.hh48
-rw-r--r--src/mem/cache/blk.hh5
-rw-r--r--src/mem/cache/cache_impl.hh8
-rw-r--r--src/mem/cache/tags/base.cc15
-rw-r--r--src/mem/cache/tags/base.hh10
-rw-r--r--src/mem/cache/tags/fa_lru.cc14
-rw-r--r--src/mem/cache/tags/fa_lru.hh2
-rw-r--r--src/mem/cache/tags/iic.cc2
-rw-r--r--src/mem/cache/tags/iic.hh2
-rw-r--r--src/mem/cache/tags/lru.cc19
-rw-r--r--tests/configs/memtest.py1
-rw-r--r--tests/configs/o3-timing-mp.py1
-rw-r--r--tests/configs/simple-atomic-mp.py1
-rw-r--r--tests/configs/simple-timing-mp.py1
-rw-r--r--tests/configs/tsunami-o3-dual.py1
-rw-r--r--tests/configs/tsunami-simple-atomic-dual.py1
-rw-r--r--tests/configs/tsunami-simple-timing-dual.py1
20 files changed, 126 insertions, 23 deletions
diff --git a/configs/example/se.py b/configs/example/se.py
index c490ed6b6..7c09bcc5c 100644
--- a/configs/example/se.py
+++ b/configs/example/se.py
@@ -151,6 +151,7 @@ if options.l2cache:
system.tol2bus = Bus()
system.l2.cpu_side = system.tol2bus.port
system.l2.mem_side = system.membus.port
+ system.l2.num_cpus = np
for i in xrange(np):
if options.caches:
diff --git a/src/mem/cache/BaseCache.py b/src/mem/cache/BaseCache.py
index 5ded05400..dffac2234 100644
--- a/src/mem/cache/BaseCache.py
+++ b/src/mem/cache/BaseCache.py
@@ -44,6 +44,7 @@ class BaseCache(MemObject):
prioritizeRequests = Param.Bool(False,
"always service demand misses first")
repl = Param.Repl(NULL, "replacement policy")
+ num_cpus = Param.Int(1, "number of cpus sharing this cache")
size = Param.MemorySize("capacity in bytes")
forward_snoops = Param.Bool(True,
"forward snoops from mem side to cpu side")
diff --git a/src/mem/cache/base.cc b/src/mem/cache/base.cc
index fe1f580bd..70bc51cda 100644
--- a/src/mem/cache/base.cc
+++ b/src/mem/cache/base.cc
@@ -62,7 +62,8 @@ BaseCache::BaseCache(const Params *p)
noTargetMSHR(NULL),
missCount(p->max_miss_count),
drainEvent(NULL),
- addrRange(p->addr_range)
+ addrRange(p->addr_range),
+ _numCpus(p->num_cpus)
{
}
@@ -148,7 +149,11 @@ BaseCache::regStats()
const string &cstr = cmd.toString();
hits[access_idx]
- .init(maxThreadsPerCPU)
+#if FULL_SYSTEM
+ .init(_numCpus + 1)
+#else
+ .init(_numCpus)
+#endif
.name(name() + "." + cstr + "_hits")
.desc("number of " + cstr + " hits")
.flags(total | nozero | nonan)
@@ -185,7 +190,11 @@ BaseCache::regStats()
const string &cstr = cmd.toString();
misses[access_idx]
- .init(maxThreadsPerCPU)
+#if FULL_SYSTEM
+ .init(_numCpus + 1)
+#else
+ .init(_numCpus)
+#endif
.name(name() + "." + cstr + "_misses")
.desc("number of " + cstr + " misses")
.flags(total | nozero | nonan)
diff --git a/src/mem/cache/base.hh b/src/mem/cache/base.hh
index c245fecd2..62e8ae126 100644
--- a/src/mem/cache/base.hh
+++ b/src/mem/cache/base.hh
@@ -47,6 +47,7 @@
#include "base/statistics.hh"
#include "base/trace.hh"
#include "base/types.hh"
+#include "config/full_system.hh"
#include "mem/cache/mshr_queue.hh"
#include "mem/mem_object.hh"
#include "mem/packet.hh"
@@ -219,7 +220,11 @@ class BaseCache : public MemObject
* Normally this is all possible memory addresses. */
Range<Addr> addrRange;
+ /** number of cpus sharing this cache - from config file */
+ int _numCpus;
+
public:
+ int numCpus() { return _numCpus; } // accessor for _numCpus (set from the num_cpus param in the constructor)
// Statistics
/**
* @addtogroup CacheStatistics
@@ -481,9 +486,25 @@ class BaseCache : public MemObject
virtual bool inMissQueue(Addr addr) = 0;
- void incMissCount(PacketPtr pkt)
+ void incMissCount(PacketPtr pkt, int id)
{
- misses[pkt->cmdToIndex()][0/*pkt->req->threadId()*/]++;
+
+ if (pkt->cmd == MemCmd::Writeback) {
+ assert(id == -1);
+ misses[pkt->cmdToIndex()][0]++;
+ /* same thing for writeback hits as misses - no context id
+ * available, meanwhile writeback hit/miss stats are not used
+ * in any aggregate hit/miss calculations, so just lump them all
+ * in bucket 0 */
+#if FULL_SYSTEM
+ } else if (id == -1) {
+ // Device accesses have id -1
+ // lump device accesses into their own bucket
+ misses[pkt->cmdToIndex()][_numCpus]++;
+#endif
+ } else {
+ misses[pkt->cmdToIndex()][id % _numCpus]++;
+ }
if (missCount) {
--missCount;
@@ -491,6 +512,29 @@ class BaseCache : public MemObject
exitSimLoop("A cache reached the maximum miss count");
}
}
+ void incHitCount(PacketPtr pkt, int id)
+ {
+
+ /* Record one hit for this packet's command in the per-requestor
+ * hit vector; 'id' (the context id, or -1) selects the bucket.
+ *
+ * Writeback requests don't have a context id associated with
+ * them (asserted below: id == -1), so there is no requestor to
+ * attribute the hit to. The original author (hsul) noted that the
+ * stats split hits into demand and non-demand hits - neither of
+ * which include writeback hits - so writeback hits are simply
+ * put into bucket 0, where they won't mess with any other stats. */
+ if (pkt->cmd == MemCmd::Writeback) {
+ assert(id == -1);
+ hits[pkt->cmdToIndex()][0]++;
+#if FULL_SYSTEM
+ } else if (id == -1) {
+ // Device accesses have id -1
+ // lump device accesses into their own bucket (index _numCpus)
+ hits[pkt->cmdToIndex()][_numCpus]++;
+#endif
+ } else {
+ /* the % is necessary in case there are switch cpus
+ * NOTE(review): when FULL_SYSTEM is 0 the id == -1 branch above
+ * is compiled out, so a non-writeback packet with id == -1
+ * would reach this modulo with a negative operand - confirm
+ * that cannot happen, or guard against it. */
+ hits[pkt->cmdToIndex()][id % _numCpus]++;
+ }
+ }
};
diff --git a/src/mem/cache/blk.hh b/src/mem/cache/blk.hh
index 4f023e848..bf78a2268 100644
--- a/src/mem/cache/blk.hh
+++ b/src/mem/cache/blk.hh
@@ -104,6 +104,9 @@ class CacheBlk
/** Number of references to this block since it was brought in. */
int refCount;
+ /** holds the context source ID of the requestor for this block. */
+ int contextSrc;
+
protected:
/**
* Represents that the indicated thread context has a "lock" on
@@ -133,7 +136,7 @@ class CacheBlk
CacheBlk()
: asid(-1), tag(0), data(0) ,size(0), status(0), whenReady(0),
- set(-1), isTouched(false), refCount(0)
+ set(-1), isTouched(false), refCount(0), contextSrc(-1)
{}
/**
diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh
index 2397a17c5..206361f88 100644
--- a/src/mem/cache/cache_impl.hh
+++ b/src/mem/cache/cache_impl.hh
@@ -277,7 +277,7 @@ Cache<TagStore>::access(PacketPtr pkt, BlkType *&blk,
if (pkt->needsExclusive() ? blk->isWritable() : blk->isReadable()) {
// OK to satisfy access
- hits[pkt->cmdToIndex()][0/*pkt->req->threadId()*/]++;
+ incHitCount(pkt, id);
satisfyCpuSideRequest(pkt, blk);
return true;
}
@@ -297,7 +297,7 @@ Cache<TagStore>::access(PacketPtr pkt, BlkType *&blk,
if (blk == NULL) {
// no replaceable block available, give up.
// writeback will be forwarded to next level.
- incMissCount(pkt);
+ incMissCount(pkt, id);
return false;
}
int id = pkt->req->hasContextId() ? pkt->req->contextId() : -1;
@@ -308,11 +308,11 @@ Cache<TagStore>::access(PacketPtr pkt, BlkType *&blk,
blk->status |= BlkDirty;
// nothing else to do; writeback doesn't expect response
assert(!pkt->needsResponse());
- hits[pkt->cmdToIndex()][0/*pkt->req->threadId()*/]++;
+ incHitCount(pkt, id);
return true;
}
- incMissCount(pkt);
+ incMissCount(pkt, id);
if (blk == NULL && pkt->isLLSC() && pkt->isWrite()) {
// complete miss on store conditional... just give up now
diff --git a/src/mem/cache/tags/base.cc b/src/mem/cache/tags/base.cc
index e18026a21..8c6c145ca 100644
--- a/src/mem/cache/tags/base.cc
+++ b/src/mem/cache/tags/base.cc
@@ -87,5 +87,20 @@ BaseTags::regStats(const string &name)
.desc("Cycle when the warmup percentage was hit.")
;
+ occupancies
+ .init(cache->numCpus())
+ .name(name + ".occ_blocks")
+ .desc("Average occupied blocks per context")
+ .flags(nozero | nonan)
+ ;
+
+ avgOccs
+ .name(name + ".occ_%")
+ .desc("Average percentage of cache occupancy")
+ .flags(nozero)
+ ;
+
+ avgOccs = occupancies / Stats::constant(numBlocks);
+
registerExitCallback(new BaseTagsCallback(this));
}
diff --git a/src/mem/cache/tags/base.hh b/src/mem/cache/tags/base.hh
index 46c7186b1..fc8470290 100644
--- a/src/mem/cache/tags/base.hh
+++ b/src/mem/cache/tags/base.hh
@@ -63,6 +63,9 @@ class BaseTags
/** Marked true when the cache is warmed up. */
bool warmedUp;
+ /** the number of blocks in the cache */
+ unsigned numBlocks;
+
// Statistics
/**
* @addtogroup CacheStatistics
@@ -92,6 +95,13 @@ class BaseTags
/** The cycle that the warmup percentage was hit. */
Stats::Scalar warmupCycle;
+
+ /** Average occupancy of each context/cpu using the cache */
+ Stats::AverageVector occupancies;
+
+ /** Average occ % of each context/cpu using the cache */
+ Stats::Formula avgOccs;
+
/**
* @}
*/
diff --git a/src/mem/cache/tags/fa_lru.cc b/src/mem/cache/tags/fa_lru.cc
index 808f9e25a..d13ba4973 100644
--- a/src/mem/cache/tags/fa_lru.cc
+++ b/src/mem/cache/tags/fa_lru.cc
@@ -43,8 +43,7 @@
using namespace std;
FALRU::FALRU(unsigned _blkSize, unsigned _size, unsigned hit_latency)
- : blkSize(_blkSize), size(_size),
- numBlks(size/blkSize), hitLatency(hit_latency)
+ : blkSize(_blkSize), size(_size), hitLatency(hit_latency)
{
if (!isPowerOf2(blkSize))
fatal("cache block size (in bytes) `%d' must be a power of two",
@@ -65,23 +64,24 @@ FALRU::FALRU(unsigned _blkSize, unsigned _size, unsigned hit_latency)
warmedUp = false;
warmupBound = size/blkSize;
+ numBlocks = size/blkSize;
- blks = new FALRUBlk[numBlks];
+ blks = new FALRUBlk[numBlocks];
head = &(blks[0]);
- tail = &(blks[numBlks-1]);
+ tail = &(blks[numBlocks-1]);
head->prev = NULL;
head->next = &(blks[1]);
head->inCache = cacheMask;
- tail->prev = &(blks[numBlks-2]);
+ tail->prev = &(blks[numBlocks-2]);
tail->next = NULL;
tail->inCache = 0;
unsigned index = (1 << 17) / blkSize;
unsigned j = 0;
int flags = cacheMask;
- for (unsigned i = 1; i < numBlks - 1; i++) {
+ for (unsigned i = 1; i < numBlocks - 1; i++) {
blks[i].inCache = flags;
if (i == index - 1){
cacheBoundaries[j] = &(blks[i]);
@@ -94,7 +94,7 @@ FALRU::FALRU(unsigned _blkSize, unsigned _size, unsigned hit_latency)
blks[i].isTouched = false;
}
assert(j == numCaches);
- assert(index == numBlks);
+ assert(index == numBlocks);
//assert(check());
}
diff --git a/src/mem/cache/tags/fa_lru.hh b/src/mem/cache/tags/fa_lru.hh
index b20d25d2b..5047da12a 100644
--- a/src/mem/cache/tags/fa_lru.hh
+++ b/src/mem/cache/tags/fa_lru.hh
@@ -84,8 +84,6 @@ class FALRU : public BaseTags
const unsigned blkSize;
/** The size of the cache. */
const unsigned size;
- /** The number of blocks in the cache. */
- const unsigned numBlks; // calculated internally
/** The hit latency of the cache. */
const unsigned hitLatency;
diff --git a/src/mem/cache/tags/iic.cc b/src/mem/cache/tags/iic.cc
index a8ef4e6fb..f9afa5839 100644
--- a/src/mem/cache/tags/iic.cc
+++ b/src/mem/cache/tags/iic.cc
@@ -60,7 +60,6 @@ IIC::IIC(IIC::Params &params) :
tagShift(floorLog2(blkSize)), blkMask(blkSize - 1),
subShift(floorLog2(subSize)), subMask(numSub - 1),
hashDelay(params.hashDelay),
- numBlocks(params.size/subSize),
numTags(hashSets * assoc + params.size/blkSize -1),
numSecondary(params.size/blkSize),
tagNull(numTags),
@@ -88,6 +87,7 @@ IIC::IIC(IIC::Params &params) :
warmedUp = false;
warmupBound = params.size/blkSize;
+ numBlocks = params.size/subSize;
// Replacement Policy Initialization
repl = params.rp;
diff --git a/src/mem/cache/tags/iic.hh b/src/mem/cache/tags/iic.hh
index c96cdaf3e..5b12128c6 100644
--- a/src/mem/cache/tags/iic.hh
+++ b/src/mem/cache/tags/iic.hh
@@ -197,8 +197,6 @@ class IIC : public BaseTags
/** The latency of a hash lookup. */
const unsigned hashDelay;
- /** The number of data blocks. */
- const unsigned numBlocks;
/** The total number of tags in primary and secondary. */
const unsigned numTags;
/** The number of tags in the secondary tag store. */
diff --git a/src/mem/cache/tags/lru.cc b/src/mem/cache/tags/lru.cc
index 81d44b0c0..0da2a72e9 100644
--- a/src/mem/cache/tags/lru.cc
+++ b/src/mem/cache/tags/lru.cc
@@ -74,7 +74,8 @@ LRU::LRU(unsigned _numSets, unsigned _blkSize, unsigned _assoc,
sets = new CacheSet[numSets];
blks = new BlkType[numSets * assoc];
// allocate data storage in one big chunk
- dataBlks = new uint8_t[numSets*assoc*blkSize];
+ numBlocks = numSets * assoc;
+ dataBlks = new uint8_t[numBlocks * blkSize];
unsigned blkIndex = 0; // index into blks array
for (unsigned i = 0; i < numSets; ++i) {
@@ -157,6 +158,12 @@ LRU::findVictim(Addr addr, PacketList &writebacks)
++sampledRefs;
blk->refCount = 0;
+ // deal with evicted block
+ if (blk->contextSrc != -1) {
+ occupancies[blk->contextSrc % cache->numCpus()]--;
+ blk->contextSrc = -1;
+ }
+
DPRINTF(CacheRepl, "set %x: selecting blk %x for replacement\n",
set, regenerateBlkAddr(blk->tag, set));
}
@@ -178,6 +185,12 @@ LRU::insertBlock(Addr addr, BlkType *blk, int context_src)
// Set tag for new block. Caller is responsible for setting status.
blk->tag = extractTag(addr);
+ // deal with what we are bringing in
+ if (context_src != -1) {
+ occupancies[context_src % cache->numCpus()]++;
+ blk->contextSrc = context_src;
+ }
+
unsigned set = extractSet(addr);
sets[set].moveToHead(blk);
}
@@ -190,6 +203,10 @@ LRU::invalidateBlk(BlkType *blk)
blk->isTouched = false;
blk->clearLoadLocks();
tagsInUse--;
+ if (blk->contextSrc != -1) {
+ occupancies[blk->contextSrc % cache->numCpus()]--;
+ blk->contextSrc = -1;
+ }
}
}
diff --git a/tests/configs/memtest.py b/tests/configs/memtest.py
index 93ea4cc0e..d75bd3d8c 100644
--- a/tests/configs/memtest.py
+++ b/tests/configs/memtest.py
@@ -63,6 +63,7 @@ system = System(cpu = cpus, funcmem = PhysicalMemory(),
system.toL2Bus = Bus(clock="500GHz", width=16)
system.l2c = L2(size='64kB', assoc=8)
system.l2c.cpu_side = system.toL2Bus.port
+system.l2c.num_cpus = nb_cores
# connect l2c to membus
system.l2c.mem_side = system.membus.port
diff --git a/tests/configs/o3-timing-mp.py b/tests/configs/o3-timing-mp.py
index 59776d5c3..b5c720dda 100644
--- a/tests/configs/o3-timing-mp.py
+++ b/tests/configs/o3-timing-mp.py
@@ -62,6 +62,7 @@ Bus())
system.toL2Bus = Bus()
system.l2c = L2(size='4MB', assoc=8)
system.l2c.cpu_side = system.toL2Bus.port
+system.l2c.num_cpus = nb_cores
# connect l2c to membus
system.l2c.mem_side = system.membus.port
diff --git a/tests/configs/simple-atomic-mp.py b/tests/configs/simple-atomic-mp.py
index bc0ced250..75ffefd08 100644
--- a/tests/configs/simple-atomic-mp.py
+++ b/tests/configs/simple-atomic-mp.py
@@ -61,6 +61,7 @@ Bus())
system.toL2Bus = Bus()
system.l2c = L2(size='4MB', assoc=8)
system.l2c.cpu_side = system.toL2Bus.port
+system.l2c.num_cpus = nb_cores
# connect l2c to membus
system.l2c.mem_side = system.membus.port
diff --git a/tests/configs/simple-timing-mp.py b/tests/configs/simple-timing-mp.py
index 0b400e6b7..7a8da70bb 100644
--- a/tests/configs/simple-timing-mp.py
+++ b/tests/configs/simple-timing-mp.py
@@ -61,6 +61,7 @@ Bus())
system.toL2Bus = Bus()
system.l2c = L2(size='4MB', assoc=8)
system.l2c.cpu_side = system.toL2Bus.port
+system.l2c.num_cpus = nb_cores
# connect l2c to membus
system.l2c.mem_side = system.membus.port
diff --git a/tests/configs/tsunami-o3-dual.py b/tests/configs/tsunami-o3-dual.py
index 76aca3806..d19dc9c26 100644
--- a/tests/configs/tsunami-o3-dual.py
+++ b/tests/configs/tsunami-o3-dual.py
@@ -85,6 +85,7 @@ system.iocache.mem_side = system.membus.port
system.l2c = L2(size='4MB', assoc=8)
system.l2c.cpu_side = system.toL2Bus.port
system.l2c.mem_side = system.membus.port
+system.l2c.num_cpus = 2
#connect up the cpu and l1s
for c in cpus:
diff --git a/tests/configs/tsunami-simple-atomic-dual.py b/tests/configs/tsunami-simple-atomic-dual.py
index dfbdd101d..d78a09db4 100644
--- a/tests/configs/tsunami-simple-atomic-dual.py
+++ b/tests/configs/tsunami-simple-atomic-dual.py
@@ -83,6 +83,7 @@ system.toL2Bus = Bus()
system.l2c = L2(size='4MB', assoc=8)
system.l2c.cpu_side = system.toL2Bus.port
system.l2c.mem_side = system.membus.port
+system.l2c.num_cpus = 2
#connect up the cpu and l1s
for c in cpus:
diff --git a/tests/configs/tsunami-simple-timing-dual.py b/tests/configs/tsunami-simple-timing-dual.py
index ce17475e3..13b7bf32e 100644
--- a/tests/configs/tsunami-simple-timing-dual.py
+++ b/tests/configs/tsunami-simple-timing-dual.py
@@ -83,6 +83,7 @@ system.toL2Bus = Bus()
system.l2c = L2(size='4MB', assoc=8)
system.l2c.cpu_side = system.toL2Bus.port
system.l2c.mem_side = system.membus.port
+system.l2c.num_cpus = 2
#connect up the cpu and l1s
for c in cpus: