diff options
-rw-r--r-- | configs/example/se.py | 1 | ||||
-rw-r--r-- | src/mem/cache/BaseCache.py | 1 | ||||
-rw-r--r-- | src/mem/cache/base.cc | 15 | ||||
-rw-r--r-- | src/mem/cache/base.hh | 48 | ||||
-rw-r--r-- | src/mem/cache/blk.hh | 5 | ||||
-rw-r--r-- | src/mem/cache/cache_impl.hh | 8 | ||||
-rw-r--r-- | src/mem/cache/tags/base.cc | 15 | ||||
-rw-r--r-- | src/mem/cache/tags/base.hh | 10 | ||||
-rw-r--r-- | src/mem/cache/tags/fa_lru.cc | 14 | ||||
-rw-r--r-- | src/mem/cache/tags/fa_lru.hh | 2 | ||||
-rw-r--r-- | src/mem/cache/tags/iic.cc | 2 | ||||
-rw-r--r-- | src/mem/cache/tags/iic.hh | 2 | ||||
-rw-r--r-- | src/mem/cache/tags/lru.cc | 19 | ||||
-rw-r--r-- | tests/configs/memtest.py | 1 | ||||
-rw-r--r-- | tests/configs/o3-timing-mp.py | 1 | ||||
-rw-r--r-- | tests/configs/simple-atomic-mp.py | 1 | ||||
-rw-r--r-- | tests/configs/simple-timing-mp.py | 1 | ||||
-rw-r--r-- | tests/configs/tsunami-o3-dual.py | 1 | ||||
-rw-r--r-- | tests/configs/tsunami-simple-atomic-dual.py | 1 | ||||
-rw-r--r-- | tests/configs/tsunami-simple-timing-dual.py | 1 |
20 files changed, 126 insertions, 23 deletions
diff --git a/configs/example/se.py b/configs/example/se.py index c490ed6b6..7c09bcc5c 100644 --- a/configs/example/se.py +++ b/configs/example/se.py @@ -151,6 +151,7 @@ if options.l2cache: system.tol2bus = Bus() system.l2.cpu_side = system.tol2bus.port system.l2.mem_side = system.membus.port + system.l2.num_cpus = np for i in xrange(np): if options.caches: diff --git a/src/mem/cache/BaseCache.py b/src/mem/cache/BaseCache.py index 5ded05400..dffac2234 100644 --- a/src/mem/cache/BaseCache.py +++ b/src/mem/cache/BaseCache.py @@ -44,6 +44,7 @@ class BaseCache(MemObject): prioritizeRequests = Param.Bool(False, "always service demand misses first") repl = Param.Repl(NULL, "replacement policy") + num_cpus = Param.Int(1, "number of cpus sharing this cache") size = Param.MemorySize("capacity in bytes") forward_snoops = Param.Bool(True, "forward snoops from mem side to cpu side") diff --git a/src/mem/cache/base.cc b/src/mem/cache/base.cc index fe1f580bd..70bc51cda 100644 --- a/src/mem/cache/base.cc +++ b/src/mem/cache/base.cc @@ -62,7 +62,8 @@ BaseCache::BaseCache(const Params *p) noTargetMSHR(NULL), missCount(p->max_miss_count), drainEvent(NULL), - addrRange(p->addr_range) + addrRange(p->addr_range), + _numCpus(p->num_cpus) { } @@ -148,7 +149,11 @@ BaseCache::regStats() const string &cstr = cmd.toString(); hits[access_idx] - .init(maxThreadsPerCPU) +#if FULL_SYSTEM + .init(_numCpus + 1) +#else + .init(_numCpus) +#endif .name(name() + "." + cstr + "_hits") .desc("number of " + cstr + " hits") .flags(total | nozero | nonan) @@ -185,7 +190,11 @@ BaseCache::regStats() const string &cstr = cmd.toString(); misses[access_idx] - .init(maxThreadsPerCPU) +#if FULL_SYSTEM + .init(_numCpus + 1) +#else + .init(_numCpus) +#endif .name(name() + "." 
+ cstr + "_misses") .desc("number of " + cstr + " misses") .flags(total | nozero | nonan) diff --git a/src/mem/cache/base.hh b/src/mem/cache/base.hh index c245fecd2..62e8ae126 100644 --- a/src/mem/cache/base.hh +++ b/src/mem/cache/base.hh @@ -47,6 +47,7 @@ #include "base/statistics.hh" #include "base/trace.hh" #include "base/types.hh" +#include "config/full_system.hh" #include "mem/cache/mshr_queue.hh" #include "mem/mem_object.hh" #include "mem/packet.hh" @@ -219,7 +220,11 @@ class BaseCache : public MemObject * Normally this is all possible memory addresses. */ Range<Addr> addrRange; + /** number of cpus sharing this cache - from config file */ + int _numCpus; + public: + int numCpus() { return _numCpus; } // Statistics /** * @addtogroup CacheStatistics @@ -481,9 +486,25 @@ class BaseCache : public MemObject virtual bool inMissQueue(Addr addr) = 0; - void incMissCount(PacketPtr pkt) + void incMissCount(PacketPtr pkt, int id) { - misses[pkt->cmdToIndex()][0/*pkt->req->threadId()*/]++; + + if (pkt->cmd == MemCmd::Writeback) { + assert(id == -1); + misses[pkt->cmdToIndex()][0]++; + /* same thing for writeback hits as misses - no context id + * available, meanwhile writeback hit/miss stats are not used + * in any aggregate hit/miss calculations, so just lump them all + * in bucket 0 */ +#if FULL_SYSTEM + } else if (id == -1) { + // Device accesses have id -1 + // lump device accesses into their own bucket + misses[pkt->cmdToIndex()][_numCpus]++; +#endif + } else { + misses[pkt->cmdToIndex()][id % _numCpus]++; + } if (missCount) { --missCount; @@ -491,6 +512,29 @@ class BaseCache : public MemObject exitSimLoop("A cache reached the maximum miss count"); } } + void incHitCount(PacketPtr pkt, int id) + { + + /* Writeback requests don't have a context id associated with + * them, so attributing a hit to a -1 context id is obviously a + * problem. 
I've noticed in the stats that hits are split into + * demand and non-demand hits - neither of which include writeback + * hits, so here, I'll just put the writeback hits into bucket 0 + * since it won't mess with any other stats -hsul */ + if (pkt->cmd == MemCmd::Writeback) { + assert(id == -1); + hits[pkt->cmdToIndex()][0]++; +#if FULL_SYSTEM + } else if (id == -1) { + // Device accesses have id -1 + // lump device accesses into their own bucket + hits[pkt->cmdToIndex()][_numCpus]++; +#endif + } else { + /* the % is necessary in case there are switch cpus */ + hits[pkt->cmdToIndex()][id % _numCpus]++; + } + } }; diff --git a/src/mem/cache/blk.hh b/src/mem/cache/blk.hh index 4f023e848..bf78a2268 100644 --- a/src/mem/cache/blk.hh +++ b/src/mem/cache/blk.hh @@ -104,6 +104,9 @@ class CacheBlk /** Number of references to this block since it was brought in. */ int refCount; + /** holds the context source ID of the requestor for this block. */ + int contextSrc; + protected: /** * Represents that the indicated thread context has a "lock" on @@ -133,7 +136,7 @@ class CacheBlk CacheBlk() : asid(-1), tag(0), data(0) ,size(0), status(0), whenReady(0), - set(-1), isTouched(false), refCount(0) + set(-1), isTouched(false), refCount(0), contextSrc(-1) {} /** diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh index 2397a17c5..206361f88 100644 --- a/src/mem/cache/cache_impl.hh +++ b/src/mem/cache/cache_impl.hh @@ -277,7 +277,7 @@ Cache<TagStore>::access(PacketPtr pkt, BlkType *&blk, if (pkt->needsExclusive() ? blk->isWritable() : blk->isReadable()) { // OK to satisfy access - hits[pkt->cmdToIndex()][0/*pkt->req->threadId()*/]++; + incHitCount(pkt, id); satisfyCpuSideRequest(pkt, blk); return true; } @@ -297,7 +297,7 @@ Cache<TagStore>::access(PacketPtr pkt, BlkType *&blk, if (blk == NULL) { // no replaceable block available, give up. // writeback will be forwarded to next level. 
- incMissCount(pkt); + incMissCount(pkt, id); return false; } int id = pkt->req->hasContextId() ? pkt->req->contextId() : -1; @@ -308,11 +308,11 @@ Cache<TagStore>::access(PacketPtr pkt, BlkType *&blk, blk->status |= BlkDirty; // nothing else to do; writeback doesn't expect response assert(!pkt->needsResponse()); - hits[pkt->cmdToIndex()][0/*pkt->req->threadId()*/]++; + incHitCount(pkt, id); return true; } - incMissCount(pkt); + incMissCount(pkt, id); if (blk == NULL && pkt->isLLSC() && pkt->isWrite()) { // complete miss on store conditional... just give up now diff --git a/src/mem/cache/tags/base.cc b/src/mem/cache/tags/base.cc index e18026a21..8c6c145ca 100644 --- a/src/mem/cache/tags/base.cc +++ b/src/mem/cache/tags/base.cc @@ -87,5 +87,20 @@ BaseTags::regStats(const string &name) .desc("Cycle when the warmup percentage was hit.") ; + occupancies + .init(cache->numCpus()) + .name(name + ".occ_blocks") + .desc("Average occupied blocks per context") + .flags(nozero | nonan) + ; + + avgOccs + .name(name + ".occ_%") + .desc("Average percentage of cache occupancy") + .flags(nozero) + ; + + avgOccs = occupancies / Stats::constant(numBlocks); + registerExitCallback(new BaseTagsCallback(this)); } diff --git a/src/mem/cache/tags/base.hh b/src/mem/cache/tags/base.hh index 46c7186b1..fc8470290 100644 --- a/src/mem/cache/tags/base.hh +++ b/src/mem/cache/tags/base.hh @@ -63,6 +63,9 @@ class BaseTags /** Marked true when the cache is warmed up. */ bool warmedUp; + /** the number of blocks in the cache */ + unsigned numBlocks; + // Statistics /** * @addtogroup CacheStatistics @@ -92,6 +95,13 @@ class BaseTags /** The cycle that the warmup percentage was hit. 
*/ Stats::Scalar warmupCycle; + + /** Average occupancy of each context/cpu using the cache */ + Stats::AverageVector occupancies; + + /** Average occ % of each context/cpu using the cache */ + Stats::Formula avgOccs; + /** * @} */ diff --git a/src/mem/cache/tags/fa_lru.cc b/src/mem/cache/tags/fa_lru.cc index 808f9e25a..d13ba4973 100644 --- a/src/mem/cache/tags/fa_lru.cc +++ b/src/mem/cache/tags/fa_lru.cc @@ -43,8 +43,7 @@ using namespace std; FALRU::FALRU(unsigned _blkSize, unsigned _size, unsigned hit_latency) - : blkSize(_blkSize), size(_size), - numBlks(size/blkSize), hitLatency(hit_latency) + : blkSize(_blkSize), size(_size), hitLatency(hit_latency) { if (!isPowerOf2(blkSize)) fatal("cache block size (in bytes) `%d' must be a power of two", @@ -65,23 +64,24 @@ FALRU::FALRU(unsigned _blkSize, unsigned _size, unsigned hit_latency) warmedUp = false; warmupBound = size/blkSize; + numBlocks = size/blkSize; - blks = new FALRUBlk[numBlks]; + blks = new FALRUBlk[numBlocks]; head = &(blks[0]); - tail = &(blks[numBlks-1]); + tail = &(blks[numBlocks-1]); head->prev = NULL; head->next = &(blks[1]); head->inCache = cacheMask; - tail->prev = &(blks[numBlks-2]); + tail->prev = &(blks[numBlocks-2]); tail->next = NULL; tail->inCache = 0; unsigned index = (1 << 17) / blkSize; unsigned j = 0; int flags = cacheMask; - for (unsigned i = 1; i < numBlks - 1; i++) { + for (unsigned i = 1; i < numBlocks - 1; i++) { blks[i].inCache = flags; if (i == index - 1){ cacheBoundaries[j] = &(blks[i]); @@ -94,7 +94,7 @@ FALRU::FALRU(unsigned _blkSize, unsigned _size, unsigned hit_latency) blks[i].isTouched = false; } assert(j == numCaches); - assert(index == numBlks); + assert(index == numBlocks); //assert(check()); } diff --git a/src/mem/cache/tags/fa_lru.hh b/src/mem/cache/tags/fa_lru.hh index b20d25d2b..5047da12a 100644 --- a/src/mem/cache/tags/fa_lru.hh +++ b/src/mem/cache/tags/fa_lru.hh @@ -84,8 +84,6 @@ class FALRU : public BaseTags const unsigned blkSize; /** The size of the cache. 
*/ const unsigned size; - /** The number of blocks in the cache. */ - const unsigned numBlks; // calculated internally /** The hit latency of the cache. */ const unsigned hitLatency; diff --git a/src/mem/cache/tags/iic.cc b/src/mem/cache/tags/iic.cc index a8ef4e6fb..f9afa5839 100644 --- a/src/mem/cache/tags/iic.cc +++ b/src/mem/cache/tags/iic.cc @@ -60,7 +60,6 @@ IIC::IIC(IIC::Params &params) : tagShift(floorLog2(blkSize)), blkMask(blkSize - 1), subShift(floorLog2(subSize)), subMask(numSub - 1), hashDelay(params.hashDelay), - numBlocks(params.size/subSize), numTags(hashSets * assoc + params.size/blkSize -1), numSecondary(params.size/blkSize), tagNull(numTags), @@ -88,6 +87,7 @@ IIC::IIC(IIC::Params &params) : warmedUp = false; warmupBound = params.size/blkSize; + numBlocks = params.size/subSize; // Replacement Policy Initialization repl = params.rp; diff --git a/src/mem/cache/tags/iic.hh b/src/mem/cache/tags/iic.hh index c96cdaf3e..5b12128c6 100644 --- a/src/mem/cache/tags/iic.hh +++ b/src/mem/cache/tags/iic.hh @@ -197,8 +197,6 @@ class IIC : public BaseTags /** The latency of a hash lookup. */ const unsigned hashDelay; - /** The number of data blocks. */ - const unsigned numBlocks; /** The total number of tags in primary and secondary. */ const unsigned numTags; /** The number of tags in the secondary tag store. 
*/ diff --git a/src/mem/cache/tags/lru.cc b/src/mem/cache/tags/lru.cc index 81d44b0c0..0da2a72e9 100644 --- a/src/mem/cache/tags/lru.cc +++ b/src/mem/cache/tags/lru.cc @@ -74,7 +74,8 @@ LRU::LRU(unsigned _numSets, unsigned _blkSize, unsigned _assoc, sets = new CacheSet[numSets]; blks = new BlkType[numSets * assoc]; // allocate data storage in one big chunk - dataBlks = new uint8_t[numSets*assoc*blkSize]; + numBlocks = numSets * assoc; + dataBlks = new uint8_t[numBlocks * blkSize]; unsigned blkIndex = 0; // index into blks array for (unsigned i = 0; i < numSets; ++i) { @@ -157,6 +158,12 @@ LRU::findVictim(Addr addr, PacketList &writebacks) ++sampledRefs; blk->refCount = 0; + // deal with evicted block + if (blk->contextSrc != -1) { + occupancies[blk->contextSrc % cache->numCpus()]--; + blk->contextSrc = -1; + } + DPRINTF(CacheRepl, "set %x: selecting blk %x for replacement\n", set, regenerateBlkAddr(blk->tag, set)); } @@ -178,6 +185,12 @@ LRU::insertBlock(Addr addr, BlkType *blk, int context_src) // Set tag for new block. Caller is responsible for setting status. 
blk->tag = extractTag(addr); + // deal with what we are bringing in + if (context_src != -1) { + occupancies[context_src % cache->numCpus()]++; + blk->contextSrc = context_src; + } + unsigned set = extractSet(addr); sets[set].moveToHead(blk); } @@ -190,6 +203,10 @@ LRU::invalidateBlk(BlkType *blk) blk->isTouched = false; blk->clearLoadLocks(); tagsInUse--; + if (blk->contextSrc != -1) { + occupancies[blk->contextSrc % cache->numCpus()]--; + blk->contextSrc = -1; + } } } diff --git a/tests/configs/memtest.py b/tests/configs/memtest.py index 93ea4cc0e..d75bd3d8c 100644 --- a/tests/configs/memtest.py +++ b/tests/configs/memtest.py @@ -63,6 +63,7 @@ system = System(cpu = cpus, funcmem = PhysicalMemory(), system.toL2Bus = Bus(clock="500GHz", width=16) system.l2c = L2(size='64kB', assoc=8) system.l2c.cpu_side = system.toL2Bus.port +system.l2c.num_cpus = nb_cores # connect l2c to membus system.l2c.mem_side = system.membus.port diff --git a/tests/configs/o3-timing-mp.py b/tests/configs/o3-timing-mp.py index 59776d5c3..b5c720dda 100644 --- a/tests/configs/o3-timing-mp.py +++ b/tests/configs/o3-timing-mp.py @@ -62,6 +62,7 @@ Bus()) system.toL2Bus = Bus() system.l2c = L2(size='4MB', assoc=8) system.l2c.cpu_side = system.toL2Bus.port +system.l2c.num_cpus = nb_cores # connect l2c to membus system.l2c.mem_side = system.membus.port diff --git a/tests/configs/simple-atomic-mp.py b/tests/configs/simple-atomic-mp.py index bc0ced250..75ffefd08 100644 --- a/tests/configs/simple-atomic-mp.py +++ b/tests/configs/simple-atomic-mp.py @@ -61,6 +61,7 @@ Bus()) system.toL2Bus = Bus() system.l2c = L2(size='4MB', assoc=8) system.l2c.cpu_side = system.toL2Bus.port +system.l2c.num_cpus = nb_cores # connect l2c to membus system.l2c.mem_side = system.membus.port diff --git a/tests/configs/simple-timing-mp.py b/tests/configs/simple-timing-mp.py index 0b400e6b7..7a8da70bb 100644 --- a/tests/configs/simple-timing-mp.py +++ b/tests/configs/simple-timing-mp.py @@ -61,6 +61,7 @@ Bus()) system.toL2Bus = 
Bus() system.l2c = L2(size='4MB', assoc=8) system.l2c.cpu_side = system.toL2Bus.port +system.l2c.num_cpus = nb_cores # connect l2c to membus system.l2c.mem_side = system.membus.port diff --git a/tests/configs/tsunami-o3-dual.py b/tests/configs/tsunami-o3-dual.py index 76aca3806..d19dc9c26 100644 --- a/tests/configs/tsunami-o3-dual.py +++ b/tests/configs/tsunami-o3-dual.py @@ -85,6 +85,7 @@ system.iocache.mem_side = system.membus.port system.l2c = L2(size='4MB', assoc=8) system.l2c.cpu_side = system.toL2Bus.port system.l2c.mem_side = system.membus.port +system.l2c.num_cpus = 2 #connect up the cpu and l1s for c in cpus: diff --git a/tests/configs/tsunami-simple-atomic-dual.py b/tests/configs/tsunami-simple-atomic-dual.py index dfbdd101d..d78a09db4 100644 --- a/tests/configs/tsunami-simple-atomic-dual.py +++ b/tests/configs/tsunami-simple-atomic-dual.py @@ -83,6 +83,7 @@ system.toL2Bus = Bus() system.l2c = L2(size='4MB', assoc=8) system.l2c.cpu_side = system.toL2Bus.port system.l2c.mem_side = system.membus.port +system.l2c.num_cpus = 2 #connect up the cpu and l1s for c in cpus: diff --git a/tests/configs/tsunami-simple-timing-dual.py b/tests/configs/tsunami-simple-timing-dual.py index ce17475e3..13b7bf32e 100644 --- a/tests/configs/tsunami-simple-timing-dual.py +++ b/tests/configs/tsunami-simple-timing-dual.py @@ -83,6 +83,7 @@ system.toL2Bus = Bus() system.l2c = L2(size='4MB', assoc=8) system.l2c.cpu_side = system.toL2Bus.port system.l2c.mem_side = system.membus.port +system.l2c.num_cpus = 2 #connect up the cpu and l1s for c in cpus: |