summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorDam Sunwoo <dam.sunwoo@arm.com>2014-01-24 15:29:30 -0600
committerDam Sunwoo <dam.sunwoo@arm.com>2014-01-24 15:29:30 -0600
commit85e8779de78ed913bb6d2a794bee5252d719b0e5 (patch)
tree8ebd9519b4a6b0590c4d675061a0d1d4b43a1928 /src
parent739c6df94ea0030fea04065e6b8d8a1e232752a0 (diff)
downloadgem5-85e8779de78ed913bb6d2a794bee5252d719b0e5.tar.xz
mem: per-thread cache occupancy and per-block ages
This patch enables tracking of cache occupancy per thread along with ages (in buckets) per cache blocks. Cache occupancy stats are recalculated on each stat dump.
Diffstat (limited to 'src')
-rw-r--r--src/arch/arm/table_walker.cc2
-rw-r--r--src/arch/arm/tlb.cc3
-rw-r--r--src/cpu/base_dyn_inst.hh4
-rw-r--r--src/cpu/o3/fetch_impl.hh2
-rw-r--r--src/cpu/simple/atomic.cc3
-rw-r--r--src/cpu/simple/timing.cc5
-rw-r--r--src/dev/dma_device.cc1
-rw-r--r--src/mem/cache/blk.hh12
-rw-r--r--src/mem/cache/cache_impl.hh6
-rw-r--r--src/mem/cache/prefetch/base.cc1
-rw-r--r--src/mem/cache/tags/base.cc23
-rw-r--r--src/mem/cache/tags/base.hh22
-rw-r--r--src/mem/cache/tags/lru.cc40
-rw-r--r--src/mem/cache/tags/lru.hh5
-rw-r--r--src/mem/request.hh23
15 files changed, 149 insertions, 3 deletions
diff --git a/src/arch/arm/table_walker.cc b/src/arch/arm/table_walker.cc
index 9755299ff..d419fdec5 100644
--- a/src/arch/arm/table_walker.cc
+++ b/src/arch/arm/table_walker.cc
@@ -308,6 +308,7 @@ TableWalker::processWalk()
f = currState->fault;
} else {
RequestPtr req = new Request(l1desc_addr, sizeof(uint32_t), flag, masterId);
+ req->taskId(ContextSwitchTaskId::DMA);
PacketPtr pkt = new Packet(req, MemCmd::ReadReq);
pkt->dataStatic((uint8_t*)&currState->l1Desc.data);
port.sendFunctional(pkt);
@@ -653,6 +654,7 @@ TableWalker::doL1Descriptor()
} else {
RequestPtr req = new Request(l2desc_addr, sizeof(uint32_t), 0,
masterId);
+ req->taskId(ContextSwitchTaskId::DMA);
PacketPtr pkt = new Packet(req, MemCmd::ReadReq);
pkt->dataStatic((uint8_t*)&currState->l2Desc.data);
port.sendFunctional(pkt);
diff --git a/src/arch/arm/tlb.cc b/src/arch/arm/tlb.cc
index 107901f99..805898576 100644
--- a/src/arch/arm/tlb.cc
+++ b/src/arch/arm/tlb.cc
@@ -54,6 +54,7 @@
#include "base/inifile.hh"
#include "base/str.hh"
#include "base/trace.hh"
+#include "cpu/base.hh"
#include "cpu/thread_context.hh"
#include "debug/Checkpoint.hh"
#include "debug/TLB.hh"
@@ -477,6 +478,8 @@ TLB::translateFs(RequestPtr req, ThreadContext *tc, Mode mode,
if (is_priv)
req->setFlags(Request::PRIVILEGED);
+ req->taskId(tc->getCpuPtr()->taskId());
+
DPRINTF(TLBVerbose, "CPSR is priv:%d UserMode:%d\n",
isPriv, flags & UserMode);
// If this is a clrex instruction, provide a PA of 0 with no fault
diff --git a/src/cpu/base_dyn_inst.hh b/src/cpu/base_dyn_inst.hh
index b7b076820..f12a89bbd 100644
--- a/src/cpu/base_dyn_inst.hh
+++ b/src/cpu/base_dyn_inst.hh
@@ -890,6 +890,8 @@ BaseDynInst<Impl>::readMem(Addr addr, uint8_t *data,
req = new Request(asid, addr, size, flags, masterId(), this->pc.instAddr(),
thread->contextId(), threadNumber);
+ req->taskId(cpu->taskId());
+
// Only split the request if the ISA supports unaligned accesses.
if (TheISA::HasUnalignedMemAcc) {
splitRequest(req, sreqLow, sreqHigh);
@@ -953,6 +955,8 @@ BaseDynInst<Impl>::writeMem(uint8_t *data, unsigned size,
req = new Request(asid, addr, size, flags, masterId(), this->pc.instAddr(),
thread->contextId(), threadNumber);
+ req->taskId(cpu->taskId());
+
// Only split the request if the ISA supports unaligned accesses.
if (TheISA::HasUnalignedMemAcc) {
splitRequest(req, sreqLow, sreqHigh);
diff --git a/src/cpu/o3/fetch_impl.hh b/src/cpu/o3/fetch_impl.hh
index 5b04c2a25..a81125da6 100644
--- a/src/cpu/o3/fetch_impl.hh
+++ b/src/cpu/o3/fetch_impl.hh
@@ -604,6 +604,8 @@ DefaultFetch<Impl>::fetchCacheLine(Addr vaddr, ThreadID tid, Addr pc)
Request::INST_FETCH, cpu->instMasterId(), pc,
cpu->thread[tid]->contextId(), tid);
+ mem_req->taskId(cpu->taskId());
+
memReq[tid] = mem_req;
// Initiate translation of the icache block
diff --git a/src/cpu/simple/atomic.cc b/src/cpu/simple/atomic.cc
index 13c4b9bd3..617e845a5 100644
--- a/src/cpu/simple/atomic.cc
+++ b/src/cpu/simple/atomic.cc
@@ -301,6 +301,7 @@ AtomicSimpleCPU::readMem(Addr addr, uint8_t * data,
dcache_latency = 0;
+ req->taskId(taskId());
while (1) {
req->setVirt(0, addr, size, flags, dataMasterId(), thread->pcState().instAddr());
@@ -387,6 +388,7 @@ AtomicSimpleCPU::writeMem(uint8_t *data, unsigned size,
dcache_latency = 0;
+ req->taskId(taskId());
while(1) {
req->setVirt(0, addr, size, flags, dataMasterId(), thread->pcState().instAddr());
@@ -492,6 +494,7 @@ AtomicSimpleCPU::tick()
bool needToFetch = !isRomMicroPC(pcState.microPC()) &&
!curMacroStaticInst;
if (needToFetch) {
+ ifetch_req.taskId(taskId());
setupFetchRequest(&ifetch_req);
fault = thread->itb->translateAtomic(&ifetch_req, tc,
BaseTLB::Execute);
diff --git a/src/cpu/simple/timing.cc b/src/cpu/simple/timing.cc
index 9253d8005..7996a6ddd 100644
--- a/src/cpu/simple/timing.cc
+++ b/src/cpu/simple/timing.cc
@@ -415,6 +415,8 @@ TimingSimpleCPU::readMem(Addr addr, uint8_t *data,
RequestPtr req = new Request(asid, addr, size,
flags, dataMasterId(), pc, _cpuId, tid);
+ req->taskId(taskId());
+
Addr split_addr = roundDown(addr + size - 1, block_size);
assert(split_addr <= addr || split_addr - addr < block_size);
@@ -484,6 +486,8 @@ TimingSimpleCPU::writeMem(uint8_t *data, unsigned size,
RequestPtr req = new Request(asid, addr, size,
flags, dataMasterId(), pc, _cpuId, tid);
+ req->taskId(taskId());
+
Addr split_addr = roundDown(addr + size - 1, block_size);
assert(split_addr <= addr || split_addr - addr < block_size);
@@ -561,6 +565,7 @@ TimingSimpleCPU::fetch()
if (needToFetch) {
_status = BaseSimpleCPU::Running;
Request *ifetch_req = new Request();
+ ifetch_req->taskId(taskId());
ifetch_req->setThreadContext(_cpuId, /* thread ID */ 0);
setupFetchRequest(ifetch_req);
DPRINTF(SimpleCPU, "Translating address %#x\n", ifetch_req->getVaddr());
diff --git a/src/dev/dma_device.cc b/src/dev/dma_device.cc
index 5eb5e1f9d..5033c3617 100644
--- a/src/dev/dma_device.cc
+++ b/src/dev/dma_device.cc
@@ -166,6 +166,7 @@ DmaPort::dmaAction(Packet::Command cmd, Addr addr, int size, Event *event,
for (ChunkGenerator gen(addr, size, sys->cacheLineSize());
!gen.done(); gen.next()) {
Request *req = new Request(gen.addr(), gen.size(), flag, masterId);
+ req->taskId(ContextSwitchTaskId::DMA);
PacketPtr pkt = new Packet(req, cmd);
// Increment the data pointer on a write
diff --git a/src/mem/cache/blk.hh b/src/mem/cache/blk.hh
index 4a89f3892..47cd305c0 100644
--- a/src/mem/cache/blk.hh
+++ b/src/mem/cache/blk.hh
@@ -80,6 +80,9 @@ enum CacheBlkStatusBits {
class CacheBlk
{
public:
+ /** Task Id associated with this block */
+ uint32_t task_id;
+
/** The address space ID of this block. */
int asid;
/** Data block tag value. */
@@ -119,6 +122,8 @@ class CacheBlk
/** holds the source requestor ID for this block. */
int srcMasterId;
+ Tick tickInserted;
+
protected:
/**
* Represents that the indicated thread context has a "lock" on
@@ -162,9 +167,11 @@ class CacheBlk
public:
CacheBlk()
- : asid(-1), tag(0), data(0) ,size(0), status(0), whenReady(0),
+ : task_id(ContextSwitchTaskId::Unknown),
+ asid(-1), tag(0), data(0) ,size(0), status(0), whenReady(0),
set(-1), isTouched(false), refCount(0),
- srcMasterId(Request::invldMasterId)
+ srcMasterId(Request::invldMasterId),
+ tickInserted(0)
{}
/**
@@ -182,6 +189,7 @@ class CacheBlk
whenReady = rhs.whenReady;
set = rhs.set;
refCount = rhs.refCount;
+ task_id = rhs.task_id;
return *this;
}
diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh
index 6d7011819..e86b3d704 100644
--- a/src/mem/cache/cache_impl.hh
+++ b/src/mem/cache/cache_impl.hh
@@ -1074,6 +1074,11 @@ Cache<TagStore>::writebackBlk(BlkType *blk)
Request *writebackReq =
new Request(tags->regenerateBlkAddr(blk->tag, blk->set), blkSize, 0,
Request::wbMasterId);
+
+ writebackReq->taskId(blk->task_id);
+ blk->task_id= ContextSwitchTaskId::Unknown;
+ blk->tickInserted = curTick();
+
PacketPtr writeback = new Packet(writebackReq, MemCmd::Writeback);
if (blk->isWritable()) {
writeback->setSupplyExclusive();
@@ -1120,6 +1125,7 @@ Cache<TagStore>::writebackVisitor(BlkType &blk)
Request request(tags->regenerateBlkAddr(blk.tag, blk.set),
blkSize, 0, Request::funcMasterId);
+ request.taskId(blk.task_id);
Packet packet(&request, MemCmd::WriteReq);
packet.dataStatic(blk.data);
diff --git a/src/mem/cache/prefetch/base.cc b/src/mem/cache/prefetch/base.cc
index 6463f78f8..ed7b63f82 100644
--- a/src/mem/cache/prefetch/base.cc
+++ b/src/mem/cache/prefetch/base.cc
@@ -247,6 +247,7 @@ BasePrefetcher::notify(PacketPtr &pkt, Tick tick)
// create a prefetch memreq
Request *prefetchReq = new Request(*addrIter, blkSize, 0, masterId);
+ prefetchReq->taskId(ContextSwitchTaskId::Prefetcher);
PacketPtr prefetch =
new Packet(prefetchReq, MemCmd::HardPFReq);
prefetch->allocate();
diff --git a/src/mem/cache/tags/base.cc b/src/mem/cache/tags/base.cc
index 947bd05de..b669a5b06 100644
--- a/src/mem/cache/tags/base.cc
+++ b/src/mem/cache/tags/base.cc
@@ -125,5 +125,28 @@ BaseTags::regStats()
avgOccs = occupancies / Stats::constant(numBlocks);
+ occupanciesTaskId
+ .init(ContextSwitchTaskId::NumTaskId)
+ .name(name() + ".occ_task_id_blocks")
+ .desc("Occupied blocks per task id")
+ .flags(nozero | nonan)
+ ;
+
+ ageTaskId
+ .init(ContextSwitchTaskId::NumTaskId, 5)
+ .name(name() + ".age_task_id_blocks")
+ .desc("Occupied blocks per task id")
+ .flags(nozero | nonan)
+ ;
+
+ percentOccsTaskId
+ .name(name() + ".occ_task_id_percent")
+ .desc("Percentage of cache occupancy per task id")
+ .flags(nozero)
+ ;
+
+ percentOccsTaskId = occupanciesTaskId / Stats::constant(numBlocks);
+
+ registerDumpCallback(new BaseTagsDumpCallback(this));
registerExitCallback(new BaseTagsCallback(this));
}
diff --git a/src/mem/cache/tags/base.hh b/src/mem/cache/tags/base.hh
index 8ce7d972a..e8c71f01f 100644
--- a/src/mem/cache/tags/base.hh
+++ b/src/mem/cache/tags/base.hh
@@ -121,6 +121,15 @@ class BaseTags : public ClockedObject
/** Average occ % of each requestor using the cache */
Stats::Formula avgOccs;
+ /** Occupancy of each context/cpu using the cache */
+ Stats::Vector occupanciesTaskId;
+
+ /** Occupancy of each context/cpu using the cache */
+ Stats::Vector2d ageTaskId;
+
+ /** Occ % of each context/cpu using the cache */
+ Stats::Formula percentOccsTaskId;
+
/**
* @}
*/
@@ -152,6 +161,11 @@ class BaseTags : public ClockedObject
virtual void cleanupRefs() {}
/**
+ * Computes stats just prior to dump event
+ */
+ virtual void computeStats() {}
+
+ /**
*iterated through all blocks and clear all locks
*Needed to clear all lock tracking at once
*/
@@ -171,4 +185,12 @@ class BaseTagsCallback : public Callback
virtual void process() { tags->cleanupRefs(); };
};
+class BaseTagsDumpCallback : public Callback
+{
+ BaseTags *tags;
+ public:
+ BaseTagsDumpCallback(BaseTags *t) : tags(t) {}
+ virtual void process() { tags->computeStats(); };
+};
+
#endif //__BASE_TAGS_HH__
diff --git a/src/mem/cache/tags/lru.cc b/src/mem/cache/tags/lru.cc
index db0cc0839..6b05744af 100644
--- a/src/mem/cache/tags/lru.cc
+++ b/src/mem/cache/tags/lru.cc
@@ -176,6 +176,7 @@ LRU::insertBlock(PacketPtr pkt, BlkType *blk)
{
Addr addr = pkt->getAddr();
MasterID master_id = pkt->req->masterId();
+ uint32_t task_id = pkt->req->taskId();
if (!blk->isTouched) {
tagsInUse++;
blk->isTouched = true;
@@ -210,6 +211,8 @@ LRU::insertBlock(PacketPtr pkt, BlkType *blk)
assert(master_id < cache->system->maxMasters());
occupancies[master_id]++;
blk->srcMasterId = master_id;
+ blk->task_id = task_id;
+ blk->tickInserted = curTick();
unsigned set = extractSet(addr);
sets[set].moveToHead(blk);
@@ -224,6 +227,8 @@ LRU::invalidate(BlkType *blk)
assert(blk->srcMasterId < cache->system->maxMasters());
occupancies[blk->srcMasterId]--;
blk->srcMasterId = Request::invldMasterId;
+ blk->task_id = ContextSwitchTaskId::Unknown;
+ blk->tickInserted = curTick();
// should be evicted before valid blocks
unsigned set = blk->set;
@@ -270,3 +275,38 @@ LRU::cleanupRefs()
}
}
}
+
+void
+LRU::computeStats()
+{
+ for (unsigned i = 0; i < ContextSwitchTaskId::NumTaskId; ++i) {
+ occupanciesTaskId[i] = 0;
+ for (unsigned j = 0; j < 5; ++j) {
+ ageTaskId[i][j] = 0;
+ }
+ }
+
+ for (unsigned i = 0; i < numSets * assoc; ++i) {
+ if (blks[i].isValid()) {
+ assert(blks[i].task_id < ContextSwitchTaskId::NumTaskId);
+ occupanciesTaskId[blks[i].task_id]++;
+ Tick age = curTick() - blks[i].tickInserted;
+ assert(age >= 0);
+
+ int age_index;
+ if (age / SimClock::Int::us < 10) { // <10us
+ age_index = 0;
+ } else if (age / SimClock::Int::us < 100) { // <100us
+ age_index = 1;
+ } else if (age / SimClock::Int::ms < 1) { // <1ms
+ age_index = 2;
+ } else if (age / SimClock::Int::ms < 10) { // <10ms
+ age_index = 3;
+ } else
+ age_index = 4; // >10ms
+
+ ageTaskId[blks[i].task_id][age_index]++;
+ }
+ }
+}
+
diff --git a/src/mem/cache/tags/lru.hh b/src/mem/cache/tags/lru.hh
index af7f8665d..68c29b754 100644
--- a/src/mem/cache/tags/lru.hh
+++ b/src/mem/cache/tags/lru.hh
@@ -253,6 +253,11 @@ public:
virtual std::string print() const;
/**
+ * Called prior to dumping stats to compute task occupancy
+ */
+ virtual void computeStats();
+
+ /**
* Visit each block in the tag store and apply a visitor to the
* block.
*
diff --git a/src/mem/request.hh b/src/mem/request.hh
index 54b671645..fb21e3ff3 100644
--- a/src/mem/request.hh
+++ b/src/mem/request.hh
@@ -219,6 +219,11 @@ class Request
*/
Tick _time;
+ /**
+ * The task id associated with this request
+ */
+ uint32_t _taskId;
+
/** The address space ID. */
int _asid;
@@ -244,7 +249,8 @@ class Request
* default constructor.)
*/
Request()
- : translateDelta(0), accessDelta(0), depth(0)
+ : _taskId(ContextSwitchTaskId::Unknown),
+ translateDelta(0), accessDelta(0), depth(0)
{}
/**
@@ -253,16 +259,19 @@ class Request
* These fields are adequate to perform a request.
*/
Request(Addr paddr, int size, Flags flags, MasterID mid)
+ : _taskId(ContextSwitchTaskId::Unknown)
{
setPhys(paddr, size, flags, mid);
}
Request(Addr paddr, int size, Flags flags, MasterID mid, Tick time)
+ : _taskId(ContextSwitchTaskId::Unknown)
{
setPhys(paddr, size, flags, mid, time);
}
Request(Addr paddr, int size, Flags flags, MasterID mid, Tick time, Addr pc)
+ : _taskId(ContextSwitchTaskId::Unknown)
{
setPhys(paddr, size, flags, mid, time);
privateFlags.set(VALID_PC);
@@ -271,6 +280,7 @@ class Request
Request(int asid, Addr vaddr, int size, Flags flags, MasterID mid, Addr pc,
int cid, ThreadID tid)
+ : _taskId(ContextSwitchTaskId::Unknown)
{
setVirt(asid, vaddr, size, flags, mid, pc);
setThreadContext(cid, tid);
@@ -477,6 +487,17 @@ class Request
return _masterId;
}
+ uint32_t
+ taskId() const
+ {
+ return _taskId;
+ }
+
+ void
+ taskId(uint32_t id) {
+ _taskId = id;
+ }
+
/** Accessor function for asid.*/
int
getAsid()