summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorCurtis Dunham <Curtis.Dunham@arm.com>2014-12-23 09:31:18 -0500
committerCurtis Dunham <Curtis.Dunham@arm.com>2014-12-23 09:31:18 -0500
commit4d88978913c57e0cd10751d31d7f5b95c1e00170 (patch)
tree900bed4add4e20e374fdc1381a16e0e831d80792 /src
parent59460b91f35efe24a99424c0018d2f9c002e50c8 (diff)
downloadgem5-4d88978913c57e0cd10751d31d7f5b95c1e00170.tar.xz
arm: Add stats to table walker
This patch adds table walker stats for: - Walk events - Instruction vs Data - Page size histogram - Wait time and service time histograms - Pending requests histogram (per cycle) - measures dist. of L (p(1..) = how often busy, p(0) = how often idle) - Squashes, before starting and after completion
Diffstat (limited to 'src')
-rw-r--r--src/arch/arm/table_walker.cc186
-rw-r--r--src/arch/arm/table_walker.hh31
-rw-r--r--src/dev/dma_device.cc12
-rw-r--r--src/dev/dma_device.hh4
4 files changed, 225 insertions, 8 deletions
diff --git a/src/arch/arm/table_walker.cc b/src/arch/arm/table_walker.cc
index 6a0f0144a..6a1ac5577 100644
--- a/src/arch/arm/table_walker.cc
+++ b/src/arch/arm/table_walker.cc
@@ -60,6 +60,8 @@ TableWalker::TableWalker(const Params *p)
stage2Mmu(NULL), isStage2(p->is_stage2), tlb(NULL),
currState(NULL), pending(false), masterId(p->sys->getMasterId(name())),
numSquashable(p->num_squash_per_cycle),
+ pendingReqs(0),
+ pendingChangeTick(curTick()),
doL1DescEvent(this), doL2DescEvent(this),
doL0LongDescEvent(this), doL1LongDescEvent(this), doL2LongDescEvent(this),
doL3LongDescEvent(this),
@@ -151,6 +153,7 @@ TableWalker::drainResume()
if (params()->sys->isTimingMode() && currState) {
delete currState;
currState = NULL;
+ pendingChange();
}
}
@@ -170,6 +173,8 @@ TableWalker::walk(RequestPtr _req, ThreadContext *_tc, uint16_t _asid,
bool secure, TLB::ArmTranslationType tranType)
{
assert(!(_functional && _timing));
+ ++statWalks;
+
WalkerState *savedCurrState = NULL;
if (!currState && !_functional) {
@@ -196,10 +201,13 @@ TableWalker::walk(RequestPtr _req, ThreadContext *_tc, uint16_t _asid,
// this fault to re-execute the faulting instruction which should clean
// up everything.
if (currState->vaddr_tainted == _req->getVaddr()) {
+ ++statSquashedBefore;
return std::make_shared<ReExec>();
}
}
+ pendingChange();
+ currState->startTime = curTick();
currState->tc = _tc;
currState->aarch64 = opModeIs64(currOpMode(_tc));
currState->el = currEL(_tc);
@@ -261,6 +269,8 @@ TableWalker::walk(RequestPtr _req, ThreadContext *_tc, uint16_t _asid,
currState->isFetch = (currState->mode == TLB::Execute);
currState->isWrite = (currState->mode == TLB::Write);
+ statRequestOrigin[REQUESTED][currState->isFetch]++;
+
// We only do a second stage of translation if we're not secure, or in
// hyp mode, the second stage MMU is enabled, and this table walker
// instance is the first stage.
@@ -280,6 +290,10 @@ TableWalker::walk(RequestPtr _req, ThreadContext *_tc, uint16_t _asid,
currState->userTable = true;
currState->xnTable = false;
currState->pxnTable = false;
+
+ ++statWalksLongDescriptor;
+ } else {
+ ++statWalksShortDescriptor;
}
if (!currState->timing) {
@@ -303,8 +317,10 @@ TableWalker::walk(RequestPtr _req, ThreadContext *_tc, uint16_t _asid,
if (pending || pendingQueue.size()) {
pendingQueue.push_back(currState);
currState = NULL;
+ pendingChange();
} else {
pending = true;
+ pendingChange();
if (currState->aarch64)
return processWalkAArch64();
else if (long_desc_format)
@@ -321,6 +337,7 @@ TableWalker::processWalkWrapper()
{
assert(!currState);
assert(pendingQueue.size());
+ pendingChange();
currState = pendingQueue.front();
ExceptionLevel target_el = EL0;
@@ -372,6 +389,7 @@ TableWalker::processWalkWrapper()
(currState->transState->squashed() || te)) {
pendingQueue.pop_front();
num_squashed++;
+ statSquashedBefore++;
DPRINTF(TLB, "Squashing table walk for address %#x\n",
currState->vaddr_tainted);
@@ -383,6 +401,7 @@ TableWalker::processWalkWrapper()
currState->req, currState->tc, currState->mode);
} else {
// translate the request now that we know it will work
+ statWalkServiceTime.sample(curTick() - currState->startTime);
tlb->translateTiming(currState->req, currState->tc,
currState->transState, currState->mode);
@@ -402,8 +421,9 @@ TableWalker::processWalkWrapper()
currState = NULL;
}
}
+ pendingChange();
- // if we've still got pending translations schedule more work
+ // if we still have pending translations, schedule more work
nextWalk(tc);
currState = NULL;
}
@@ -420,6 +440,8 @@ TableWalker::processWalk()
currState->vaddr_tainted, currState->ttbcr, mbits(currState->vaddr, 31,
32 - currState->ttbcr.n));
+ statWalkWaitTime.sample(curTick() - currState->startTime);
+
if (currState->ttbcr.n == 0 || !mbits(currState->vaddr, 31,
32 - currState->ttbcr.n)) {
DPRINTF(TLB, " - Selecting TTBR0\n");
@@ -511,6 +533,8 @@ TableWalker::processWalkLPAE()
DPRINTF(TLB, "Beginning table walk for address %#x, TTBCR: %#x\n",
currState->vaddr_tainted, currState->ttbcr);
+ statWalkWaitTime.sample(curTick() - currState->startTime);
+
Request::Flags flag = 0;
if (currState->isSecure)
flag.set(Request::SECURE);
@@ -700,6 +724,8 @@ TableWalker::processWalkAArch64()
static const GrainSize GrainMap_EL1_tg1[] =
{ ReservedGrain, Grain16KB, Grain4KB, Grain64KB };
+ statWalkWaitTime.sample(curTick() - currState->startTime);
+
// Determine TTBR, table size, granule size and phys. address range
Addr ttbr = 0;
int tsz = 0, ps = 0;
@@ -920,8 +946,8 @@ TableWalker::processWalkAArch64()
panic("Invalid table lookup level");
break;
}
- port.dmaAction(MemCmd::ReadReq, desc_addr, sizeof(uint64_t), event,
- (uint8_t*) &currState->longDesc.data,
+ port.dmaAction(MemCmd::ReadReq, desc_addr, sizeof(uint64_t),
+ event, (uint8_t*) &currState->longDesc.data,
currState->tc->getCpuPtr()->clockPeriod(), flag);
DPRINTF(TLBVerbose,
"Adding to walker fifo: queue size before adding: %d\n",
@@ -1698,6 +1724,7 @@ TableWalker::doL1DescriptorWrapper()
if (currState->fault != NoFault) {
currState->transState->finish(currState->fault, currState->req,
currState->tc, currState->mode);
+ statWalksShortTerminatedAtLevel[0]++;
pending = false;
nextWalk(currState->tc);
@@ -1711,9 +1738,11 @@ TableWalker::doL1DescriptorWrapper()
// delay is not set so there is no L2 to do
// Don't finish the translation if a stage 2 look up is underway
if (!currState->doingStage2) {
+ statWalkServiceTime.sample(curTick() - currState->startTime);
DPRINTF(TLBVerbose, "calling translateTiming again\n");
currState->fault = tlb->translateTiming(currState->req, currState->tc,
currState->transState, currState->mode);
+ statWalksShortTerminatedAtLevel[0]++;
}
pending = false;
@@ -1749,13 +1778,16 @@ TableWalker::doL2DescriptorWrapper()
if (currState->fault != NoFault) {
currState->transState->finish(currState->fault, currState->req,
currState->tc, currState->mode);
+ statWalksShortTerminatedAtLevel[1]++;
}
else {
// Don't finish the translation if a stage 2 look up is underway
if (!currState->doingStage2) {
+ statWalkServiceTime.sample(curTick() - currState->startTime);
DPRINTF(TLBVerbose, "calling translateTiming again\n");
currState->fault = tlb->translateTiming(currState->req,
currState->tc, currState->transState, currState->mode);
+ statWalksShortTerminatedAtLevel[1]++;
}
}
@@ -1832,9 +1864,11 @@ TableWalker::doLongDescriptorWrapper(LookupLevel curr_lookup_level)
// Don't finish the translation if a stage 2 look up is underway
if (!currState->doingStage2) {
DPRINTF(TLBVerbose, "calling translateTiming again\n");
+ statWalkServiceTime.sample(curTick() - currState->startTime);
currState->fault = tlb->translateTiming(currState->req, currState->tc,
currState->transState,
currState->mode);
+ statWalksLongTerminatedAtLevel[(unsigned) curr_lookup_level]++;
}
pending = false;
@@ -1906,7 +1940,7 @@ TableWalker::fetchDescriptor(Addr descAddr, uint8_t *data, int numBytes,
} else {
if (isTiming) {
port.dmaAction(MemCmd::ReadReq, descAddr, numBytes, event, data,
- currState->tc->getCpuPtr()->clockPeriod(), flags);
+ currState->tc->getCpuPtr()->clockPeriod(),flags);
if (queueIndex >= 0) {
DPRINTF(TLBVerbose, "Adding to walker fifo: queue size before adding: %d\n",
stateQueues[queueIndex].size());
@@ -1956,6 +1990,9 @@ TableWalker::insertTableEntry(DescriptorBase &descriptor, bool longDescriptor)
else
te.el = 1;
+ statPageSizes[pageSizeNtoStatBin(te.N)]++;
+ statRequestOrigin[COMPLETED][currState->isFetch]++;
+
// ASID has no meaning for stage 2 TLB entries, so mark all stage 2 entries
// as global
te.global = descriptor.global(currState) || isStage2;
@@ -2024,3 +2061,144 @@ TableWalker::toLookupLevel(uint8_t lookup_level_as_int)
panic("Invalid lookup level conversion");
}
}
+
+/* this method keeps track of the table walker queue's residency, so
+ * needs to be called whenever requests start and complete. */
+void
+TableWalker::pendingChange()
+{
+ unsigned n = pendingQueue.size();
+ if ((currState != NULL) && (currState != pendingQueue.front())) {
+ ++n;
+ }
+
+ if (n != pendingReqs) {
+ Tick now = curTick();
+ statPendingWalks.sample(pendingReqs, now - pendingChangeTick);
+ pendingReqs = n;
+ pendingChangeTick = now;
+ }
+}
+
+uint8_t
+TableWalker::pageSizeNtoStatBin(uint8_t N)
+{
+ /* for statPageSizes */
+ switch(N) {
+ case 12: return 0; // 4K
+ case 14: return 1; // 16K (using 16K granule in v8-64)
+ case 16: return 2; // 64K
+ case 20: return 3; // 1M
+ case 21: return 4; // 2M-LPAE
+ case 24: return 5; // 16M
+ case 25: return 6; // 32M (using 16K granule in v8-64)
+ case 29: return 7; // 512M (using 64K granule in v8-64)
+ case 30: return 8; // 1G-LPAE
+ default:
+ panic("unknown page size");
+ return 255;
+ }
+}
+
+void
+TableWalker::regStats()
+{
+ statWalks
+ .name(name() + ".walks")
+ .desc("Table walker walks requested")
+ ;
+
+ statWalksShortDescriptor
+ .name(name() + ".walksShort")
+ .desc("Table walker walks initiated with short descriptors")
+ .flags(Stats::nozero)
+ ;
+
+ statWalksLongDescriptor
+ .name(name() + ".walksLong")
+ .desc("Table walker walks initiated with long descriptors")
+ .flags(Stats::nozero)
+ ;
+
+ statWalksShortTerminatedAtLevel
+ .init(2)
+ .name(name() + ".walksShortTerminationLevel")
+ .desc("Level at which table walker walks "
+ "with short descriptors terminate")
+ .flags(Stats::nozero)
+ ;
+ statWalksShortTerminatedAtLevel.subname(0, "Level1");
+ statWalksShortTerminatedAtLevel.subname(1, "Level2");
+
+ statWalksLongTerminatedAtLevel
+ .init(4)
+ .name(name() + ".walksLongTerminationLevel")
+ .desc("Level at which table walker walks "
+ "with long descriptors terminate")
+ .flags(Stats::nozero)
+ ;
+ statWalksLongTerminatedAtLevel.subname(0, "Level0");
+ statWalksLongTerminatedAtLevel.subname(1, "Level1");
+ statWalksLongTerminatedAtLevel.subname(2, "Level2");
+ statWalksLongTerminatedAtLevel.subname(3, "Level3");
+
+ statSquashedBefore
+ .name(name() + ".walksSquashedBefore")
+ .desc("Table walks squashed before starting")
+ .flags(Stats::nozero)
+ ;
+
+ statSquashedAfter
+ .name(name() + ".walksSquashedAfter")
+ .desc("Table walks squashed after completion")
+ .flags(Stats::nozero)
+ ;
+
+ statWalkWaitTime
+ .init(16)
+ .name(name() + ".walkWaitTime")
+ .desc("Table walker wait (enqueue to first request) latency")
+ .flags(Stats::pdf | Stats::nozero | Stats::nonan)
+ ;
+
+ statWalkServiceTime
+ .init(16)
+ .name(name() + ".walkCompletionTime")
+ .desc("Table walker service (enqueue to completion) latency")
+ .flags(Stats::pdf | Stats::nozero | Stats::nonan)
+ ;
+
+ statPendingWalks
+ .init(16)
+ .name(name() + ".walksPending")
+ .desc("Table walker pending requests distribution")
+ .flags(Stats::pdf | Stats::dist | Stats::nozero | Stats::nonan)
+ ;
+
+ statPageSizes // see DDI 0487A D4-1661
+ .init(9)
+ .name(name() + ".walkPageSizes")
+ .desc("Table walker page sizes translated")
+ .flags(Stats::total | Stats::pdf | Stats::dist | Stats::nozero)
+ ;
+ statPageSizes.subname(0, "4K");
+ statPageSizes.subname(1, "16K");
+ statPageSizes.subname(2, "64K");
+ statPageSizes.subname(3, "1M");
+ statPageSizes.subname(4, "2M");
+ statPageSizes.subname(5, "16M");
+ statPageSizes.subname(6, "32M");
+ statPageSizes.subname(7, "512M");
+ statPageSizes.subname(8, "1G");
+
+ statRequestOrigin
+ .init(2,2) // Instruction/Data, requests/completed
+ .name(name() + ".walkRequestOrigin")
+ .desc("Table walker requests started/completed, data/inst")
+ .flags(Stats::total)
+ ;
+ statRequestOrigin.subname(0,"Requested");
+ statRequestOrigin.subname(1,"Completed");
+ statRequestOrigin.ysubname(0,"Data");
+ statRequestOrigin.ysubname(1,"Inst");
+}
diff --git a/src/arch/arm/table_walker.hh b/src/arch/arm/table_walker.hh
index b5d67c9a1..b265f5165 100644
--- a/src/arch/arm/table_walker.hh
+++ b/src/arch/arm/table_walker.hh
@@ -794,6 +794,12 @@ class TableWalker : public MemObject
TableWalker *tableWalker;
+ /** Timestamp for calculating elapsed time in service (for stats) */
+ Tick startTime;
+
+ /** Page entries walked during service (for stats) */
+ unsigned levels;
+
void doL1Descriptor();
void doL2Descriptor();
@@ -883,6 +889,26 @@ class TableWalker : public MemObject
bool _haveLargeAsid64;
ArmSystem *armSys;
+ /** Statistics */
+ Stats::Scalar statWalks;
+ Stats::Scalar statWalksShortDescriptor;
+ Stats::Scalar statWalksLongDescriptor;
+ Stats::Vector statWalksShortTerminatedAtLevel;
+ Stats::Vector statWalksLongTerminatedAtLevel;
+ Stats::Scalar statSquashedBefore;
+ Stats::Scalar statSquashedAfter;
+ Stats::Histogram statWalkWaitTime;
+ Stats::Histogram statWalkServiceTime;
+ Stats::Histogram statPendingWalks; // essentially "L" of queueing theory
+ Stats::Vector statPageSizes;
+ Stats::Vector2d statRequestOrigin;
+
+ mutable unsigned pendingReqs;
+ mutable Tick pendingChangeTick;
+
+ static const unsigned REQUESTED = 0;
+ static const unsigned COMPLETED = 1;
+
public:
typedef ArmTableWalkerParams Params;
TableWalker(const Params *p);
@@ -903,6 +929,7 @@ class TableWalker : public MemObject
virtual void drainResume();
virtual BaseMasterPort& getMasterPort(const std::string &if_name,
PortID idx = InvalidPortID);
+ void regStats();
/**
* Allow the MMU (overseeing both stage 1 and stage 2 TLBs) to
@@ -976,6 +1003,10 @@ class TableWalker : public MemObject
EventWrapper<TableWalker, &TableWalker::processWalkWrapper> doProcessEvent;
void nextWalk(ThreadContext *tc);
+
+ void pendingChange();
+
+ static uint8_t pageSizeNtoStatBin(uint8_t N);
};
} // namespace ArmISA
diff --git a/src/dev/dma_device.cc b/src/dev/dma_device.cc
index 5033c3617..d53ea2546 100644
--- a/src/dev/dma_device.cc
+++ b/src/dev/dma_device.cc
@@ -152,7 +152,7 @@ DmaPort::recvRetry()
trySendTimingReq();
}
-void
+RequestPtr
DmaPort::dmaAction(Packet::Command cmd, Addr addr, int size, Event *event,
uint8_t *data, Tick delay, Request::Flags flag)
{
@@ -161,11 +161,17 @@ DmaPort::dmaAction(Packet::Command cmd, Addr addr, int size, Event *event,
// i.e. cache line size
DmaReqState *reqState = new DmaReqState(event, size, delay);
+ // (functionality added for Table Walker statistics)
+ // We're only interested in this when there will only be one request.
+ // For simplicity, we return the last request, which would also be
+ // the only request in that case.
+ RequestPtr req = NULL;
+
DPRINTF(DMA, "Starting DMA for addr: %#x size: %d sched: %d\n", addr, size,
event ? event->scheduled() : -1);
for (ChunkGenerator gen(addr, size, sys->cacheLineSize());
!gen.done(); gen.next()) {
- Request *req = new Request(gen.addr(), gen.size(), flag, masterId);
+ req = new Request(gen.addr(), gen.size(), flag, masterId);
req->taskId(ContextSwitchTaskId::DMA);
PacketPtr pkt = new Packet(req, cmd);
@@ -184,6 +190,8 @@ DmaPort::dmaAction(Packet::Command cmd, Addr addr, int size, Event *event,
// just created, for atomic this involves actually completing all
// the requests
sendDma();
+
+ return req;
}
void
diff --git a/src/dev/dma_device.hh b/src/dev/dma_device.hh
index d1245b977..6df4a287d 100644
--- a/src/dev/dma_device.hh
+++ b/src/dev/dma_device.hh
@@ -142,8 +142,8 @@ class DmaPort : public MasterPort
DmaPort(MemObject *dev, System *s);
- void dmaAction(Packet::Command cmd, Addr addr, int size, Event *event,
- uint8_t *data, Tick delay, Request::Flags flag = 0);
+ RequestPtr dmaAction(Packet::Command cmd, Addr addr, int size, Event *event,
+ uint8_t *data, Tick delay, Request::Flags flag = 0);
bool dmaPending() const { return pendingCount > 0; }