From e0074324bacf500f9d0cc11ebc6e2f29bf3d8ba1 Mon Sep 17 00:00:00 2001 From: Andreas Sandberg Date: Thu, 16 Oct 2014 05:49:41 -0400 Subject: cpu: Probe points for basic PMU stats This changeset adds probe points that can be used to implement PMU counters for CPU stats. The following probes are supported: * BaseCPU::ppCycles / Cycles * BaseCPU::ppRetiredInsts / RetiredInsts * BaseCPU::ppRetiredLoads / RetiredLoads * BaseCPU::ppRetiredStores / RetiredStores * BaseCPU::ppRetiredBranches RetiredBranches --- src/cpu/base.cc | 36 +++++++++++++++++++++++++++++++++ src/cpu/base.hh | 51 +++++++++++++++++++++++++++++++++++++++++++++++ src/cpu/minor/execute.cc | 2 ++ src/cpu/minor/pipeline.hh | 5 +++++ src/cpu/o3/cpu.cc | 14 ++++++++++--- src/cpu/simple/atomic.cc | 7 ++++++- src/cpu/simple/base.cc | 3 +++ src/cpu/simple/timing.cc | 29 +++++++++++++++++---------- src/cpu/simple/timing.hh | 4 +++- 9 files changed, 135 insertions(+), 16 deletions(-) (limited to 'src/cpu') diff --git a/src/cpu/base.cc b/src/cpu/base.cc index c8c8ac571..ea4df2aa8 100644 --- a/src/cpu/base.cc +++ b/src/cpu/base.cc @@ -283,6 +283,42 @@ BaseCPU::startup() } } +ProbePoints::PMUUPtr +BaseCPU::pmuProbePoint(const char *name) +{ + ProbePoints::PMUUPtr ptr; + ptr.reset(new ProbePoints::PMU(getProbeManager(), name)); + + return ptr; +} + +void +BaseCPU::regProbePoints() +{ + ppCycles = pmuProbePoint("Cycles"); + + ppRetiredInsts = pmuProbePoint("RetiredInsts"); + ppRetiredLoads = pmuProbePoint("RetiredLoads"); + ppRetiredStores = pmuProbePoint("RetiredStores"); + ppRetiredBranches = pmuProbePoint("RetiredBranches"); +} + +void +BaseCPU::probeInstCommit(const StaticInstPtr &inst) +{ + if (!inst->isMicroop() || inst->isLastMicroop()) + ppRetiredInsts->notify(1); + + + if (inst->isLoad()) + ppRetiredLoads->notify(1); + + if (inst->isStore()) + ppRetiredLoads->notify(1); + + if (inst->isControl()) + ppRetiredBranches->notify(1); +} void BaseCPU::regStats() diff --git a/src/cpu/base.hh b/src/cpu/base.hh index a124b4282..75c8f7263 100644 --- a/src/cpu/base.hh +++ b/src/cpu/base.hh @@ -62,6 +62,7 @@ #include "sim/eventq.hh" #include "sim/full_system.hh" #include "sim/insttracer.hh" +#include "sim/probe/pmu.hh" #include "sim/system.hh" struct BaseCPUParams; @@ -280,6 +281,8 @@ class BaseCPU : public MemObject virtual void startup(); virtual void regStats(); + void regProbePoints() M5_ATTR_OVERRIDE; + void registerThreadContexts(); /** @@ -437,6 +440,54 @@ class BaseCPU : public MemObject */ void scheduleLoadStop(ThreadID tid, Counter loads, const char *cause); + public: + /** + * @{ + * @name PMU Probe points. + */ + + /** + * Helper method to trigger PMU probes for a committed + * instruction. + * + * @param inst Instruction that just committed + */ + virtual void probeInstCommit(const StaticInstPtr &inst); + + /** + * Helper method to instantiate probe points belonging to this + * object. + * + * @param name Name of the probe point. + * @return A unique_ptr to the new probe point. + */ + ProbePoints::PMUUPtr pmuProbePoint(const char *name); + + /** CPU cycle counter */ + ProbePoints::PMUUPtr ppCycles; + + /** + * Instruction commit probe point. + * + * This probe point is triggered whenever one or more instructions + * are committed. It is normally triggered once for every + * instruction. However, CPU models committing bundles of + * instructions may call notify once for the entire bundle. + */ + ProbePoints::PMUUPtr ppRetiredInsts; + + /** Retired load instructions */ + ProbePoints::PMUUPtr ppRetiredLoads; + /** Retired store instructions */ + ProbePoints::PMUUPtr ppRetiredStores; + + /** Retired branches (any type) */ + ProbePoints::PMUUPtr ppRetiredBranches; + + /** @} */ + + + // Function tracing private: bool functionTracingEnabled; diff --git a/src/cpu/minor/execute.cc b/src/cpu/minor/execute.cc index 5679f55a7..5f840a273 100644 --- a/src/cpu/minor/execute.cc +++ b/src/cpu/minor/execute.cc @@ -853,6 +853,8 @@ Execute::doInstCommitAccounting(MinorDynInstPtr inst) /* Set the CP SeqNum to the numOps commit number */ if (inst->traceData) inst->traceData->setCPSeq(thread->numOp); + + cpu.probeInstCommit(inst->staticInst); } bool diff --git a/src/cpu/minor/pipeline.hh b/src/cpu/minor/pipeline.hh index 893efbf50..355a3c6c2 100644 --- a/src/cpu/minor/pipeline.hh +++ b/src/cpu/minor/pipeline.hh @@ -126,6 +126,11 @@ class Pipeline : public Ticked * stages and pipeline advance) */ void evaluate(); + void countCycles(Cycles delta) M5_ATTR_OVERRIDE + { + cpu.ppCycles->notify(delta); + } + void minorTrace() const; /** Functions below here are BaseCPU operations passed on to pipeline diff --git a/src/cpu/o3/cpu.cc b/src/cpu/o3/cpu.cc index 925b3d2d8..6895355f0 100644 --- a/src/cpu/o3/cpu.cc +++ b/src/cpu/o3/cpu.cc @@ -405,8 +405,11 @@ template void FullO3CPU::regProbePoints() { + BaseCPU::regProbePoints(); + ppInstAccessComplete = new ProbePointArg(getProbeManager(), "InstAccessComplete"); ppDataAccessComplete = new ProbePointArg >(getProbeManager(), "DataAccessComplete"); + fetch.regProbePoints(); iew.regProbePoints(); commit.regProbePoints(); @@ -534,6 +537,7 @@ FullO3CPU::tick() assert(getDrainState() != Drainable::Drained); ++numCycles; + ppCycles->notify(1); // activity = false; @@ -1444,6 +1448,8 @@ FullO3CPU::instDone(ThreadID tid, DynInstPtr &inst) // Check for instruction-count-based events. comInstEventQueue[tid]->serviceEvents(thread[tid]->numInst); system->instEventQueue.serviceEvents(system->totalNumInsts); + + probeInstCommit(inst->staticInst); } template @@ -1622,10 +1628,12 @@ FullO3CPU::wakeCPU() Cycles cycles(curCycle() - lastRunningCycle); // @todo: This is an oddity that is only here to match the stats - if (cycles != 0) + if (cycles > 1) { --cycles; - idleCycles += cycles; - numCycles += cycles; + idleCycles += cycles; + numCycles += cycles; + ppCycles->notify(cycles); + } schedule(tickEvent, clockEdge()); } diff --git a/src/cpu/simple/atomic.cc b/src/cpu/simple/atomic.cc index 5af3854e7..d6dbb9292 100644 --- a/src/cpu/simple/atomic.cc +++ b/src/cpu/simple/atomic.cc @@ -233,7 +233,9 @@ AtomicSimpleCPU::activateContext(ThreadID thread_num) assert(!tickEvent.scheduled()); notIdleFraction = 1; - numCycles += ticksToCycles(thread->lastActivate - thread->lastSuspend); + Cycles delta = ticksToCycles(thread->lastActivate - thread->lastSuspend); + numCycles += delta; + ppCycles->notify(delta); //Make sure ticks are still on multiples of cycles schedule(tickEvent, clockEdge(Cycles(0))); @@ -501,6 +503,7 @@ AtomicSimpleCPU::tick() for (int i = 0; i < width || locked; ++i) { numCycles++; + ppCycles->notify(1); if (!curStaticInst || !curStaticInst->isDelayedCommit()) checkForInterrupts(); @@ -614,6 +617,8 @@ AtomicSimpleCPU::tick() void AtomicSimpleCPU::regProbePoints() { + BaseCPU::regProbePoints(); + ppCommit = new ProbePointArg> (getProbeManager(), "Commit"); } diff --git a/src/cpu/simple/base.cc b/src/cpu/simple/base.cc index 6101ff30f..60ab53999 100644 --- a/src/cpu/simple/base.cc +++ b/src/cpu/simple/base.cc @@ -544,6 +544,9 @@ BaseSimpleCPU::postExecute() delete traceData; traceData = NULL; } + + // Call CPU instruction commit probes + probeInstCommit(curStaticInst); } void diff --git a/src/cpu/simple/timing.cc b/src/cpu/simple/timing.cc index 9a9714bee..84a2c09fd 100644 --- a/src/cpu/simple/timing.cc +++ b/src/cpu/simple/timing.cc @@ -178,7 +178,7 @@ TimingSimpleCPU::switchOut() assert(!stayAtPC); assert(microPC() == 0); - numCycles += curCycle() - previousCycle; + updateCycleCounts(); } @@ -332,8 +332,7 @@ TimingSimpleCPU::translationFault(const Fault &fault) { // fault may be NoFault in cases where a fault is suppressed, // for instance prefetches. - numCycles += curCycle() - previousCycle; - previousCycle = curCycle(); + updateCycleCounts(); if (traceData) { // Since there was a fault, we shouldn't trace this instruction. @@ -569,8 +568,7 @@ TimingSimpleCPU::fetch() _status = IcacheWaitResponse; completeIfetch(NULL); - numCycles += curCycle() - previousCycle; - previousCycle = curCycle(); + updateCycleCounts(); } } @@ -603,8 +601,7 @@ TimingSimpleCPU::sendFetch(const Fault &fault, RequestPtr req, advanceInst(fault); } - numCycles += curCycle() - previousCycle; - previousCycle = curCycle(); + updateCycleCounts(); } @@ -651,8 +648,7 @@ TimingSimpleCPU::completeIfetch(PacketPtr pkt) _status = BaseSimpleCPU::Running; - numCycles += curCycle() - previousCycle; - previousCycle = curCycle(); + updateCycleCounts(); if (pkt) pkt->req->setAccessLatency(); @@ -753,8 +749,8 @@ TimingSimpleCPU::completeDataAccess(PacketPtr pkt) pkt->req->getFlags().isSet(Request::NO_ACCESS)); pkt->req->setAccessLatency(); - numCycles += curCycle() - previousCycle; - previousCycle = curCycle(); + + updateCycleCounts(); if (pkt->senderState) { SplitFragmentSenderState * send_state = @@ -808,6 +804,17 @@ TimingSimpleCPU::completeDataAccess(PacketPtr pkt) advanceInst(fault); } +void +TimingSimpleCPU::updateCycleCounts() +{ + const Cycles delta(curCycle() - previousCycle); + + numCycles += delta; + ppCycles->notify(delta); + + previousCycle = curCycle(); +} + void TimingSimpleCPU::DcachePort::recvTimingSnoopReq(PacketPtr pkt) { diff --git a/src/cpu/simple/timing.hh b/src/cpu/simple/timing.hh index 24f7002ff..84c8f7418 100644 --- a/src/cpu/simple/timing.hh +++ b/src/cpu/simple/timing.hh @@ -245,13 +245,15 @@ class TimingSimpleCPU : public BaseSimpleCPU }; + void updateCycleCounts(); + IcachePort icachePort; DcachePort dcachePort; PacketPtr ifetch_pkt; PacketPtr dcache_pkt; - Tick previousCycle; + Cycles previousCycle; protected: -- cgit v1.2.3