diff options
author | Joel Hestness <hestness@cs.utexas.edu> | 2011-02-06 22:14:17 -0800 |
---|---|---|
committer | Joel Hestness <hestness@cs.utexas.edu> | 2011-02-06 22:14:17 -0800 |
commit | b4c10bd6800b5ab5adee3035f1908d7a49a14ca9 (patch) | |
tree | d21b1bbf5c8df8c77d77b6983779b24189a1d8cb /src/cpu/o3 | |
parent | a679e732cee821616c20cc13c22ad2877072ff14 (diff) | |
download | gem5-b4c10bd6800b5ab5adee3035f1908d7a49a14ca9.tar.xz |
mcpat: Adds McPAT performance counters
Updated patches from Rick Strong's set that modify performance counters for
McPAT
Diffstat (limited to 'src/cpu/o3')
-rw-r--r-- | src/cpu/o3/commit.hh | 6 | ||||
-rw-r--r-- | src/cpu/o3/commit_impl.hh | 34 | ||||
-rw-r--r-- | src/cpu/o3/cpu.cc | 50 | ||||
-rw-r--r-- | src/cpu/o3/cpu.hh | 12 | ||||
-rw-r--r-- | src/cpu/o3/iew_impl.hh | 1 | ||||
-rw-r--r-- | src/cpu/o3/inst_queue.hh | 10 | ||||
-rw-r--r-- | src/cpu/o3/inst_queue_impl.hh | 61 | ||||
-rw-r--r-- | src/cpu/o3/rename.hh | 2 | ||||
-rw-r--r-- | src/cpu/o3/rename_impl.hh | 9 | ||||
-rw-r--r-- | src/cpu/o3/rob.hh | 8 | ||||
-rw-r--r-- | src/cpu/o3/rob_impl.hh | 20 |
11 files changed, 211 insertions, 2 deletions
diff --git a/src/cpu/o3/commit.hh b/src/cpu/o3/commit.hh index 659b0ad5f..047e29f5d 100644 --- a/src/cpu/o3/commit.hh +++ b/src/cpu/o3/commit.hh @@ -473,6 +473,12 @@ class DefaultCommit Stats::Vector statComMembars; /** Total number of committed branches. */ Stats::Vector statComBranches; + /** Total number of floating point instructions */ + Stats::Vector statComFloating; + /** Total number of integer instructions */ + Stats::Vector statComInteger; + /** Total number of function calls */ + Stats::Vector statComFunctionCalls; /** Number of cycles where the commit bandwidth limit is reached. */ Stats::Scalar commitEligibleSamples; diff --git a/src/cpu/o3/commit_impl.hh b/src/cpu/o3/commit_impl.hh index a49e1497e..50c08e162 100644 --- a/src/cpu/o3/commit_impl.hh +++ b/src/cpu/o3/commit_impl.hh @@ -230,6 +230,27 @@ DefaultCommit<Impl>::regStats() .flags(total) ; + statComFloating + .init(cpu->numThreads) + .name(name() + ".COM:fp_insts") + .desc("Number of committed floating point instructions.") + .flags(total) + ; + + statComInteger + .init(cpu->numThreads) + .name(name()+".COM:int_insts") + .desc("Number of committed integer instructions.") + .flags(total) + ; + + statComFunctionCalls + .init(cpu->numThreads) + .name(name()+".COM:function_calls") + .desc("Number of function calls committed.") + .flags(total) + ; + commitEligible .init(cpu->numThreads) .name(name() + ".COM:bw_limited") @@ -1321,6 +1342,19 @@ DefaultCommit<Impl>::updateComInstStats(DynInstPtr &inst) if (inst->isMemBarrier()) { statComMembars[tid]++; } + + // Integer Instruction + if (inst->isInteger()) + statComInteger[tid]++; + + // Floating Point Instruction + if (inst->isFloating()) + statComFloating[tid]++; + + // Function Calls + if (inst->isCall()) + statComFunctionCalls[tid]++; + } //////////////////////////////////////// diff --git a/src/cpu/o3/cpu.cc b/src/cpu/o3/cpu.cc index 9becc6601..2d3bc3f72 100644 --- a/src/cpu/o3/cpu.cc +++ b/src/cpu/o3/cpu.cc @@ -1,5 +1,6 @@ /* * Copyright (c) 2004-2006 The Regents of The University of Michigan + * Copyright (c) 2011 Regents of the University of California * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -27,6 +28,7 @@ * * Authors: Kevin Lim * Korey Sewell + * Rick Strong */ #include "config/full_system.hh" @@ -480,6 +482,37 @@ FullO3CPU<Impl>::regStats() this->rename.regStats(); this->iew.regStats(); this->commit.regStats(); + this->rob.regStats(); + + intRegfileReads + .name(name() + ".int_regfile_reads") + .desc("number of integer regfile reads") + .prereq(intRegfileReads); + + intRegfileWrites + .name(name() + ".int_regfile_writes") + .desc("number of integer regfile writes") + .prereq(intRegfileWrites); + + fpRegfileReads + .name(name() + ".fp_regfile_reads") + .desc("number of floating regfile reads") + .prereq(fpRegfileReads); + + fpRegfileWrites + .name(name() + ".fp_regfile_writes") + .desc("number of floating regfile writes") + .prereq(fpRegfileWrites); + + miscRegfileReads + .name(name() + ".misc_regfile_reads") + .desc("number of misc regfile reads") + .prereq(miscRegfileReads); + + miscRegfileWrites + .name(name() + ".misc_regfile_writes") + .desc("number of misc regfile writes") + .prereq(miscRegfileWrites); } template <class Impl> @@ -1184,6 +1217,7 @@ template <class Impl> TheISA::MiscReg FullO3CPU<Impl>::readMiscReg(int misc_reg, ThreadID tid) { + miscRegfileReads++; return this->isa[tid].readMiscReg(misc_reg, tcBase(tid)); } @@ -1200,6 +1234,7 @@ void FullO3CPU<Impl>::setMiscReg(int misc_reg, const TheISA::MiscReg &val, ThreadID tid) { + miscRegfileWrites++; this->isa[tid].setMiscReg(misc_reg, val, tcBase(tid)); } @@ -1207,6 +1242,7 @@ template <class Impl> uint64_t FullO3CPU<Impl>::readIntReg(int reg_idx) { + intRegfileReads++; return regFile.readIntReg(reg_idx); } @@ -1214,6 +1250,7 @@ template <class Impl> FloatReg FullO3CPU<Impl>::readFloatReg(int reg_idx) { + fpRegfileReads++; return regFile.readFloatReg(reg_idx); } @@ -1221,6 +1258,7 @@ template <class Impl> FloatRegBits FullO3CPU<Impl>::readFloatRegBits(int reg_idx) { + fpRegfileReads++; return regFile.readFloatRegBits(reg_idx); } @@ -1228,6 +1266,7 @@ template <class Impl> void FullO3CPU<Impl>::setIntReg(int reg_idx, uint64_t val) { + intRegfileWrites++; regFile.setIntReg(reg_idx, val); } @@ -1235,6 +1274,7 @@ template <class Impl> void FullO3CPU<Impl>::setFloatReg(int reg_idx, FloatReg val) { + fpRegfileWrites++; regFile.setFloatReg(reg_idx, val); } @@ -1242,6 +1282,7 @@ template <class Impl> void FullO3CPU<Impl>::setFloatRegBits(int reg_idx, FloatRegBits val) { + fpRegfileWrites++; regFile.setFloatRegBits(reg_idx, val); } @@ -1249,6 +1290,7 @@ template <class Impl> uint64_t FullO3CPU<Impl>::readArchIntReg(int reg_idx, ThreadID tid) { + intRegfileReads++; PhysRegIndex phys_reg = commitRenameMap[tid].lookup(reg_idx); return regFile.readIntReg(phys_reg); @@ -1258,6 +1300,7 @@ template <class Impl> float FullO3CPU<Impl>::readArchFloatReg(int reg_idx, ThreadID tid) { + fpRegfileReads++; int idx = reg_idx + TheISA::NumIntRegs; PhysRegIndex phys_reg = commitRenameMap[tid].lookup(idx); @@ -1268,6 +1311,7 @@ template <class Impl> uint64_t FullO3CPU<Impl>::readArchFloatRegInt(int reg_idx, ThreadID tid) { + fpRegfileReads++; int idx = reg_idx + TheISA::NumIntRegs; PhysRegIndex phys_reg = commitRenameMap[tid].lookup(idx); @@ -1278,6 +1322,7 @@ template <class Impl> void FullO3CPU<Impl>::setArchIntReg(int reg_idx, uint64_t val, ThreadID tid) { + intRegfileWrites++; PhysRegIndex phys_reg = commitRenameMap[tid].lookup(reg_idx); regFile.setIntReg(phys_reg, val); @@ -1287,6 +1332,7 @@ template <class Impl> void FullO3CPU<Impl>::setArchFloatReg(int reg_idx, float val, ThreadID tid) { + fpRegfileWrites++; int idx = reg_idx + TheISA::NumIntRegs; PhysRegIndex phys_reg = commitRenameMap[tid].lookup(idx); @@ -1297,6 +1343,7 @@ template <class Impl> void FullO3CPU<Impl>::setArchFloatRegInt(int reg_idx, uint64_t val, ThreadID tid) { + fpRegfileWrites++; int idx = reg_idx + TheISA::NumIntRegs; PhysRegIndex phys_reg = commitRenameMap[tid].lookup(idx); @@ -1364,9 +1411,10 @@ FullO3CPU<Impl>::instDone(ThreadID tid) thread[tid]->numInsts++; committedInsts[tid]++; totalCommittedInsts++; - + system->totalNumInsts++; // Check for instruction-count-based events. comInstEventQueue[tid]->serviceEvents(thread[tid]->numInst); + system->instEventQueue.serviceEvents(system->totalNumInsts); } template <class Impl> diff --git a/src/cpu/o3/cpu.hh b/src/cpu/o3/cpu.hh index e3d13c840..69289996b 100644 --- a/src/cpu/o3/cpu.hh +++ b/src/cpu/o3/cpu.hh @@ -1,5 +1,6 @@ /* * Copyright (c) 2004-2005 The Regents of The University of Michigan + * Copyright (c) 2011 Regents of the University of California * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -27,6 +28,7 @@ * * Authors: Kevin Lim * Korey Sewell + * Rick Strong */ #ifndef __CPU_O3_CPU_HH__ @@ -726,6 +728,16 @@ class FullO3CPU : public BaseO3CPU Stats::Formula ipc; /** Stat for the total IPC. */ Stats::Formula totalIpc; + + //number of integer register file accesses + Stats::Scalar intRegfileReads; + Stats::Scalar intRegfileWrites; + //number of float register file accesses + Stats::Scalar fpRegfileReads; + Stats::Scalar fpRegfileWrites; + //number of misc + Stats::Scalar miscRegfileReads; + Stats::Scalar miscRegfileWrites; }; #endif // __CPU_O3_CPU_HH__ diff --git a/src/cpu/o3/iew_impl.hh b/src/cpu/o3/iew_impl.hh index ce58868ba..3f3761ff3 100644 --- a/src/cpu/o3/iew_impl.hh +++ b/src/cpu/o3/iew_impl.hh @@ -698,6 +698,7 @@ DefaultIEW<Impl>::updateStatus() // If there are no ready instructions waiting to be scheduled by the IQ, // and there's no stores waiting to write back, and dispatch is not // unblocking, then there is no internal activity for the IEW stage. + instQueue.intInstQueueReads++; if (_status == Active && !instQueue.hasReadyInsts() && !ldstQueue.willWB() && !any_unblocking) { DPRINTF(IEW, "IEW switching to idle\n"); diff --git a/src/cpu/o3/inst_queue.hh b/src/cpu/o3/inst_queue.hh index 56124d60f..be936e204 100644 --- a/src/cpu/o3/inst_queue.hh +++ b/src/cpu/o3/inst_queue.hh @@ -497,6 +497,16 @@ class InstructionQueue Stats::Vector fuBusy; /** Number of times the FU was busy per instruction issued. */ Stats::Formula fuBusyRate; + public: + Stats::Scalar intInstQueueReads; + Stats::Scalar intInstQueueWrites; + Stats::Scalar intInstQueueWakeupAccesses; + Stats::Scalar fpInstQueueReads; + Stats::Scalar fpInstQueueWrites; + Stats::Scalar fpInstQueueWakeupQccesses; + + Stats::Scalar intAluAccesses; + Stats::Scalar fpAluAccesses; }; #endif //__CPU_O3_INST_QUEUE_HH__ diff --git a/src/cpu/o3/inst_queue_impl.hh b/src/cpu/o3/inst_queue_impl.hh index ce408dfd0..91cb2f0c8 100644 --- a/src/cpu/o3/inst_queue_impl.hh +++ b/src/cpu/o3/inst_queue_impl.hh @@ -320,6 +320,47 @@ InstructionQueue<Impl>::regStats() // Tell mem dependence unit to reg stats as well. memDepUnit[tid].regStats(); } + + intInstQueueReads + .name(name() + ".int_inst_queue_reads") + .desc("Number of integer instruction queue reads") + .flags(total); + + intInstQueueWrites + .name(name() + ".int_inst_queue_writes") + .desc("Number of integer instruction queue writes") + .flags(total); + + intInstQueueWakeupAccesses + .name(name() + ".int_inst_queue_wakeup_accesses") + .desc("Number of integer instruction queue wakeup accesses") + .flags(total); + + fpInstQueueReads + .name(name() + ".fp_inst_queue_reads") + .desc("Number of floating instruction queue reads") + .flags(total); + + fpInstQueueWrites + .name(name() + ".fp_inst_queue_writes") + .desc("Number of floating instruction queue writes") + .flags(total); + + fpInstQueueWakeupQccesses + .name(name() + ".fp_inst_queue_wakeup_accesses") + .desc("Number of floating instruction queue wakeup accesses") + .flags(total); + + intAluAccesses + .name(name() + ".int_alu_accesses") + .desc("Number of integer alu accesses") + .flags(total); + + fpAluAccesses + .name(name() + ".fp_alu_accesses") + .desc("Number of floating point alu accesses") + .flags(total); + } template <class Impl> @@ -501,6 +542,7 @@ template <class Impl> void InstructionQueue<Impl>::insert(DynInstPtr &new_inst) { + new_inst->isFloating() ? fpInstQueueWrites++ : intInstQueueWrites++; // Make sure the instruction is valid assert(new_inst); @@ -542,6 +584,7 @@ InstructionQueue<Impl>::insertNonSpec(DynInstPtr &new_inst) { // @todo: Clean up this code; can do it by setting inst as unable // to issue, then calling normal insert on the inst. + new_inst->isFloating() ? fpInstQueueWrites++ : intInstQueueWrites++; assert(new_inst); @@ -592,6 +635,11 @@ InstructionQueue<Impl>::getInstToExecute() assert(!instsToExecute.empty()); DynInstPtr inst = instsToExecute.front(); instsToExecute.pop_front(); + if (inst->isFloating()){ + fpInstQueueReads++; + } else { + intInstQueueReads++; + } return inst; } @@ -706,6 +754,8 @@ InstructionQueue<Impl>::scheduleReadyInsts() DynInstPtr issuing_inst = readyInsts[op_class].top(); + issuing_inst->isFloating() ? fpInstQueueReads++ : intInstQueueReads++; + assert(issuing_inst->seqNum == (*order_it).oldestInst); if (issuing_inst->isSquashed()) { @@ -731,7 +781,7 @@ InstructionQueue<Impl>::scheduleReadyInsts() if (op_class != No_OpClass) { idx = fuPool->getUnit(op_class); - + issuing_inst->isFloating() ? fpAluAccesses++ : intAluAccesses++; if (idx > -1) { op_latency = fuPool->getOpLatency(op_class); } @@ -867,6 +917,13 @@ InstructionQueue<Impl>::wakeDependents(DynInstPtr &completed_inst) { int dependents = 0; + // The instruction queue here takes care of both floating and int ops + if (completed_inst->isFloating()) { + fpInstQueueWakeupQccesses++; + } else { + intInstQueueWakeupAccesses++; + } + DPRINTF(IQ, "Waking dependents of completed instruction.\n"); assert(!completed_inst->isSquashed()); @@ -997,6 +1054,7 @@ void InstructionQueue<Impl>::violation(DynInstPtr &store, DynInstPtr &faulting_load) { + intInstQueueWrites++; memDepUnit[store->threadNumber].violation(store, faulting_load); } @@ -1037,6 +1095,7 @@ InstructionQueue<Impl>::doSquash(ThreadID tid) (*squash_it)->seqNum > squashedSeqNum[tid]) { DynInstPtr squashed_inst = (*squash_it); + squashed_inst->isFloating() ? fpInstQueueWrites++ : intInstQueueWrites++; // Only handle the instruction if it actually is in the IQ and // hasn't already been squashed in the IQ. diff --git a/src/cpu/o3/rename.hh b/src/cpu/o3/rename.hh index 4598a8d7b..901283111 100644 --- a/src/cpu/o3/rename.hh +++ b/src/cpu/o3/rename.hh @@ -470,6 +470,8 @@ class DefaultRename Stats::Scalar renameRenamedOperands; /** Stat for total number of source register rename lookups. */ Stats::Scalar renameRenameLookups; + Stats::Scalar intRenameLookups; + Stats::Scalar fpRenameLookups; /** Stat for total number of committed renaming mappings. */ Stats::Scalar renameCommittedMaps; /** Stat for total number of mappings that were undone due to a squash. */ diff --git a/src/cpu/o3/rename_impl.hh b/src/cpu/o3/rename_impl.hh index ac2421dc7..1f34b7255 100644 --- a/src/cpu/o3/rename_impl.hh +++ b/src/cpu/o3/rename_impl.hh @@ -179,6 +179,14 @@ DefaultRename<Impl>::regStats() .desc("count of insts added to the skid buffer") .flags(Stats::total) ; + intRenameLookups + .name(name() + ".RENAME:int_rename_lookups") + .desc("Number of integer rename lookups") + .prereq(intRenameLookups); + fpRenameLookups + .name(name() + ".RENAME:fp_rename_lookups") + .desc("Number of floating rename lookups") + .prereq(fpRenameLookups); } template <class Impl> @@ -1012,6 +1020,7 @@ DefaultRename<Impl>::renameSrcRegs(DynInstPtr &inst, ThreadID tid) } ++renameRenameLookups; + inst->isFloating() ? fpRenameLookups++ : intRenameLookups++; } } diff --git a/src/cpu/o3/rob.hh b/src/cpu/o3/rob.hh index bdea07d1a..510c8c5dc 100644 --- a/src/cpu/o3/rob.hh +++ b/src/cpu/o3/rob.hh @@ -253,6 +253,9 @@ class ROB */ int countInsts(ThreadID tid); + /** Registers statistics. */ + void regStats(); + private: /** Pointer to the CPU. */ O3CPU *cpu; @@ -312,6 +315,11 @@ class ROB /** Number of active threads. */ ThreadID numThreads; + + // The number of rob_reads + Stats::Scalar robReads; + // The number of rob_writes + Stats::Scalar robWrites; }; #endif //__CPU_O3_ROB_HH__ diff --git a/src/cpu/o3/rob_impl.hh b/src/cpu/o3/rob_impl.hh index 37f1c5504..d9d1daded 100644 --- a/src/cpu/o3/rob_impl.hh +++ b/src/cpu/o3/rob_impl.hh @@ -204,6 +204,8 @@ ROB<Impl>::insertInst(DynInstPtr &inst) { assert(inst); + robWrites++; + DPRINTF(ROB, "Adding inst PC %s to the ROB.\n", inst->pcState()); assert(numInstsInROB != numEntries); @@ -237,6 +239,8 @@ template <class Impl> void ROB<Impl>::retireHead(ThreadID tid) { + robWrites++; + assert(numInstsInROB > 0); // Get the head ROB instruction. @@ -271,6 +275,7 @@ template <class Impl> bool ROB<Impl>::isHeadReady(ThreadID tid) { + robReads++; if (threadEntries[tid] != 0) { return instList[tid].front()->readyToCommit(); } @@ -315,6 +320,7 @@ template <class Impl> void ROB<Impl>::doSquash(ThreadID tid) { + robWrites++; DPRINTF(ROB, "[tid:%u]: Squashing instructions until [sn:%i].\n", tid, squashedSeqNum[tid]); @@ -523,3 +529,17 @@ ROB<Impl>::readTailInst(ThreadID tid) return *tail_thread; } +template <class Impl> +void +ROB<Impl>::regStats() +{ + using namespace Stats; + robReads + .name(name() + ".rob_reads") + .desc("The number of ROB reads"); + + robWrites + .name(name() + ".rob_writes") + .desc("The number of ROB writes"); +} + |