summaryrefslogtreecommitdiff
path: root/cpu/o3
diff options
context:
space:
mode:
Diffstat (limited to 'cpu/o3')
-rw-r--r--cpu/o3/commit.hh19
-rw-r--r--cpu/o3/commit_impl.hh115
-rw-r--r--cpu/o3/fetch.hh3
-rw-r--r--cpu/o3/fetch_impl.hh29
-rw-r--r--cpu/o3/iew.hh33
-rw-r--r--cpu/o3/iew_impl.hh175
-rw-r--r--cpu/o3/inst_queue.hh17
-rw-r--r--cpu/o3/inst_queue_impl.hh128
-rw-r--r--cpu/o3/rename.hh12
-rw-r--r--cpu/o3/rename_impl.hh100
10 files changed, 555 insertions, 76 deletions
diff --git a/cpu/o3/commit.hh b/cpu/o3/commit.hh
index 93b74ebb0..f374b8fb7 100644
--- a/cpu/o3/commit.hh
+++ b/cpu/o3/commit.hh
@@ -369,6 +369,8 @@ class DefaultCommit
/** Rename map interface. */
RenameMap *renameMap[Impl::MaxThreads];
+ void updateComInstStats(DynInstPtr &inst);
+
/** Stat for the total number of committed instructions. */
Stats::Scalar<> commitCommittedInsts;
/** Stat for the total number of squashed instructions discarded by commit.
@@ -383,15 +385,26 @@ class DefaultCommit
*/
Stats::Scalar<> commitNonSpecStalls;
/** Stat for the total number of committed branches. */
- Stats::Scalar<> commitCommittedBranches;
+// Stats::Scalar<> commitCommittedBranches;
/** Stat for the total number of committed loads. */
- Stats::Scalar<> commitCommittedLoads;
+// Stats::Scalar<> commitCommittedLoads;
/** Stat for the total number of committed memory references. */
- Stats::Scalar<> commitCommittedMemRefs;
+// Stats::Scalar<> commitCommittedMemRefs;
/** Stat for the total number of branch mispredicts that caused a squash. */
Stats::Scalar<> branchMispredicts;
/** Distribution of the number of committed instructions each cycle. */
Stats::Distribution<> numCommittedDist;
+
+ // total number of instructions committed
+ Stats::Vector<> stat_com_inst;
+ Stats::Vector<> stat_com_swp;
+ Stats::Vector<> stat_com_refs;
+ Stats::Vector<> stat_com_loads;
+ Stats::Vector<> stat_com_membars;
+ Stats::Vector<> stat_com_branches;
+
+ Stats::Scalar<> commit_eligible_samples;
+ Stats::Vector<> commit_eligible;
};
#endif // __CPU_O3_COMMIT_HH__
diff --git a/cpu/o3/commit_impl.hh b/cpu/o3/commit_impl.hh
index ef1ba9282..157e688c7 100644
--- a/cpu/o3/commit_impl.hh
+++ b/cpu/o3/commit_impl.hh
@@ -133,6 +133,7 @@ template <class Impl>
void
DefaultCommit<Impl>::regStats()
{
+ using namespace Stats;
commitCommittedInsts
.name(name() + ".commitCommittedInsts")
.desc("The number of committed instructions")
@@ -150,6 +151,7 @@ DefaultCommit<Impl>::regStats()
.desc("The number of times commit has been forced to stall to "
"communicate backwards")
.prereq(commitNonSpecStalls);
+/*
commitCommittedBranches
.name(name() + ".commitCommittedBranches")
.desc("The number of committed branches")
@@ -162,6 +164,7 @@ DefaultCommit<Impl>::regStats()
.name(name() + ".commitCommittedMemRefs")
.desc("The number of committed memory references")
.prereq(commitCommittedMemRefs);
+*/
branchMispredicts
.name(name() + ".branchMispredicts")
.desc("The number of times a branch was mispredicted")
@@ -172,6 +175,73 @@ DefaultCommit<Impl>::regStats()
.desc("Number of insts commited each cycle")
.flags(Stats::pdf)
;
+
+ stat_com_inst
+ .init(cpu->number_of_threads)
+ .name(name() + ".COM:count")
+ .desc("Number of instructions committed")
+ .flags(total)
+ ;
+
+ stat_com_swp
+ .init(cpu->number_of_threads)
+ .name(name() + ".COM:swp_count")
+ .desc("Number of s/w prefetches committed")
+ .flags(total)
+ ;
+
+ stat_com_refs
+ .init(cpu->number_of_threads)
+ .name(name() + ".COM:refs")
+ .desc("Number of memory references committed")
+ .flags(total)
+ ;
+
+ stat_com_loads
+ .init(cpu->number_of_threads)
+ .name(name() + ".COM:loads")
+ .desc("Number of loads committed")
+ .flags(total)
+ ;
+
+ stat_com_membars
+ .init(cpu->number_of_threads)
+ .name(name() + ".COM:membars")
+ .desc("Number of memory barriers committed")
+ .flags(total)
+ ;
+
+ stat_com_branches
+ .init(cpu->number_of_threads)
+ .name(name() + ".COM:branches")
+ .desc("Number of branches committed")
+ .flags(total)
+ ;
+
+ //
+ // Commit-Eligible instructions...
+ //
+ // -> The number of instructions eligible to commit in those
+ // cycles where we reached our commit BW limit (less the number
+ // actually committed)
+ //
+ // -> The average value is computed over ALL CYCLES... not just
+ // the BW limited cycles
+ //
+ // -> The standard deviation is computed only over cycles where
+ // we reached the BW limit
+ //
+ commit_eligible
+ .init(cpu->number_of_threads)
+ .name(name() + ".COM:bw_limited")
+ .desc("number of insts not committed due to BW limits")
+ .flags(total)
+ ;
+
+ commit_eligible_samples
+ .name(name() + ".COM:bw_lim_events")
+ .desc("number cycles where commit BW limit reached")
+ ;
}
template <class Impl>
@@ -1060,9 +1130,7 @@ head_inst->isWriteBarrier())*/
return false;
}
- if (head_inst->isControl()) {
- ++commitCommittedBranches;
- }
+ updateComInstStats(head_inst);
// Now that the instruction is going to be committed, finalize its
// trace data.
@@ -1186,6 +1254,47 @@ DefaultCommit<Impl>::robDoneSquashing()
return true;
}
+/** Tallies one committed instruction in the per-thread COM:* counters
+ *  (instruction/prefetch, branch, memory reference, load, barrier). */
+template <class Impl>
+void
+DefaultCommit<Impl>::updateComInstStats(DynInstPtr &inst)
+{
+    const unsigned tid = inst->threadNumber;
+
+    // A software prefetch (Alpha builds only) goes to the swp counter;
+    // anything else counts as a regular committed instruction.
+#ifdef TARGET_ALPHA
+    const bool sw_prefetch = inst->isDataPrefetch();
+#else
+    const bool sw_prefetch = false;
+#endif
+    if (sw_prefetch) {
+        stat_com_swp[tid]++;
+    } else {
+        stat_com_inst[tid]++;
+    }
+
+    // Control instructions.
+    if (inst->isControl()) {
+        stat_com_branches[tid]++;
+    }
+
+    // Memory references, with loads also counted separately.
+    const bool mem_ref = inst->isMemRef();
+    if (mem_ref) {
+        stat_com_refs[tid]++;
+    }
+    if (mem_ref && inst->isLoad()) {
+        stat_com_loads[tid]++;
+    }
+
+    // Memory barriers.
+    if (inst->isMemBarrier()) {
+        stat_com_membars[tid]++;
+    }
+}
+
////////////////////////////////////////
// //
// SMT COMMIT POLICY MAITAINED HERE //
diff --git a/cpu/o3/fetch.hh b/cpu/o3/fetch.hh
index f0f3f2745..f0b15cb86 100644
--- a/cpu/o3/fetch.hh
+++ b/cpu/o3/fetch.hh
@@ -370,6 +370,7 @@ class DefaultFetch
Stats::Scalar<> icacheStallCycles;
/** Stat for total number of fetched instructions. */
Stats::Scalar<> fetchedInsts;
+ Stats::Scalar<> fetchedBranches;
/** Stat for total number of predicted branches. */
Stats::Scalar<> predictedBranches;
/** Stat for total number of cycles spent fetching. */
@@ -383,6 +384,8 @@ class DefaultFetch
Stats::Scalar<> fetchBlockedCycles;
/** Stat for total number of fetched cache lines. */
Stats::Scalar<> fetchedCacheLines;
+
+ Stats::Scalar<> fetchIcacheSquashes;
/** Distribution of number of instructions fetched each cycle. */
Stats::Distribution<> fetchNisnDist;
Stats::Formula idleRate;
diff --git a/cpu/o3/fetch_impl.hh b/cpu/o3/fetch_impl.hh
index 7abc5733f..563a767df 100644
--- a/cpu/o3/fetch_impl.hh
+++ b/cpu/o3/fetch_impl.hh
@@ -178,6 +178,11 @@ DefaultFetch<Impl>::regStats()
.desc("Number of instructions fetch has processed")
.prereq(fetchedInsts);
+ fetchedBranches
+ .name(name() + ".fetchedBranches")
+ .desc("Number of branches that fetch encountered")
+ .prereq(fetchedBranches);
+
predictedBranches
.name(name() + ".predictedBranches")
.desc("Number of branches that fetch has predicted taken")
@@ -209,6 +214,11 @@ DefaultFetch<Impl>::regStats()
.desc("Number of cache lines fetched")
.prereq(fetchedCacheLines);
+ fetchIcacheSquashes
+ .name(name() + ".fetchIcacheSquashes")
+ .desc("Number of outstanding Icache misses that were squashed")
+ .prereq(fetchIcacheSquashes);
+
fetchNisnDist
.init(/* base value */ 0,
/* last value */ fetchWidth,
@@ -322,8 +332,10 @@ DefaultFetch<Impl>::processCacheCompletion(MemReqPtr &req)
// Can keep track of how many cache accesses go unused due to
// misspeculation here.
if (fetchStatus[tid] != IcacheMissStall ||
- req != memReq[tid])
+ req != memReq[tid]) {
+ ++fetchIcacheSquashes;
return;
+ }
// Wake up the CPU (if it went to sleep and was waiting on this completion
// event).
@@ -400,6 +412,8 @@ DefaultFetch<Impl>::lookupAndUpdateNextPC(DynInstPtr &inst, Addr &next_PC)
predict_taken = branchPred.predict(inst, next_PC, inst->threadNumber);
+ ++fetchedBranches;
+
if (predict_taken) {
++predictedBranches;
}
@@ -457,6 +471,7 @@ DefaultFetch<Impl>::fetchCacheLine(Addr fetch_PC, Fault &ret_fault, unsigned tid
// If translation was successful, attempt to read the first
// instruction.
if (fault == NoFault) {
+#if FULL_SYSTEM
if (cpu->system->memctrl->badaddr(memReq[tid]->paddr)) {
DPRINTF(Fetch, "Fetch: Bad address %#x (hopefully on a "
"misspeculating path!",
@@ -464,6 +479,7 @@ DefaultFetch<Impl>::fetchCacheLine(Addr fetch_PC, Fault &ret_fault, unsigned tid
ret_fault = TheISA::genMachineCheckFault();
return false;
}
+#endif
DPRINTF(Fetch, "Fetch: Doing instruction read.\n");
fault = cpu->mem->read(memReq[tid], cacheData[tid]);
@@ -480,6 +496,8 @@ DefaultFetch<Impl>::fetchCacheLine(Addr fetch_PC, Fault &ret_fault, unsigned tid
MemAccessResult result = icacheInterface->access(memReq[tid]);
+ fetchedCacheLines++;
+
// If the cache missed, then schedule an event to wake
// up this stage once the cache miss completes.
// @todo: Possibly allow for longer than 1 cycle cache hits.
@@ -499,8 +517,6 @@ DefaultFetch<Impl>::fetchCacheLine(Addr fetch_PC, Fault &ret_fault, unsigned tid
"read.\n", tid);
// memcpy(cacheData[tid], memReq[tid]->data, memReq[tid]->size);
-
- fetchedCacheLines++;
}
} else {
DPRINTF(Fetch, "[tid:%i] Out of MSHRs!\n", tid);
@@ -889,10 +905,14 @@ DefaultFetch<Impl>::fetch(bool &status_change)
if (!fetch_success)
return;
} else {
- if (fetchStatus[tid] == Blocked) {
+ if (fetchStatus[tid] == Idle) {
+ ++fetchIdleCycles;
+ } else if (fetchStatus[tid] == Blocked) {
++fetchBlockedCycles;
} else if (fetchStatus[tid] == Squashing) {
++fetchSquashCycles;
+ } else if (fetchStatus[tid] == IcacheMissStall) {
+ ++icacheStallCycles;
}
// Status is Idle, Squashing, Blocked, or IcacheMissStall, so
@@ -904,6 +924,7 @@ DefaultFetch<Impl>::fetch(bool &status_change)
// If we had a stall due to an icache miss, then return.
if (fetchStatus[tid] == IcacheMissStall) {
+ ++icacheStallCycles;
status_change = true;
return;
}
diff --git a/cpu/o3/iew.hh b/cpu/o3/iew.hh
index e55837812..58cd68b21 100644
--- a/cpu/o3/iew.hh
+++ b/cpu/o3/iew.hh
@@ -278,6 +278,8 @@ class DefaultIEW
void tick();
private:
+ void updateExeInstStats(DynInstPtr &inst);
+
/** Pointer to main time buffer used for backwards communication. */
TimeBuffer<TimeStruct> *timeBuffer;
@@ -443,9 +445,9 @@ class DefaultIEW
/** Stat for total number of executed instructions. */
Stats::Scalar<> iewExecutedInsts;
/** Stat for total number of executed load instructions. */
- Stats::Scalar<> iewExecLoadInsts;
+ Stats::Vector<> iewExecLoadInsts;
/** Stat for total number of executed store instructions. */
- Stats::Scalar<> iewExecStoreInsts;
+// Stats::Scalar<> iewExecStoreInsts;
/** Stat for total number of squashed instructions skipped at execute. */
Stats::Scalar<> iewExecSquashedInsts;
/** Stat for total number of memory ordering violation events. */
@@ -456,6 +458,33 @@ class DefaultIEW
Stats::Scalar<> predictedNotTakenIncorrect;
/** Stat for total number of mispredicted branches detected at execute. */
Stats::Formula branchMispredicts;
+
+ Stats::Vector<> exe_swp;
+ Stats::Vector<> exe_nop;
+ Stats::Vector<> exe_refs;
+ Stats::Vector<> exe_branches;
+
+// Stats::Vector<> issued_ops;
+/*
+ Stats::Vector<> stat_fu_busy;
+ Stats::Vector2d<> stat_fuBusy;
+ Stats::Vector<> dist_unissued;
+ Stats::Vector2d<> stat_issued_inst_type;
+*/
+ Stats::Formula issue_rate;
+ Stats::Formula iewExecStoreInsts;
+// Stats::Formula issue_op_rate;
+// Stats::Formula fu_busy_rate;
+
+ Stats::Vector<> iewInstsToCommit;
+ Stats::Vector<> writeback_count;
+ Stats::Vector<> producer_inst;
+ Stats::Vector<> consumer_inst;
+ Stats::Vector<> wb_penalized;
+
+ Stats::Formula wb_rate;
+ Stats::Formula wb_fanout;
+ Stats::Formula wb_penalized_rate;
};
#endif // __CPU_O3_IEW_HH__
diff --git a/cpu/o3/iew_impl.hh b/cpu/o3/iew_impl.hh
index 21eb7dcf8..2ae2e1361 100644
--- a/cpu/o3/iew_impl.hh
+++ b/cpu/o3/iew_impl.hh
@@ -140,6 +140,8 @@ template <class Impl>
void
DefaultIEW<Impl>::regStats()
{
+ using namespace Stats;
+
instQueue.regStats();
//ldstQueue.regStats();
@@ -195,13 +197,15 @@ DefaultIEW<Impl>::regStats()
.desc("Number of executed instructions");
iewExecLoadInsts
+ .init(cpu->number_of_threads)
.name(name() + ".iewExecLoadInsts")
- .desc("Number of load instructions executed");
-
+ .desc("Number of load instructions executed")
+ .flags(total);
+/*
iewExecStoreInsts
.name(name() + ".iewExecStoreInsts")
.desc("Number of store instructions executed");
-
+*/
iewExecSquashedInsts
.name(name() + ".iewExecSquashedInsts")
.desc("Number of squashed instructions skipped in execute");
@@ -223,6 +227,116 @@ DefaultIEW<Impl>::regStats()
.desc("Number of branch mispredicts detected at execute");
branchMispredicts = predictedTakenIncorrect + predictedNotTakenIncorrect;
+
+ exe_swp
+ .init(cpu->number_of_threads)
+ .name(name() + ".EXEC:swp")
+ .desc("number of swp insts executed")
+ .flags(total)
+ ;
+
+ exe_nop
+ .init(cpu->number_of_threads)
+ .name(name() + ".EXEC:nop")
+ .desc("number of nop insts executed")
+ .flags(total)
+ ;
+
+ exe_refs
+ .init(cpu->number_of_threads)
+ .name(name() + ".EXEC:refs")
+ .desc("number of memory reference insts executed")
+ .flags(total)
+ ;
+
+ exe_branches
+ .init(cpu->number_of_threads)
+ .name(name() + ".EXEC:branches")
+ .desc("Number of branches executed")
+ .flags(total)
+ ;
+
+ issue_rate
+ .name(name() + ".EXEC:rate")
+ .desc("Inst execution rate")
+ .flags(total)
+ ;
+ issue_rate = iewExecutedInsts / cpu->numCycles;
+
+ iewExecStoreInsts
+ .name(name() + ".EXEC:stores")
+ .desc("Number of stores executed")
+ .flags(total)
+ ;
+ iewExecStoreInsts = exe_refs - iewExecLoadInsts;
+/*
+ for (int i=0; i<Num_OpClasses; ++i) {
+ stringstream subname;
+ subname << opClassStrings[i] << "_delay";
+ issue_delay_dist.subname(i, subname.str());
+ }
+*/
+ //
+ // Other stats
+ //
+
+ iewInstsToCommit
+ .init(cpu->number_of_threads)
+ .name(name() + ".WB:sent")
+ .desc("cumulative count of insts sent to commit")
+ .flags(total)
+ ;
+
+ writeback_count
+ .init(cpu->number_of_threads)
+ .name(name() + ".WB:count")
+ .desc("cumulative count of insts written-back")
+ .flags(total)
+ ;
+
+ producer_inst
+ .init(cpu->number_of_threads)
+ .name(name() + ".WB:producers")
+ .desc("num instructions producing a value")
+ .flags(total)
+ ;
+
+ consumer_inst
+ .init(cpu->number_of_threads)
+ .name(name() + ".WB:consumers")
+ .desc("num instructions consuming a value")
+ .flags(total)
+ ;
+
+ wb_penalized
+ .init(cpu->number_of_threads)
+ .name(name() + ".WB:penalized")
+ .desc("number of instrctions required to write to 'other' IQ")
+ .flags(total)
+ ;
+
+ wb_penalized_rate
+ .name(name() + ".WB:penalized_rate")
+ .desc ("fraction of instructions written-back that wrote to 'other' IQ")
+ .flags(total)
+ ;
+
+ wb_penalized_rate = wb_penalized / writeback_count;
+
+ wb_fanout
+ .name(name() + ".WB:fanout")
+ .desc("average fanout of values written-back")
+ .flags(total)
+ ;
+
+ wb_fanout = producer_inst / consumer_inst;
+
+ wb_rate
+ .name(name() + ".WB:rate")
+ .desc("insts written-back per cycle")
+ .flags(total)
+ ;
+ wb_rate = writeback_count / cpu->numCycles;
}
template<class Impl>
@@ -990,6 +1104,8 @@ DefaultIEW<Impl>::dispatchInsts(unsigned tid)
instQueue.advanceTail(inst);
+ exe_nop[tid]++;
+
add_to_iq = false;
} else if (inst->isExecuted()) {
assert(0 && "Instruction shouldn't be executed.\n");
@@ -1124,11 +1240,11 @@ DefaultIEW<Impl>::executeInsts()
// event adds the instruction to the queue to commit
fault = ldstQueue.executeLoad(inst);
- ++iewExecLoadInsts;
+// ++iewExecLoadInsts;
} else if (inst->isStore()) {
ldstQueue.executeStore(inst);
- ++iewExecStoreInsts;
+// ++iewExecStoreInsts;
// If the store had a fault then it may not have a mem req
if (inst->req && !(inst->req->flags & LOCKED)) {
@@ -1146,13 +1262,13 @@ DefaultIEW<Impl>::executeInsts()
} else {
inst->execute();
- ++iewExecutedInsts;
-
inst->setExecuted();
instToCommit(inst);
}
+ updateExeInstStats(inst);
+
// Check if branch was correct. This check happens after the
// instruction is added to the queue because even if the branch
// is mispredicted, the branch instruction itself is still valid.
@@ -1243,17 +1359,20 @@ DefaultIEW<Impl>::writebackInsts()
for (int inst_num = 0; inst_num < issueWidth &&
toCommit->insts[inst_num]; inst_num++) {
DynInstPtr inst = toCommit->insts[inst_num];
+ int tid = inst->threadNumber;
DPRINTF(IEW, "Sending instructions to commit, PC %#x.\n",
inst->readPC());
+ iewInstsToCommit[tid]++;
+
// Some instructions will be sent to commit without having
// executed because they need commit to handle them.
// E.g. Uncached loads have not actually executed when they
// are first sent to commit. Instead commit must tell the LSQ
// when it's ready to execute the uncached load.
if (!inst->isSquashed() && inst->isExecuted()) {
- instQueue.wakeDependents(inst);
+ int dependents = instQueue.wakeDependents(inst);
for (int i = 0; i < inst->numDestRegs(); i++) {
//mark as Ready
@@ -1261,6 +1380,10 @@ DefaultIEW<Impl>::writebackInsts()
inst->renamedDestRegIdx(i));
scoreboard->setReg(inst->renamedDestRegIdx(i));
}
+
+ producer_inst[tid]++;
+ consumer_inst[tid]+= dependents;
+ writeback_count[tid]++;
}
}
}
@@ -1390,3 +1513,39 @@ DefaultIEW<Impl>::tick()
cpu->activityThisCycle();
}
}
+
+/** Per-thread execute-stage bookkeeping for one instruction: bumps the
+ *  executed/prefetch, branch, memory-reference and load counters. */
+template <class Impl>
+void
+DefaultIEW<Impl>::updateExeInstStats(DynInstPtr &inst)
+{
+    int thread_number = inst->threadNumber;
+
+    // Software prefetches (Alpha only) are tallied in exe_swp rather
+    // than in the executed-instruction total.
+#ifdef TARGET_ALPHA
+    if (inst->isDataPrefetch())
+        exe_swp[thread_number]++;
+    else
+        iewExecutedInsts++;
+#else
+    // iewExecutedInsts is a Stats::Scalar<>, not a per-thread vector,
+    // so it must not be indexed by thread.
+    iewExecutedInsts++;
+#endif
+
+    // Control operations
+    if (inst->isControl())
+        exe_branches[thread_number]++;
+
+    // Memory operations; loads are a subset of the references, and the
+    // store count is derived as exe_refs - iewExecLoadInsts.
+    if (inst->isMemRef()) {
+        exe_refs[thread_number]++;
+
+        if (inst->isLoad()) {
+            iewExecLoadInsts[thread_number]++;
+        }
+    }
+}
diff --git a/cpu/o3/inst_queue.hh b/cpu/o3/inst_queue.hh
index 283bbdc22..06d9937f2 100644
--- a/cpu/o3/inst_queue.hh
+++ b/cpu/o3/inst_queue.hh
@@ -185,7 +185,7 @@ class InstructionQueue
void commit(const InstSeqNum &inst, unsigned tid = 0);
/** Wakes all dependents of a completed instruction. */
- void wakeDependents(DynInstPtr &completed_inst);
+ int wakeDependents(DynInstPtr &completed_inst);
/** Adds a ready memory instruction to the ready list. */
void addReadyMemInst(DynInstPtr &ready_inst);
@@ -479,6 +479,7 @@ class InstructionQueue
/** Stat for number of non-speculative instructions added. */
Stats::Scalar<> iqNonSpecInstsAdded;
// Stats::Scalar<> iqIntInstsAdded;
+ Stats::Scalar<> iqInstsIssued;
/** Stat for number of integer instructions issued. */
Stats::Scalar<> iqIntInstsIssued;
// Stats::Scalar<> iqFloatInstsAdded;
@@ -505,6 +506,20 @@ class InstructionQueue
*/
Stats::Scalar<> iqSquashedNonSpecRemoved;
+ Stats::VectorDistribution<> queue_res_dist;
+ Stats::Vector<> n_issued_dist;
+ Stats::VectorDistribution<> issue_delay_dist;
+
+ Stats::Vector<> stat_fu_busy;
+// Stats::Vector<> dist_unissued;
+ Stats::Vector2d<> stat_issued_inst_type;
+
+ Stats::Formula issue_rate;
+// Stats::Formula issue_stores;
+// Stats::Formula issue_op_rate;
+ Stats::Vector<> fu_busy; //cumulative fu busy
+
+ Stats::Formula fu_busy_rate;
};
#endif //__CPU_O3_INST_QUEUE_HH__
diff --git a/cpu/o3/inst_queue_impl.hh b/cpu/o3/inst_queue_impl.hh
index cfdd25cd5..804bc2472 100644
--- a/cpu/o3/inst_queue_impl.hh
+++ b/cpu/o3/inst_queue_impl.hh
@@ -224,6 +224,7 @@ template <class Impl>
void
InstructionQueue<Impl>::regStats()
{
+ using namespace Stats;
iqInstsAdded
.name(name() + ".iqInstsAdded")
.desc("Number of instructions added to the IQ (excludes non-spec)")
@@ -236,6 +237,11 @@ InstructionQueue<Impl>::regStats()
// iqIntInstsAdded;
+ iqInstsIssued
+ .name(name() + ".iqInstsIssued")
+ .desc("Number of instructions issued")
+ .prereq(iqInstsIssued);
+
iqIntInstsIssued
.name(name() + ".iqIntInstsIssued")
.desc("Number of integer instructions issued")
@@ -291,6 +297,103 @@ InstructionQueue<Impl>::regStats()
.desc("Number of squashed non-spec instructions that were removed")
.prereq(iqSquashedNonSpecRemoved);
+ queue_res_dist
+ .init(Num_OpClasses, 0, 99, 2)
+ .name(name() + ".IQ:residence:")
+ .desc("cycles from dispatch to issue")
+ .flags(total | pdf | cdf )
+ ;
+ for (int i = 0; i < Num_OpClasses; ++i) {
+ queue_res_dist.subname(i, opClassStrings[i]);
+ }
+ n_issued_dist
+ .init(totalWidth + 1)
+ .name(name() + ".ISSUE:issued_per_cycle")
+ .desc("Number of insts issued each cycle")
+ .flags(total | pdf | dist)
+ ;
+/*
+ dist_unissued
+ .init(Num_OpClasses+2)
+ .name(name() + ".ISSUE:unissued_cause")
+ .desc("Reason ready instruction not issued")
+ .flags(pdf | dist)
+ ;
+ for (int i=0; i < (Num_OpClasses + 2); ++i) {
+ dist_unissued.subname(i, unissued_names[i]);
+ }
+*/
+ stat_issued_inst_type
+ .init(numThreads,Num_OpClasses)
+ .name(name() + ".ISSUE:FU_type")
+ .desc("Type of FU issued")
+ .flags(total | pdf | dist)
+ ;
+ stat_issued_inst_type.ysubnames(opClassStrings);
+
+ //
+ // How long did instructions for a particular FU type wait prior to issue
+ //
+
+ issue_delay_dist
+ .init(Num_OpClasses,0,99,2)
+ .name(name() + ".ISSUE:")
+ .desc("cycles from operands ready to issue")
+ .flags(pdf | cdf)
+ ;
+
+ for (int i=0; i<Num_OpClasses; ++i) {
+ stringstream subname;
+ subname << opClassStrings[i] << "_delay";
+ issue_delay_dist.subname(i, subname.str());
+ }
+
+ issue_rate
+ .name(name() + ".ISSUE:rate")
+ .desc("Inst issue rate")
+ .flags(total)
+ ;
+ issue_rate = iqInstsIssued / cpu->numCycles;
+/*
+ issue_stores
+ .name(name() + ".ISSUE:stores")
+ .desc("Number of stores issued")
+ .flags(total)
+ ;
+ issue_stores = exe_refs - exe_loads;
+*/
+/*
+ issue_op_rate
+ .name(name() + ".ISSUE:op_rate")
+ .desc("Operation issue rate")
+ .flags(total)
+ ;
+ issue_op_rate = issued_ops / numCycles;
+*/
+ stat_fu_busy
+ .init(Num_OpClasses)
+ .name(name() + ".ISSUE:fu_full")
+ .desc("attempts to use FU when none available")
+ .flags(pdf | dist)
+ ;
+ for (int i=0; i < Num_OpClasses; ++i) {
+ stat_fu_busy.subname(i, opClassStrings[i]);
+ }
+
+ fu_busy
+ .init(numThreads)
+ .name(name() + ".ISSUE:fu_busy_cnt")
+ .desc("FU busy when requested")
+ .flags(total)
+ ;
+
+ fu_busy_rate
+ .name(name() + ".ISSUE:fu_busy_rate")
+ .desc("FU busy rate (busy events/executed inst)")
+ .flags(total)
+ ;
+ fu_busy_rate = fu_busy / iqInstsIssued;
+
for ( int i=0; i < numThreads; i++) {
// Tell mem dependence unit to reg stats as well.
memDepUnit[i].regStats();
@@ -658,6 +761,8 @@ InstructionQueue<Impl>::scheduleReadyInsts()
int idx = fuPool->getUnit(op_class);
+ int tid = issuing_inst->threadNumber;
+
if (idx == -2) {
assert(op_class == No_OpClass);
@@ -666,7 +771,7 @@ InstructionQueue<Impl>::scheduleReadyInsts()
DPRINTF(IQ, "Thread %i: Issuing instruction PC that needs no FU"
" %#x [sn:%lli]\n",
- issuing_inst->threadNumber, issuing_inst->readPC(),
+ tid, issuing_inst->readPC(),
issuing_inst->seqNum);
readyInsts[op_class].pop();
@@ -685,14 +790,15 @@ InstructionQueue<Impl>::scheduleReadyInsts()
// Memory instructions can not be freed from the IQ until they
// complete.
++freeEntries;
- count[issuing_inst->threadNumber]--;
+ count[tid]--;
issuing_inst->removeInIQ();
} else {
- memDepUnit[issuing_inst->threadNumber].issue(issuing_inst);
+ memDepUnit[tid].issue(issuing_inst);
}
listOrder.erase(order_it++);
+ stat_issued_inst_type[tid][op_class]++;
} else if (idx != -1) {
int op_latency = fuPool->getOpLatency(op_class);
@@ -722,7 +828,7 @@ InstructionQueue<Impl>::scheduleReadyInsts()
DPRINTF(IQ, "Thread %i: Issuing instruction PC %#x "
"[sn:%lli]\n",
- issuing_inst->threadNumber, issuing_inst->readPC(),
+ tid, issuing_inst->readPC(),
issuing_inst->seqNum);
readyInsts[op_class].pop();
@@ -741,14 +847,17 @@ InstructionQueue<Impl>::scheduleReadyInsts()
// Memory instructions can not be freed from the IQ until they
// complete.
++freeEntries;
- count[issuing_inst->threadNumber]--;
+ count[tid]--;
issuing_inst->removeInIQ();
} else {
- memDepUnit[issuing_inst->threadNumber].issue(issuing_inst);
+ memDepUnit[tid].issue(issuing_inst);
}
listOrder.erase(order_it++);
+ stat_issued_inst_type[tid][op_class]++;
} else {
+ stat_fu_busy[op_class]++;
+ fu_busy[tid]++;
++order_it;
}
}
@@ -808,9 +917,11 @@ InstructionQueue<Impl>::commit(const InstSeqNum &inst, unsigned tid)
}
template <class Impl>
-void
+int
InstructionQueue<Impl>::wakeDependents(DynInstPtr &completed_inst)
{
+ int dependents = 0;
+
DPRINTF(IQ, "Waking dependents of completed instruction.\n");
assert(!completed_inst->isSquashed());
@@ -875,6 +986,8 @@ InstructionQueue<Impl>::wakeDependents(DynInstPtr &completed_inst)
curr = prev->next;
prev->inst = NULL;
+ ++dependents;
+
delete prev;
}
@@ -886,6 +999,7 @@ InstructionQueue<Impl>::wakeDependents(DynInstPtr &completed_inst)
// Mark the scoreboard as having that register ready.
regScoreboard[dest_reg] = true;
}
+ return dependents;
}
template <class Impl>
diff --git a/cpu/o3/rename.hh b/cpu/o3/rename.hh
index d5beccde9..c6f8f97aa 100644
--- a/cpu/o3/rename.hh
+++ b/cpu/o3/rename.hh
@@ -90,7 +90,7 @@ class DefaultRename
Squashing,
Blocked,
Unblocking,
- BarrierStall
+ SerializeStall
};
private:
@@ -359,8 +359,8 @@ class DefaultRename
/** Tracks which stages are telling decode to stall. */
Stalls stalls[Impl::MaxThreads];
- /** The barrier instruction that rename has stalled on. */
- DynInstPtr barrierInst[Impl::MaxThreads];
+ /** The serialize instruction that rename has stalled on. */
+ DynInstPtr serializeInst[Impl::MaxThreads];
/** Records if rename needs to serialize on the next instruction for any
* thread.
@@ -419,8 +419,8 @@ class DefaultRename
Stats::Scalar<> renameIdleCycles;
/** Stat for total number of cycles spent blocking. */
Stats::Scalar<> renameBlockCycles;
- /** Stat for total number of cycles spent stalling for a barrier. */
- Stats::Scalar<> renameBarrierCycles;
+ /** Stat for total number of cycles spent stalling for a serializing inst. */
+ Stats::Scalar<> renameSerializeStallCycles;
/** Stat for total number of cycles spent running normally. */
Stats::Scalar<> renameRunCycles;
/** Stat for total number of cycles spent unblocking. */
@@ -446,6 +446,8 @@ class DefaultRename
Stats::Scalar<> renameCommittedMaps;
/** Stat for total number of mappings that were undone due to a squash. */
Stats::Scalar<> renameUndoneMaps;
+ Stats::Scalar<> renamedSerializing;
+ Stats::Scalar<> renamedTempSerializing;
};
#endif // __CPU_O3_RENAME_HH__
diff --git a/cpu/o3/rename_impl.hh b/cpu/o3/rename_impl.hh
index 441118ef1..e29211921 100644
--- a/cpu/o3/rename_impl.hh
+++ b/cpu/o3/rename_impl.hh
@@ -53,7 +53,7 @@ DefaultRename<Impl>::DefaultRename(Params *params)
stalls[i].iew = false;
stalls[i].commit = false;
- barrierInst[i] = NULL;
+ serializeInst[i] = NULL;
instsInProgress[i] = 0;
@@ -78,69 +78,79 @@ void
DefaultRename<Impl>::regStats()
{
renameSquashCycles
- .name(name() + ".renameSquashCycles")
+ .name(name() + ".RENAME:SquashCycles")
.desc("Number of cycles rename is squashing")
.prereq(renameSquashCycles);
renameIdleCycles
- .name(name() + ".renameIdleCycles")
+ .name(name() + ".RENAME:IdleCycles")
.desc("Number of cycles rename is idle")
.prereq(renameIdleCycles);
renameBlockCycles
- .name(name() + ".renameBlockCycles")
+ .name(name() + ".RENAME:BlockCycles")
.desc("Number of cycles rename is blocking")
.prereq(renameBlockCycles);
- renameBarrierCycles
- .name(name() + ".renameBarrierCycles")
- .desc("Number of cycles rename is blocking due to a barrier stall")
- .prereq(renameBarrierCycles);
+ renameSerializeStallCycles
+ .name(name() + ".RENAME:serializeStallCycles")
+ .desc("count of cycles rename stalled for serializing inst")
+ .flags(Stats::total);
renameRunCycles
- .name(name() + ".renameRunCycles")
+ .name(name() + ".RENAME:RunCycles")
.desc("Number of cycles rename is running")
.prereq(renameIdleCycles);
renameUnblockCycles
- .name(name() + ".renameUnblockCycles")
+ .name(name() + ".RENAME:UnblockCycles")
.desc("Number of cycles rename is unblocking")
.prereq(renameUnblockCycles);
renameRenamedInsts
- .name(name() + ".renameRenamedInsts")
+ .name(name() + ".RENAME:RenamedInsts")
.desc("Number of instructions processed by rename")
.prereq(renameRenamedInsts);
renameSquashedInsts
- .name(name() + ".renameSquashedInsts")
+ .name(name() + ".RENAME:SquashedInsts")
.desc("Number of squashed instructions processed by rename")
.prereq(renameSquashedInsts);
renameROBFullEvents
- .name(name() + ".renameROBFullEvents")
+ .name(name() + ".RENAME:ROBFullEvents")
.desc("Number of times rename has blocked due to ROB full")
.prereq(renameROBFullEvents);
renameIQFullEvents
- .name(name() + ".renameIQFullEvents")
+ .name(name() + ".RENAME:IQFullEvents")
.desc("Number of times rename has blocked due to IQ full")
.prereq(renameIQFullEvents);
renameLSQFullEvents
- .name(name() + ".renameLSQFullEvents")
+ .name(name() + ".RENAME:LSQFullEvents")
.desc("Number of times rename has blocked due to LSQ full")
.prereq(renameLSQFullEvents);
renameFullRegistersEvents
- .name(name() + ".renameFullRegisterEvents")
+ .name(name() + ".RENAME:FullRegisterEvents")
.desc("Number of times there has been no free registers")
.prereq(renameFullRegistersEvents);
renameRenamedOperands
- .name(name() + ".renameRenamedOperands")
+ .name(name() + ".RENAME:RenamedOperands")
.desc("Number of destination operands rename has renamed")
.prereq(renameRenamedOperands);
renameRenameLookups
- .name(name() + ".renameRenameLookups")
+ .name(name() + ".RENAME:RenameLookups")
.desc("Number of register rename lookups that rename has made")
.prereq(renameRenameLookups);
renameCommittedMaps
- .name(name() + ".renameCommittedMaps")
+ .name(name() + ".RENAME:CommittedMaps")
.desc("Number of HB maps that are committed")
.prereq(renameCommittedMaps);
renameUndoneMaps
- .name(name() + ".renameUndoneMaps")
+ .name(name() + ".RENAME:UndoneMaps")
.desc("Number of HB maps that are undone due to squashing")
.prereq(renameUndoneMaps);
+ renamedSerializing
+ .name(name() + ".RENAME:serializingInsts")
+ .desc("count of serializing insts renamed")
+ .flags(Stats::total)
+ ;
+ renamedTempSerializing
+ .name(name() + ".RENAME:tempSerializingInsts")
+ .desc("count of temporary serializing insts renamed")
+ .flags(Stats::total)
+ ;
}
template <class Impl>
@@ -254,7 +264,7 @@ DefaultRename<Impl>::squash(unsigned tid)
// cycle and there should be space to hold everything due to the squash.
if (renameStatus[tid] == Blocked ||
renameStatus[tid] == Unblocking ||
- renameStatus[tid] == BarrierStall) {
+ renameStatus[tid] == SerializeStall) {
#if !FULL_SYSTEM
// In syscall emulation, we can have both a block and a squash due
// to a syscall in the same cycle. This would cause both signals to
@@ -267,7 +277,7 @@ DefaultRename<Impl>::squash(unsigned tid)
#else
toDecode->renameUnblock[tid] = 1;
#endif
- barrierInst[tid] = NULL;
+ serializeInst[tid] = NULL;
}
// Set the status to Squashing.
@@ -370,8 +380,8 @@ DefaultRename<Impl>::rename(bool &status_change, unsigned tid)
++renameBlockCycles;
} else if (renameStatus[tid] == Squashing) {
++renameSquashCycles;
- } else if (renameStatus[tid] == BarrierStall) {
- ++renameBarrierCycles;
+ } else if (renameStatus[tid] == SerializeStall) {
+ ++renameSerializeStallCycles;
}
if (renameStatus[tid] == Running ||
@@ -535,14 +545,18 @@ DefaultRename<Impl>::renameInsts(unsigned tid)
if (inst->isSerializeBefore() && !inst->isSerializeHandled()) {
DPRINTF(Rename, "Serialize before instruction encountered.\n");
- if (!inst->isTempSerializeBefore())
+ if (!inst->isTempSerializeBefore()) {
+ renamedSerializing++;
inst->setSerializeHandled();
+ } else {
+ renamedTempSerializing++;
+ }
- // Change status over to BarrierStall so that other stages know
+ // Change status over to SerializeStall so that other stages know
// what this is blocked on.
- renameStatus[tid] = BarrierStall;
+ renameStatus[tid] = SerializeStall;
- barrierInst[tid] = inst;
+ serializeInst[tid] = inst;
blockThisCycle = true;
@@ -716,9 +730,9 @@ DefaultRename<Impl>::block(unsigned tid)
wroteToTimeBuffer = true;
}
- // Rename can not go from BarrierStall to Blocked, otherwise it would
- // not know to complete the barrier stall.
- if (renameStatus[tid] != BarrierStall) {
+ // Rename can not go from SerializeStall to Blocked, otherwise it would
+ // not know to complete the serialize stall.
+ if (renameStatus[tid] != SerializeStall) {
// Set status to Blocked.
renameStatus[tid] = Blocked;
return true;
@@ -735,7 +749,7 @@ DefaultRename<Impl>::unblock(unsigned tid)
DPRINTF(Rename, "[tid:%u]: Trying to unblock.\n", tid);
// Rename is done unblocking if the skid buffer is empty.
- if (skidBuffer[tid].empty() && renameStatus[tid] != BarrierStall) {
+ if (skidBuffer[tid].empty() && renameStatus[tid] != SerializeStall) {
DPRINTF(Rename, "[tid:%u]: Done unblocking.\n", tid);
@@ -1008,9 +1022,9 @@ DefaultRename<Impl>::checkStall(unsigned tid)
} else if (renameMap[tid]->numFreeEntries() <= 0) {
DPRINTF(Rename,"[tid:%i]: Stall: RenameMap has 0 free entries.\n", tid);
ret_val = true;
- } else if (renameStatus[tid] == BarrierStall &&
+ } else if (renameStatus[tid] == SerializeStall &&
(!emptyROB[tid] || instsInProgress[tid])) {
- DPRINTF(Rename,"[tid:%i]: Stall: Barrier stall and ROB is not "
+ DPRINTF(Rename,"[tid:%i]: Stall: Serialize stall and ROB is not "
"empty.\n",
tid);
ret_val = true;
@@ -1064,7 +1078,7 @@ DefaultRename<Impl>::checkSignalsAndUpdate(unsigned tid)
// if so then go to unblocking
// If status was Squashing
// check if squashing is not high. Switch to running this cycle.
- // If status was barrier stall
+ // If status was serialize stall
// check if ROB is empty and no insts are in flight to the ROB
readFreeEntries(tid);
@@ -1113,12 +1127,12 @@ DefaultRename<Impl>::checkSignalsAndUpdate(unsigned tid)
return false;
}
- if (renameStatus[tid] == BarrierStall) {
+ if (renameStatus[tid] == SerializeStall) {
// Stall ends once the ROB is free.
- DPRINTF(Rename, "[tid:%u]: Done with barrier stall, switching to "
+ DPRINTF(Rename, "[tid:%u]: Done with serialize stall, switching to "
"unblocking.\n", tid);
- DynInstPtr barr_inst = barrierInst[tid];
+ DynInstPtr serial_inst = serializeInst[tid];
renameStatus[tid] = Unblocking;
@@ -1126,21 +1140,21 @@ DefaultRename<Impl>::checkSignalsAndUpdate(unsigned tid)
DPRINTF(Rename, "[tid:%u]: Processing instruction [%lli] with "
"PC %#x.\n",
- tid, barr_inst->seqNum, barr_inst->readPC());
+ tid, serial_inst->seqNum, serial_inst->readPC());
// Put instruction into queue here.
- barr_inst->clearSerializeBefore();
+ serial_inst->clearSerializeBefore();
if (!skidBuffer[tid].empty()) {
- skidBuffer[tid].push_front(barr_inst);
+ skidBuffer[tid].push_front(serial_inst);
} else {
- insts[tid].push_front(barr_inst);
+ insts[tid].push_front(serial_inst);
}
DPRINTF(Rename, "[tid:%u]: Instruction must be processed by rename."
" Adding to front of list.", tid);
- barrierInst[tid] = NULL;
+ serializeInst[tid] = NULL;
return true;
}