summary | refs | log | tree | commit | diff
path: root/src/cpu/ozone/lw_back_end_impl.hh
diff options
context:
space:
mode:
Diffstat (limited to 'src/cpu/ozone/lw_back_end_impl.hh')
-rw-r--r-- src/cpu/ozone/lw_back_end_impl.hh | 317
1 files changed, 207 insertions, 110 deletions
diff --git a/src/cpu/ozone/lw_back_end_impl.hh b/src/cpu/ozone/lw_back_end_impl.hh
index a4f1d805e..f87a2bc57 100644
--- a/src/cpu/ozone/lw_back_end_impl.hh
+++ b/src/cpu/ozone/lw_back_end_impl.hh
@@ -141,13 +141,14 @@ LWBackEnd<Impl>::replayMemInst(DynInstPtr &inst)
template <class Impl>
LWBackEnd<Impl>::LWBackEnd(Params *params)
- : d2i(5, 5), i2e(5, 5), e2c(5, 5), numInstsToWB(5, 5),
+ : d2i(5, 5), i2e(5, 5), e2c(5, 5), numInstsToWB(params->backEndLatency, 0),
trapSquash(false), tcSquash(false),
- width(params->backEndWidth), exactFullStall(true)
+ latency(params->backEndLatency),
+ width(params->backEndWidth), lsqLimits(params->lsqLimits),
+ exactFullStall(true)
{
numROBEntries = params->numROBEntries;
numInsts = 0;
- numDispatchEntries = 32;
maxOutstandingMemOps = params->maxOutstandingMemOps;
numWaitingMemOps = 0;
waitingInsts = 0;
@@ -184,78 +185,79 @@ void
LWBackEnd<Impl>::regStats()
{
using namespace Stats;
- rob_cap_events
+ LSQ.regStats();
+
+ robCapEvents
.init(cpu->number_of_threads)
.name(name() + ".ROB:cap_events")
.desc("number of cycles where ROB cap was active")
.flags(total)
;
- rob_cap_inst_count
+ robCapInstCount
.init(cpu->number_of_threads)
.name(name() + ".ROB:cap_inst")
.desc("number of instructions held up by ROB cap")
.flags(total)
;
- iq_cap_events
+ iqCapEvents
.init(cpu->number_of_threads)
.name(name() +".IQ:cap_events" )
.desc("number of cycles where IQ cap was active")
.flags(total)
;
- iq_cap_inst_count
+ iqCapInstCount
.init(cpu->number_of_threads)
.name(name() + ".IQ:cap_inst")
.desc("number of instructions held up by IQ cap")
.flags(total)
;
-
- exe_inst
+ exeInst
.init(cpu->number_of_threads)
.name(name() + ".ISSUE:count")
.desc("number of insts issued")
.flags(total)
;
- exe_swp
+ exeSwp
.init(cpu->number_of_threads)
.name(name() + ".ISSUE:swp")
.desc("number of swp insts issued")
.flags(total)
;
- exe_nop
+ exeNop
.init(cpu->number_of_threads)
.name(name() + ".ISSUE:nop")
.desc("number of nop insts issued")
.flags(total)
;
- exe_refs
+ exeRefs
.init(cpu->number_of_threads)
.name(name() + ".ISSUE:refs")
.desc("number of memory reference insts issued")
.flags(total)
;
- exe_loads
+ exeLoads
.init(cpu->number_of_threads)
.name(name() + ".ISSUE:loads")
.desc("number of load insts issued")
.flags(total)
;
- exe_branches
+ exeBranches
.init(cpu->number_of_threads)
.name(name() + ".ISSUE:branches")
.desc("Number of branches issued")
.flags(total)
;
- issued_ops
+ issuedOps
.init(cpu->number_of_threads)
.name(name() + ".ISSUE:op_count")
.desc("number of insts issued")
@@ -272,28 +274,28 @@ LWBackEnd<Impl>::regStats()
//
// Other stats
//
- lsq_forw_loads
+ lsqForwLoads
.init(cpu->number_of_threads)
.name(name() + ".LSQ:forw_loads")
.desc("number of loads forwarded via LSQ")
.flags(total)
;
- inv_addr_loads
+ invAddrLoads
.init(cpu->number_of_threads)
.name(name() + ".ISSUE:addr_loads")
.desc("number of invalid-address loads")
.flags(total)
;
- inv_addr_swpfs
+ invAddrSwpfs
.init(cpu->number_of_threads)
.name(name() + ".ISSUE:addr_swpfs")
.desc("number of invalid-address SW prefetches")
.flags(total)
;
- lsq_blocked_loads
+ lsqBlockedLoads
.init(cpu->number_of_threads)
.name(name() + ".LSQ:blocked_loads")
.desc("number of ready loads not issued due to memory disambiguation")
@@ -305,51 +307,52 @@ LWBackEnd<Impl>::regStats()
.desc("Number of times LSQ instruction issued early")
;
- n_issued_dist
+ nIssuedDist
.init(issueWidth + 1)
.name(name() + ".ISSUE:issued_per_cycle")
.desc("Number of insts issued each cycle")
.flags(total | pdf | dist)
;
- issue_delay_dist
+/*
+ issueDelayDist
.init(Num_OpClasses,0,99,2)
.name(name() + ".ISSUE:")
.desc("cycles from operands ready to issue")
.flags(pdf | cdf)
;
- queue_res_dist
+ queueResDist
.init(Num_OpClasses, 0, 99, 2)
.name(name() + ".IQ:residence:")
.desc("cycles from dispatch to issue")
.flags(total | pdf | cdf )
;
for (int i = 0; i < Num_OpClasses; ++i) {
- queue_res_dist.subname(i, opClassStrings[i]);
+ queueResDist.subname(i, opClassStrings[i]);
}
-
- writeback_count
+*/
+ writebackCount
.init(cpu->number_of_threads)
.name(name() + ".WB:count")
.desc("cumulative count of insts written-back")
.flags(total)
;
- producer_inst
+ producerInst
.init(cpu->number_of_threads)
.name(name() + ".WB:producers")
.desc("num instructions producing a value")
.flags(total)
;
- consumer_inst
+ consumerInst
.init(cpu->number_of_threads)
.name(name() + ".WB:consumers")
.desc("num instructions consuming a value")
.flags(total)
;
- wb_penalized
+ wbPenalized
.init(cpu->number_of_threads)
.name(name() + ".WB:penalized")
.desc("number of instrctions required to write to 'other' IQ")
@@ -357,71 +360,71 @@ LWBackEnd<Impl>::regStats()
;
- wb_penalized_rate
+ wbPenalizedRate
.name(name() + ".WB:penalized_rate")
.desc ("fraction of instructions written-back that wrote to 'other' IQ")
.flags(total)
;
- wb_penalized_rate = wb_penalized / writeback_count;
+ wbPenalizedRate = wbPenalized / writebackCount;
- wb_fanout
+ wbFanout
.name(name() + ".WB:fanout")
.desc("average fanout of values written-back")
.flags(total)
;
- wb_fanout = producer_inst / consumer_inst;
+ wbFanout = producerInst / consumerInst;
- wb_rate
+ wbRate
.name(name() + ".WB:rate")
.desc("insts written-back per cycle")
.flags(total)
;
- wb_rate = writeback_count / cpu->numCycles;
+ wbRate = writebackCount / cpu->numCycles;
- stat_com_inst
+ statComInst
.init(cpu->number_of_threads)
.name(name() + ".COM:count")
.desc("Number of instructions committed")
.flags(total)
;
- stat_com_swp
+ statComSwp
.init(cpu->number_of_threads)
.name(name() + ".COM:swp_count")
.desc("Number of s/w prefetches committed")
.flags(total)
;
- stat_com_refs
+ statComRefs
.init(cpu->number_of_threads)
.name(name() + ".COM:refs")
.desc("Number of memory references committed")
.flags(total)
;
- stat_com_loads
+ statComLoads
.init(cpu->number_of_threads)
.name(name() + ".COM:loads")
.desc("Number of loads committed")
.flags(total)
;
- stat_com_membars
+ statComMembars
.init(cpu->number_of_threads)
.name(name() + ".COM:membars")
.desc("Number of memory barriers committed")
.flags(total)
;
- stat_com_branches
+ statComBranches
.init(cpu->number_of_threads)
.name(name() + ".COM:branches")
.desc("Number of branches committed")
.flags(total)
;
- n_committed_dist
+ nCommittedDist
.init(0,commitWidth,1)
.name(name() + ".COM:committed_per_cycle")
.desc("Number of insts commited each cycle")
@@ -441,14 +444,14 @@ LWBackEnd<Impl>::regStats()
// -> The standard deviation is computed only over cycles where
// we reached the BW limit
//
- commit_eligible
+ commitEligible
.init(cpu->number_of_threads)
.name(name() + ".COM:bw_limited")
.desc("number of insts not committed due to BW limits")
.flags(total)
;
- commit_eligible_samples
+ commitEligibleSamples
.name(name() + ".COM:bw_lim_events")
.desc("number cycles where commit BW limit reached")
;
@@ -465,37 +468,38 @@ LWBackEnd<Impl>::regStats()
.desc("Number of instructions removed from inst list when they reached the head of the ROB")
;
- ROB_fcount
+ ROBFcount
.name(name() + ".ROB:full_count")
.desc("number of cycles where ROB was full")
;
- ROB_count
+ ROBCount
.init(cpu->number_of_threads)
.name(name() + ".ROB:occupancy")
.desc(name() + ".ROB occupancy (cumulative)")
.flags(total)
;
- ROB_full_rate
+ ROBFullRate
.name(name() + ".ROB:full_rate")
.desc("ROB full per cycle")
;
- ROB_full_rate = ROB_fcount / cpu->numCycles;
+ ROBFullRate = ROBFcount / cpu->numCycles;
- ROB_occ_rate
+ ROBOccRate
.name(name() + ".ROB:occ_rate")
.desc("ROB occupancy rate")
.flags(total)
;
- ROB_occ_rate = ROB_count / cpu->numCycles;
-
- ROB_occ_dist
+ ROBOccRate = ROBCount / cpu->numCycles;
+/*
+ ROBOccDist
.init(cpu->number_of_threads,0,numROBEntries,2)
.name(name() + ".ROB:occ_dist")
.desc("ROB Occupancy per cycle")
.flags(total | cdf)
;
+*/
}
template <class Impl>
@@ -588,17 +592,21 @@ LWBackEnd<Impl>::tick()
{
DPRINTF(BE, "Ticking back end\n");
+ // Read in any done instruction information and update the IQ or LSQ.
+ updateStructures();
+
if (switchPending && robEmpty() && !LSQ.hasStoresToWB()) {
cpu->signalSwitched();
return;
}
- ROB_count[0]+= numInsts;
+ readyInstsForCommit();
- wbCycle = 0;
+ numInstsToWB.advance();
- // Read in any done instruction information and update the IQ or LSQ.
- updateStructures();
+ ROBCount[0]+= numInsts;
+
+ wbCycle = 0;
#if FULL_SYSTEM
checkInterrupts();
@@ -674,6 +682,10 @@ LWBackEnd<Impl>::dispatchInsts()
while (numInsts < numROBEntries &&
numWaitingMemOps < maxOutstandingMemOps) {
// Get instruction from front of time buffer
+ if (lsqLimits && LSQ.isFull()) {
+ break;
+ }
+
DynInstPtr inst = frontEnd->getInst();
if (!inst) {
break;
@@ -732,6 +744,7 @@ LWBackEnd<Impl>::dispatchInsts()
inst->setIssued();
inst->setExecuted();
inst->setCanCommit();
+ numInstsToWB[0]++;
} else {
DPRINTF(BE, "Instruction [sn:%lli] ready, addding to "
"exeList.\n",
@@ -866,8 +879,17 @@ LWBackEnd<Impl>::executeInsts()
if (inst->isLoad()) {
LSQ.executeLoad(inst);
} else if (inst->isStore()) {
- LSQ.executeStore(inst);
- if (inst->req && !(inst->req->getFlags() & LOCKED)) {
+ Fault fault = LSQ.executeStore(inst);
+
+ if (!inst->isStoreConditional() && fault == NoFault) {
+ inst->setExecuted();
+
+ instToCommit(inst);
+ } else if (fault != NoFault) {
+ // If the instruction faulted, then we need to send it along to commit
+ // without the instruction completing.
+ // Send this instruction to commit, also make sure iew stage
+ // realizes there is activity.
inst->setExecuted();
instToCommit(inst);
@@ -908,36 +930,54 @@ LWBackEnd<Impl>::executeInsts()
}
}
- issued_ops[0]+= num_executed;
- n_issued_dist[num_executed]++;
+ issuedOps[0]+= num_executed;
+ nIssuedDist[num_executed]++;
}
template<class Impl>
void
LWBackEnd<Impl>::instToCommit(DynInstPtr &inst)
{
-
DPRINTF(BE, "Sending instructions to commit [sn:%lli] PC %#x.\n",
inst->seqNum, inst->readPC());
if (!inst->isSquashed()) {
- DPRINTF(BE, "Writing back instruction [sn:%lli] PC %#x.\n",
- inst->seqNum, inst->readPC());
-
- inst->setCanCommit();
-
if (inst->isExecuted()) {
inst->setResultReady();
int dependents = wakeDependents(inst);
if (dependents) {
- producer_inst[0]++;
- consumer_inst[0]+= dependents;
+ producerInst[0]++;
+ consumerInst[0]+= dependents;
}
}
}
- writeback_count[0]++;
+ writeback.push_back(inst);
+
+ numInstsToWB[0]++;
+
+ writebackCount[0]++;
+}
+
+template <class Impl>
+void
+LWBackEnd<Impl>::readyInstsForCommit()
+{
+ for (int i = numInstsToWB[-latency];
+ !writeback.empty() && i;
+ --i)
+ {
+ DynInstPtr inst = writeback.front();
+ writeback.pop_front();
+ if (!inst->isSquashed()) {
+ DPRINTF(BE, "Writing back instruction [sn:%lli] PC %#x.\n",
+ inst->seqNum, inst->readPC());
+
+ inst->setCanCommit();
+ }
+ }
}
+
#if 0
template <class Impl>
void
@@ -1010,7 +1050,7 @@ LWBackEnd<Impl>::commitInst(int inst_num)
// or store inst. Signal backwards that it should be executed.
if (!inst->isExecuted()) {
if (inst->isNonSpeculative() ||
- inst->isStoreConditional() ||
+ (inst->isStoreConditional() && inst->getFault() == NoFault) ||
inst->isMemBarrier() ||
inst->isWriteBarrier()) {
#if !FULL_SYSTEM
@@ -1151,6 +1191,20 @@ LWBackEnd<Impl>::commitInst(int inst_num)
++freed_regs;
}
+#if FULL_SYSTEM
+ if (thread->profile) {
+// bool usermode =
+// (xc->readMiscReg(AlphaISA::IPR_DTB_CM) & 0x18) != 0;
+// thread->profilePC = usermode ? 1 : inst->readPC();
+ thread->profilePC = inst->readPC();
+ ProfileNode *node = thread->profile->consume(thread->getXCProxy(),
+ inst->staticInst);
+
+ if (node)
+ thread->profileNode = node;
+ }
+#endif
+
if (inst->traceData) {
inst->traceData->setFetchSeq(inst->seqNum);
inst->traceData->setCPSeq(thread->numInst);
@@ -1158,6 +1212,9 @@ LWBackEnd<Impl>::commitInst(int inst_num)
inst->traceData = NULL;
}
+ if (inst->isCopy())
+ panic("Should not commit any copy instructions!");
+
inst->clearDependents();
frontEnd->addFreeRegs(freed_regs);
@@ -1207,9 +1264,9 @@ LWBackEnd<Impl>::commitInsts()
while (!instList.empty() && inst_num < commitWidth) {
if (instList.back()->isSquashed()) {
instList.back()->clearDependents();
+ ROBSquashedInsts[instList.back()->threadNumber]++;
instList.pop_back();
--numInsts;
- ROBSquashedInsts[instList.back()->threadNumber]++;
continue;
}
@@ -1221,7 +1278,7 @@ LWBackEnd<Impl>::commitInsts()
break;
}
}
- n_committed_dist.sample(inst_num);
+ nCommittedDist.sample(inst_num);
}
template <class Impl>
@@ -1231,10 +1288,10 @@ LWBackEnd<Impl>::squash(const InstSeqNum &sn)
LSQ.squash(sn);
int freed_regs = 0;
- InstListIt waiting_list_end = waitingList.end();
+ InstListIt insts_end_it = waitingList.end();
InstListIt insts_it = waitingList.begin();
- while (insts_it != waiting_list_end && (*insts_it)->seqNum > sn)
+ while (insts_it != insts_end_it && (*insts_it)->seqNum > sn)
{
if ((*insts_it)->isSquashed()) {
++insts_it;
@@ -1260,6 +1317,7 @@ LWBackEnd<Impl>::squash(const InstSeqNum &sn)
while (!instList.empty() && (*insts_it)->seqNum > sn)
{
if ((*insts_it)->isSquashed()) {
+ panic("Instruction should not be already squashed and on list!");
++insts_it;
continue;
}
@@ -1291,18 +1349,6 @@ LWBackEnd<Impl>::squash(const InstSeqNum &sn)
--numInsts;
}
- insts_it = waitingList.begin();
- while (!waitingList.empty() && insts_it != waitingList.end()) {
- if ((*insts_it)->seqNum < sn) {
- ++insts_it;
- continue;
- }
- assert((*insts_it)->isSquashed());
-
- waitingList.erase(insts_it++);
- waitingInsts--;
- }
-
while (memBarrier && memBarrier->seqNum > sn) {
DPRINTF(BE, "[sn:%lli] Memory barrier squashed (or previously "
"squashed)\n", memBarrier->seqNum);
@@ -1320,6 +1366,18 @@ LWBackEnd<Impl>::squash(const InstSeqNum &sn)
}
}
+ insts_it = replayList.begin();
+ insts_end_it = replayList.end();
+ while (!replayList.empty() && insts_it != insts_end_it) {
+ if ((*insts_it)->seqNum < sn) {
+ ++insts_it;
+ continue;
+ }
+ assert((*insts_it)->isSquashed());
+
+ replayList.erase(insts_it++);
+ }
+
frontEnd->addFreeRegs(freed_regs);
}
@@ -1392,14 +1450,6 @@ LWBackEnd<Impl>::squashDueToMemBlocked(DynInstPtr &inst)
template <class Impl>
void
-LWBackEnd<Impl>::fetchFault(Fault &fault)
-{
- faultFromFetch = fault;
- fetchHasFault = true;
-}
-
-template <class Impl>
-void
LWBackEnd<Impl>::switchOut()
{
switchPending = true;
@@ -1416,17 +1466,25 @@ LWBackEnd<Impl>::doSwitchOut()
// yet written back.
assert(robEmpty());
assert(!LSQ.hasStoresToWB());
-
+ writeback.clear();
+ for (int i = 0; i < numInstsToWB.getSize() + 1; ++i)
+ numInstsToWB.advance();
+
+// squash(0);
+ assert(waitingList.empty());
+ assert(instList.empty());
+ assert(replayList.empty());
+ assert(writeback.empty());
LSQ.switchOut();
-
- squash(0);
}
template <class Impl>
void
LWBackEnd<Impl>::takeOverFrom(ThreadContext *old_tc)
{
- switchedOut = false;
+ assert(!squashPending);
+ squashSeqNum = 0;
+ squashNextPC = 0;
tcSquash = false;
trapSquash = false;
@@ -1451,27 +1509,27 @@ LWBackEnd<Impl>::updateExeInstStats(DynInstPtr &inst)
//
#ifdef TARGET_ALPHA
if (inst->isDataPrefetch())
- exe_swp[thread_number]++;
+ exeSwp[thread_number]++;
else
- exe_inst[thread_number]++;
+ exeInst[thread_number]++;
#else
- exe_inst[thread_number]++;
+ exeInst[thread_number]++;
#endif
//
// Control operations
//
if (inst->isControl())
- exe_branches[thread_number]++;
+ exeBranches[thread_number]++;
//
// Memory operations
//
if (inst->isMemRef()) {
- exe_refs[thread_number]++;
+ exeRefs[thread_number]++;
if (inst->isLoad())
- exe_loads[thread_number]++;
+ exeLoads[thread_number]++;
}
}
@@ -1491,33 +1549,33 @@ LWBackEnd<Impl>::updateComInstStats(DynInstPtr &inst)
//
#ifdef TARGET_ALPHA
if (inst->isDataPrefetch()) {
- stat_com_swp[tid]++;
+ statComSwp[tid]++;
} else {
- stat_com_inst[tid]++;
+ statComInst[tid]++;
}
#else
- stat_com_inst[tid]++;
+ statComInst[tid]++;
#endif
//
// Control Instructions
//
if (inst->isControl())
- stat_com_branches[tid]++;
+ statComBranches[tid]++;
//
// Memory references
//
if (inst->isMemRef()) {
- stat_com_refs[tid]++;
+ statComRefs[tid]++;
if (inst->isLoad()) {
- stat_com_loads[tid]++;
+ statComLoads[tid]++;
}
}
if (inst->isMemBarrier()) {
- stat_com_membars[tid]++;
+ statComMembars[tid]++;
}
}
@@ -1569,6 +1627,45 @@ LWBackEnd<Impl>::dumpInsts()
++num;
}
+ inst_list_it = --(writeback.end());
+
+ cprintf("Writeback list size: %i\n", writeback.size());
+
+ while (inst_list_it != writeback.end())
+ {
+ cprintf("Instruction:%i\n",
+ num);
+ if (!(*inst_list_it)->isSquashed()) {
+ if (!(*inst_list_it)->isIssued()) {
+ ++valid_num;
+ cprintf("Count:%i\n", valid_num);
+ } else if ((*inst_list_it)->isMemRef() &&
+ !(*inst_list_it)->memOpDone) {
+ // Loads that have not been marked as executed still count
+ // towards the total instructions.
+ ++valid_num;
+ cprintf("Count:%i\n", valid_num);
+ }
+ }
+
+ cprintf("PC:%#x\n[sn:%lli]\n[tid:%i]\n"
+ "Issued:%i\nSquashed:%i\n",
+ (*inst_list_it)->readPC(),
+ (*inst_list_it)->seqNum,
+ (*inst_list_it)->threadNumber,
+ (*inst_list_it)->isIssued(),
+ (*inst_list_it)->isSquashed());
+
+ if ((*inst_list_it)->isMemRef()) {
+ cprintf("MemOpDone:%i\n", (*inst_list_it)->memOpDone);
+ }
+
+ cprintf("\n");
+
+ inst_list_it--;
+ ++num;
+ }
+
cprintf("Waiting list size: %i\n", waitingList.size());
inst_list_it = --(waitingList.end());