summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Korey Sewell <ksewell@umich.edu> 2010-06-23 18:19:18 -0400
committer Korey Sewell <ksewell@umich.edu> 2010-06-23 18:19:18 -0400
commit defab3ffd5d2e37215d3f9433dc9fc754a90a9a9 (patch)
tree 1fad74d7a7358dbcf6481781c05afe2ba31a97d6
parent 9f0d8f252c2de0b9ac5654b2c35e913831eba756 (diff)
download gem5-defab3ffd5d2e37215d3f9433dc9fc754a90a9a9.tar.xz
inorder: update branch predictor
- use InOrderBPred instead of Resource for DPRINTFs
- account for DELAY SLOT in updating RAS and in squashing
- don't let squashed instructions update the predictor
- the BTB needs to use the ASID not the TID to work for multithreaded programs
- add stats for BTB hits
-rw-r--r-- src/cpu/inorder/resources/bpred_unit.cc 95
-rw-r--r-- src/cpu/inorder/resources/bpred_unit.hh 2
-rw-r--r-- src/cpu/inorder/resources/branch_predictor.cc 89
-rw-r--r-- src/cpu/inorder/resources/execution_unit.cc 6
4 files changed, 130 insertions, 62 deletions
diff --git a/src/cpu/inorder/resources/bpred_unit.cc b/src/cpu/inorder/resources/bpred_unit.cc
index c4bb61974..0002c270b 100644
--- a/src/cpu/inorder/resources/bpred_unit.cc
+++ b/src/cpu/inorder/resources/bpred_unit.cc
@@ -103,6 +103,12 @@ BPredUnit::regStats()
.desc("Number of BTB hits")
;
+ BTBHitPct
+ .name(name() + ".BTBHitPct")
+ .desc("BTB Hit Percentage")
+ .precision(6);
+ BTBHitPct = (BTBHits / BTBLookups) * 100;
+
usedRAS
.name(name() + ".usedRAS")
.desc("Number of times the RAS was used to get a target.")
@@ -150,30 +156,35 @@ BPredUnit::predict(DynInstPtr &inst, Addr &PC, ThreadID tid)
using TheISA::MachInst;
+ int asid = inst->asid;
bool pred_taken = false;
Addr target;
++lookups;
+ DPRINTF(InOrderBPred, "[tid:%i] [sn:%i] %s ... PC%#x doing branch prediction\n",
+ tid, inst->seqNum, inst->staticInst->disassemble(inst->PC),
+ inst->readPC());
+
void *bp_history = NULL;
if (inst->isUncondCtrl()) {
- DPRINTF(Resource, "BranchPred: [tid:%i] Unconditional control.\n", tid);
+ DPRINTF(InOrderBPred, "BranchPred: [tid:%i] Unconditional control.\n", tid);
pred_taken = true;
// Tell the BP there was an unconditional branch.
BPUncond(bp_history);
- if (inst->isReturn() && RAS[tid].empty()) {
- DPRINTF(Resource, "BranchPred: [tid:%i] RAS is empty, predicting "
- "false.\n", tid);
- pred_taken = false;
- }
+ if (inst->isReturn() && RAS[tid].empty()) {
+ DPRINTF(InOrderBPred, "BranchPred: [tid:%i] RAS is empty, predicting "
+ "false.\n", tid);
+ pred_taken = false;
+ }
} else {
++condPredicted;
pred_taken = BPLookup(PC, bp_history);
- DPRINTF(Resource, "BranchPred: [tid:%i]: Branch predictor predicted %i "
+ DPRINTF(InOrderBPred, "BranchPred: [tid:%i]: Branch predictor predicted %i "
"for PC %#x\n",
tid, pred_taken, inst->readPC());
}
@@ -199,22 +210,28 @@ BPredUnit::predict(DynInstPtr &inst, Addr &PC, ThreadID tid)
RAS[tid].pop();
- DPRINTF(Resource, "BranchPred: [tid:%i]: Instruction %#x is a return, "
+ DPRINTF(InOrderBPred, "BranchPred: [tid:%i]: Instruction %#x is a return, "
"RAS predicted target: %#x, RAS index: %i.\n",
tid, inst->readPC(), target, predict_record.RASIndex);
} else {
++BTBLookups;
if (inst->isCall()) {
- RAS[tid].push(PC + sizeof(MachInst));
+#if ISA_HAS_DELAY_SLOT
+ Addr ras_pc = PC + (2 * sizeof(MachInst)); // Next Next PC
+#else
+ Addr ras_pc = PC + sizeof(MachInst); // Next PC
+#endif
+
+ RAS[tid].push(ras_pc);
// Record that it was a call so that the top RAS entry can
// be popped off if the speculation is incorrect.
predict_record.wasCall = true;
- DPRINTF(Resource, "BranchPred: [tid:%i] Instruction %#x was a call"
- ", adding %#x to the RAS.\n",
- tid, inst->readPC(), PC + sizeof(MachInst));
+ DPRINTF(InOrderBPred, "BranchPred: [tid:%i]: Instruction %#x was a call"
+ ", adding %#x to the RAS index: %i.\n",
+ tid, inst->readPC(), ras_pc, RAS[tid].topIdx());
}
if (inst->isCall() &&
@@ -222,20 +239,20 @@ BPredUnit::predict(DynInstPtr &inst, Addr &PC, ThreadID tid)
inst->isDirectCtrl()) {
target = inst->branchTarget();
- DPRINTF(Fetch, "BranchPred: [tid:%i]: Setting %#x predicted"
+ DPRINTF(InOrderBPred, "BranchPred: [tid:%i]: Setting %#x predicted"
" target to %#x.\n",
tid, inst->readPC(), target);
- } else if (BTB.valid(PC, tid)) {
+ } else if (BTB.valid(PC, asid)) {
++BTBHits;
// If it's not a return, use the BTB to get the target addr.
- target = BTB.lookup(PC, tid);
+ target = BTB.lookup(PC, asid);
- DPRINTF(Resource, "BranchPred: [tid:%i]: Instruction %#x predicted"
+ DPRINTF(InOrderBPred, "BranchPred: [tid:%i]: [asid:%i] Instruction %#x predicted"
" target is %#x.\n",
- tid, inst->readPC(), target);
+ tid, asid, inst->readPC(), target);
} else {
- DPRINTF(Resource, "BranchPred: [tid:%i]: BTB doesn't have a "
+ DPRINTF(InOrderBPred, "BranchPred: [tid:%i]: BTB doesn't have a "
"valid entry.\n",tid);
pred_taken = false;
}
@@ -258,7 +275,8 @@ BPredUnit::predict(DynInstPtr &inst, Addr &PC, ThreadID tid)
predHist[tid].push_front(predict_record);
- DPRINTF(Resource, "[tid:%i] predHist.size(): %i\n", tid, predHist[tid].size());
+ DPRINTF(InOrderBPred, "[tid:%i] [sn:%i] pushed onto front of predHist ...predHist.size(): %i\n",
+ tid, inst->seqNum, predHist[tid].size());
inst->setBranchPred(pred_taken);
@@ -292,7 +310,7 @@ BPredUnit::squash(const InstSeqNum &squashed_sn, ThreadID tid)
while (!pred_hist.empty() &&
pred_hist.front().seqNum > squashed_sn) {
if (pred_hist.front().usedRAS) {
- DPRINTF(Resource, "BranchPred: [tid:%i]: Restoring top of RAS to: %i,"
+ DPRINTF(InOrderBPred, "BranchPred: [tid:%i]: Restoring top of RAS to: %i,"
" target: %#x.\n",
tid,
pred_hist.front().RASIndex,
@@ -302,7 +320,7 @@ BPredUnit::squash(const InstSeqNum &squashed_sn, ThreadID tid)
pred_hist.front().RASTarget);
} else if (pred_hist.front().wasCall) {
- DPRINTF(Resource, "BranchPred: [tid:%i]: Removing speculative entry "
+ DPRINTF(InOrderBPred, "BranchPred: [tid:%i]: Removing speculative entry "
"added to the RAS.\n",tid);
RAS[tid].pop();
@@ -331,7 +349,7 @@ BPredUnit::squash(const InstSeqNum &squashed_sn,
++condIncorrect;
- DPRINTF(Resource, "BranchPred: [tid:%i]: Squashing from sequence number %i, "
+ DPRINTF(InOrderBPred, "BranchPred: [tid:%i]: Squashing from sequence number %i, "
"setting target to %#x.\n",
tid, squashed_sn, corr_target);
@@ -341,19 +359,38 @@ BPredUnit::squash(const InstSeqNum &squashed_sn,
// corresponding to the squash. In that case, don't bother trying to
// fix up the entry.
if (!pred_hist.empty()) {
- if(pred_hist.front().seqNum==squashed_sn){
+ HistoryIt hist_it = pred_hist.begin();
+ //HistoryIt hist_it = find(pred_hist.begin(), pred_hist.end(),
+ // squashed_sn);
- assert(pred_hist.front().seqNum == squashed_sn);
- if (pred_hist.front().usedRAS) {
+ //assert(hist_it != pred_hist.end());
+ if (pred_hist.front().seqNum != squashed_sn) {
+ DPRINTF(InOrderBPred, "Front sn %i != Squash sn %i\n",
+ pred_hist.front().seqNum, squashed_sn);
+
+ assert(pred_hist.front().seqNum == squashed_sn);
+ }
+
+
+ if ((*hist_it).usedRAS) {
++RASIncorrect;
}
- BPUpdate(pred_hist.front().PC, actually_taken,
+ BPUpdate((*hist_it).PC, actually_taken,
pred_hist.front().bpHistory);
- BTB.update(pred_hist.front().PC, corr_target, tid);
- pred_hist.pop_front();
- }
+ BTB.update((*hist_it).PC, corr_target, tid);
+
+ DPRINTF(InOrderBPred, "BranchPred: [tid:%i]: Removing history for [sn:%i] "
+ "PC %#x.\n", tid, (*hist_it).seqNum, (*hist_it).PC);
+
+ pred_hist.erase(hist_it);
+
+ DPRINTF(InOrderBPred, "[tid:%i]: predHist.size(): %i\n", tid, predHist[tid].size());
+
+ } else {
+ DPRINTF(InOrderBPred, "BranchPred: [tid:%i]: [sn:%i] pred_hist empty, can't update.\n",
+ tid, squashed_sn);
}
}
diff --git a/src/cpu/inorder/resources/bpred_unit.hh b/src/cpu/inorder/resources/bpred_unit.hh
index 72229ca70..b17200fd2 100644
--- a/src/cpu/inorder/resources/bpred_unit.hh
+++ b/src/cpu/inorder/resources/bpred_unit.hh
@@ -219,6 +219,7 @@ class BPredUnit
};
typedef std::list<PredictorHistory> History;
+ typedef History::iterator HistoryIt;
/**
* The per-thread predictor history. This is used to update the predictor
@@ -255,6 +256,7 @@ class BPredUnit
Stats::Scalar usedRAS;
/** Stat for number of times the RAS is incorrect. */
Stats::Scalar RASIncorrect;
+ Stats::Formula BTBHitPct;
};
#endif // __CPU_INORDER_BPRED_UNIT_HH__
diff --git a/src/cpu/inorder/resources/branch_predictor.cc b/src/cpu/inorder/resources/branch_predictor.cc
index a4ebfe33d..b0a497837 100644
--- a/src/cpu/inorder/resources/branch_predictor.cc
+++ b/src/cpu/inorder/resources/branch_predictor.cc
@@ -78,42 +78,48 @@ BranchPredictor::execute(int slot_num)
{
case PredictBranch:
{
- Addr pred_PC = inst->readNextPC();
-
- if (inst->isControl()) {
- // If not, the pred_PC be updated to pc+8
- // If predicted, the pred_PC will be updated to new target value
- bool predict_taken = branchPred.predict(inst, pred_PC, tid);
+ if (inst->seqNum > cpu->squashSeqNum[tid] &&
+ curTick == cpu->lastSquashCycle[tid]) {
+ DPRINTF(InOrderStage, "[tid:%u]: [sn:%i]: squashed, skipping prediction \n",
+ tid, inst->seqNum);
+ } else {
+ Addr pred_PC = inst->readNextPC();
- if (predict_taken) {
- DPRINTF(InOrderBPred, "[tid:%i]: [sn:%i]: Branch predicted true.\n",
- tid, seq_num);
+ if (inst->isControl()) {
+ // If not, the pred_PC be updated to pc+8
+ // If predicted, the pred_PC will be updated to new target value
+ bool predict_taken = branchPred.predict(inst, pred_PC, tid);
- inst->setPredTarg(pred_PC);
+ if (predict_taken) {
+ DPRINTF(InOrderBPred, "[tid:%i]: [sn:%i]: Branch predicted true.\n",
+ tid, seq_num);
- predictedTaken++;
- } else {
- DPRINTF(InOrderBPred, "[tid:%i]: [sn:%i]: Branch predicted false.\n",
- tid, seq_num);
+ inst->setPredTarg(pred_PC);
- if (inst->isCondDelaySlot())
- {
- inst->setPredTarg(inst->readPC() + (2 * instSize));
+ predictedTaken++;
} else {
- inst->setPredTarg(pred_PC);
- }
+ DPRINTF(InOrderBPred, "[tid:%i]: [sn:%i]: Branch predicted false.\n",
+ tid, seq_num);
- predictedNotTaken++;
- }
+ if (inst->isCondDelaySlot())
+ {
+ inst->setPredTarg(inst->readPC() + (2 * instSize));
+ } else {
+ inst->setPredTarg(pred_PC);
+ }
- inst->setBranchPred(predict_taken);
+ predictedNotTaken++;
+ }
- DPRINTF(InOrderBPred, "[tid:%i]: [sn:%i]: Predicted PC is %08p.\n",
+ inst->setBranchPred(predict_taken);
+
+ DPRINTF(InOrderBPred, "[tid:%i]: [sn:%i]: Predicted PC is %08p.\n",
tid, seq_num, pred_PC);
- } else {
- DPRINTF(InOrderBPred, "[tid:%i]: Ignoring [sn:%i] because this isn't "
- "a control instruction.\n", tid, seq_num);
+ } else {
+ //DPRINTF(InOrderBPred, "[tid:%i]: Ignoring [sn:%i] because this isn't "
+ // "a control instruction.\n", tid, seq_num);
+ }
}
bpred_req->done();
@@ -122,11 +128,17 @@ BranchPredictor::execute(int slot_num)
case UpdatePredictor:
{
- DPRINTF(InOrderBPred, "[tid:%i]: [sn:%i]: Updating Branch Predictor.\n",
- tid, seq_num);
+ if (inst->seqNum > cpu->squashSeqNum[tid] &&
+ curTick == cpu->lastSquashCycle[tid]) {
+ DPRINTF(InOrderStage, "[tid:%u]: [sn:%i]: squashed, skipping branch predictor update \n",
+ tid, inst->seqNum);
+ } else {
+ DPRINTF(InOrderBPred, "[tid:%i]: [sn:%i]: Updating Branch Predictor.\n",
+ tid, seq_num);
- branchPred.update(seq_num, tid);
+ branchPred.update(seq_num, tid);
+ }
bpred_req->done();
}
@@ -141,10 +153,21 @@ void
BranchPredictor::squash(DynInstPtr inst, int squash_stage,
InstSeqNum squash_seq_num, ThreadID tid)
{
- DPRINTF(InOrderBPred, "Squashing...\n");
- Addr corr_targ=inst->readPredPC();
- bool taken=inst->predTaken();
- branchPred.squash(squash_seq_num,corr_targ,taken,tid);
+ DPRINTF(InOrderBPred, "[tid:%i][sn:%i] Squashing...\n", tid, inst->seqNum);
+
+#if ISA_HAS_DELAY_SLOT
+ // We need to squash the actual branch , NOT the delay slot
+ // in the branch predictor
+ squash_seq_num = squash_seq_num - 1;
+#endif
+
+ if(squash_stage>=ThePipeline::BackEndStartStage) {
+ Addr corr_targ=inst->readPredPC();
+ bool taken=inst->predTaken();
+ branchPred.squash(squash_seq_num,corr_targ,taken,tid);
+ } else {
+ branchPred.squash(squash_seq_num, tid);
+ }
}
void
diff --git a/src/cpu/inorder/resources/execution_unit.cc b/src/cpu/inorder/resources/execution_unit.cc
index 868ebe098..017308585 100644
--- a/src/cpu/inorder/resources/execution_unit.cc
+++ b/src/cpu/inorder/resources/execution_unit.cc
@@ -170,8 +170,14 @@ ExecutionUnit::execute(int slot_num)
if (inst->predTaken()) {
predictedTakenIncorrect++;
+ DPRINTF(InOrderExecute, "[tid:%i] [sn:%i] %s ... PC%#x ... Mispredicts! (Taken)\n",
+ tid, inst->seqNum, inst->staticInst->disassemble(inst->PC),
+ inst->readPC());
} else {
predictedNotTakenIncorrect++;
+ DPRINTF(InOrderExecute, "[tid:%i] [sn:%i] %s ... PC%#x ... Mispredicts! (Not Taken)\n",
+ tid, inst->seqNum, inst->staticInst->disassemble(inst->PC),
+ inst->readPC());
}
} else {
DPRINTF(InOrderExecute, "[tid:%i]: [sn:%i]: Prediction Correct.\n",