From 5c1742b822c1f4d640d30963a908386caf8c4a6e Mon Sep 17 00:00:00 2001 From: Korey Sewell Date: Sat, 18 Apr 2009 10:42:29 -0400 Subject: o3-delay-slot-bpred: fix decode stage handling of uncdtl. branches.\n decode stage was not setting the predicted PC correctly or passing that information back to fetch correctly --- src/cpu/o3/bpred_unit.hh | 5 ++++ src/cpu/o3/bpred_unit_impl.hh | 55 ++++++++++++++++++++++++++++++++++++------- src/cpu/o3/decode_impl.hh | 29 ++++++++++++++++------- src/cpu/o3/fetch_impl.hh | 16 +++++++------ src/cpu/o3/iew_impl.hh | 2 +- 5 files changed, 83 insertions(+), 24 deletions(-) (limited to 'src/cpu/o3') diff --git a/src/cpu/o3/bpred_unit.hh b/src/cpu/o3/bpred_unit.hh index b32d2bd23..a11582ca7 100644 --- a/src/cpu/o3/bpred_unit.hh +++ b/src/cpu/o3/bpred_unit.hh @@ -188,6 +188,10 @@ class BPredUnit wasCall(0), bpHistory(bp_history) { } + bool operator==(const PredictorHistory &entry) const { + return this->seqNum == entry.seqNum; + } + /** The sequence number for the predictor history entry. */ InstSeqNum seqNum; @@ -220,6 +224,7 @@ class BPredUnit }; typedef std::list History; + typedef typename History::iterator HistoryIt; /** * The per-thread predictor history. This is used to update the predictor diff --git a/src/cpu/o3/bpred_unit_impl.hh b/src/cpu/o3/bpred_unit_impl.hh index 2fa59280d..26aba986d 100644 --- a/src/cpu/o3/bpred_unit_impl.hh +++ b/src/cpu/o3/bpred_unit_impl.hh @@ -36,6 +36,8 @@ #include "params/DerivO3CPU.hh" +#include + template BPredUnit::BPredUnit(DerivO3CPUParams *params) : _name(params->name + ".BPredUnit"), @@ -173,6 +175,10 @@ BPredUnit::predict(DynInstPtr &inst, Addr &PC, unsigned tid) tid, pred_taken, inst->readPC()); } + DPRINTF(Fetch, "BranchPred: [tid:%i]: [sn:%i] Creating prediction history " + "for PC %#x\n", + tid, inst->seqNum, inst->readPC()); + PredictorHistory predict_record(inst->seqNum, PC, pred_taken, bp_history, tid); @@ -240,7 +246,8 @@ BPredUnit::predict(DynInstPtr &inst, Addr &PC, unsigned tid) predHist[tid].push_front(predict_record); - DPRINTF(Fetch, "[tid:%i]: predHist.size(): %i\n", tid, predHist[tid].size()); + DPRINTF(Fetch, "BranchPred: [tid:%i]: [sn:%i]: History entry added." + "predHist.size(): %i\n", tid, inst->seqNum, predHist[tid].size()); return pred_taken; } @@ -249,7 +256,7 @@ template void BPredUnit::update(const InstSeqNum &done_sn, unsigned tid) { - DPRINTF(Fetch, "BranchPred: [tid:%i]: Commiting branches until " + DPRINTF(Fetch, "BranchPred: [tid:%i]: Committing branches until " "[sn:%lli].\n", tid, done_sn); while (!predHist[tid].empty() && @@ -290,7 +297,12 @@ BPredUnit::squash(const InstSeqNum &squashed_sn, unsigned tid) // This call should delete the bpHistory. BPSquash(pred_hist.front().bpHistory); + DPRINTF(Fetch, "BranchPred: [tid:%i]: Removing history for [sn:%i] " + "PC %#x.\n", tid, pred_hist.front().seqNum, pred_hist.front().PC); + pred_hist.pop_front(); + + DPRINTF(Fetch, "[tid:%i]: predHist.size(): %i\n", tid, predHist[tid].size()); } } @@ -305,6 +317,13 @@ BPredUnit::squash(const InstSeqNum &squashed_sn, // Now that we know that a branch was mispredicted, we need to undo // all the branches that have been seen up until this branch and // fix up everything. + // NOTE: This should be call conceivably in 2 scenarios: + // (1) After an branch is executed, it updates its status in the ROB + // The commit stage then checks the ROB update and sends a signal to + // the fetch stage to squash history after the mispredict + // (2) In the decode stage, you can find out early if a unconditional + // PC-relative, branch was predicted incorrectly. If so, a signal + // to the fetch stage is sent to squash history after the mispredict History &pred_hist = predHist[tid]; @@ -314,22 +333,42 @@ BPredUnit::squash(const InstSeqNum &squashed_sn, "setting target to %#x.\n", tid, squashed_sn, corr_target); + // Squash All Branches AFTER this mispredicted branch squash(squashed_sn, tid); // If there's a squash due to a syscall, there may not be an entry // corresponding to the squash. In that case, don't bother trying to // fix up the entry. if (!pred_hist.empty()) { - assert(pred_hist.front().seqNum == squashed_sn); - if (pred_hist.front().usedRAS) { + + HistoryIt hist_it = pred_hist.begin(); + //HistoryIt hist_it = find(pred_hist.begin(), pred_hist.end(), + // squashed_sn); + + //assert(hist_it != pred_hist.end()); + if (pred_hist.front().seqNum != squashed_sn) { + DPRINTF(Fetch, "Front sn %i != Squash sn %i\n", + pred_hist.front().seqNum, squashed_sn); + + assert(pred_hist.front().seqNum == squashed_sn); + } + + + if ((*hist_it).usedRAS) { ++RASIncorrect; } - BPUpdate(pred_hist.front().PC, actually_taken, + BPUpdate((*hist_it).PC, actually_taken, pred_hist.front().bpHistory); - BTB.update(pred_hist.front().PC, corr_target, tid); - pred_hist.pop_front(); + BTB.update((*hist_it).PC, corr_target, tid); + + DPRINTF(Fetch, "BranchPred: [tid:%i]: Removing history for [sn:%i] " + "PC %#x.\n", tid, (*hist_it).seqNum, (*hist_it).PC); + + pred_hist.erase(hist_it); + + DPRINTF(Fetch, "[tid:%i]: predHist.size(): %i\n", tid, predHist[tid].size()); } } @@ -386,7 +425,7 @@ template void BPredUnit::dump() { - typename History::iterator pred_hist_it; + HistoryIt pred_hist_it; for (int i = 0; i < Impl::MaxThreads; ++i) { if (!predHist[i].empty()) { diff --git a/src/cpu/o3/decode_impl.hh b/src/cpu/o3/decode_impl.hh index 015bc8d7f..d92e97f61 100644 --- a/src/cpu/o3/decode_impl.hh +++ b/src/cpu/o3/decode_impl.hh @@ -262,28 +262,30 @@ template void DefaultDecode::squash(DynInstPtr &inst, unsigned tid) { - DPRINTF(Decode, "[tid:%i]: Squashing due to incorrect branch prediction " - "detected at decode.\n", tid); + DPRINTF(Decode, "[tid:%i]: [sn:%i] Squashing due to incorrect branch prediction " + "detected at decode.\n", tid, inst->seqNum); // Send back mispredict information. toFetch->decodeInfo[tid].branchMispredict = true; toFetch->decodeInfo[tid].predIncorrect = true; - toFetch->decodeInfo[tid].doneSeqNum = inst->seqNum; toFetch->decodeInfo[tid].squash = true; - toFetch->decodeInfo[tid].nextPC = inst->branchTarget(); - ///FIXME There needs to be a way to set the nextPC and nextNPC - ///explicitly for ISAs with delay slots. - toFetch->decodeInfo[tid].nextNPC = - inst->branchTarget() + sizeof(TheISA::MachInst); + toFetch->decodeInfo[tid].doneSeqNum = inst->seqNum; toFetch->decodeInfo[tid].nextMicroPC = inst->readMicroPC(); + #if ISA_HAS_DELAY_SLOT + toFetch->decodeInfo[tid].nextPC = inst->readPC() + sizeof(TheISA::MachInst); + toFetch->decodeInfo[tid].nextNPC = inst->branchTarget(); toFetch->decodeInfo[tid].branchTaken = inst->readNextNPC() != (inst->readNextPC() + sizeof(TheISA::MachInst)); #else + toFetch->decodeInfo[tid].nextPC = inst->branchTarget(); + toFetch->decodeInfo[tid].nextNPC = + inst->branchTarget() + sizeof(TheISA::MachInst); toFetch->decodeInfo[tid].branchTaken = inst->readNextPC() != (inst->readPC() + sizeof(TheISA::MachInst)); #endif + InstSeqNum squash_seq_num = inst->seqNum; // Might have to tell fetch to unblock. @@ -738,8 +740,19 @@ DefaultDecode::decodeInsts(unsigned tid) // a check at the end squash(inst, inst->threadNumber); Addr target = inst->branchTarget(); + +#if ISA_HAS_DELAY_SLOT + DPRINTF(Decode, "[sn:%i]: Updating predictions: PredPC: %#x PredNextPC: %#x\n", + inst->seqNum, inst->readPC() + sizeof(TheISA::MachInst), target); + + //The micro pc after an instruction level branch should be 0 + inst->setPredTarg(inst->readPC() + sizeof(TheISA::MachInst), target, 0); +#else + DPRINTF(Decode, "[sn:%i]: Updating predictions: PredPC: %#x PredNextPC: %#x\n", + inst->seqNum, target, target + sizeof(TheISA::MachInst)); //The micro pc after an instruction level branch should be 0 inst->setPredTarg(target, target + sizeof(TheISA::MachInst), 0); +#endif break; } } diff --git a/src/cpu/o3/fetch_impl.hh b/src/cpu/o3/fetch_impl.hh index c3be74234..279d3e56a 100644 --- a/src/cpu/o3/fetch_impl.hh +++ b/src/cpu/o3/fetch_impl.hh @@ -524,12 +524,13 @@ DefaultFetch::lookupAndUpdateNextPC(DynInstPtr &inst, Addr &next_PC, Addr pred_PC = next_PC; predict_taken = branchPred.predict(inst, pred_PC, tid); -/* if (predict_taken) { - DPRINTF(Fetch, "[tid:%i]: Branch predicted to be taken to %#x.\n", - tid, pred_PC); + if (predict_taken) { + DPRINTF(Fetch, "[tid:%i]: [sn:%i]: Branch predicted to be taken to %#x.\n", + tid, inst->seqNum, pred_PC); } else { - DPRINTF(Fetch, "[tid:%i]: Branch predicted to be not taken.\n", tid); - }*/ + DPRINTF(Fetch, "[tid:%i]: [sn:%i]:Branch predicted to be not taken.\n", + tid, inst->seqNum); + } #if ISA_HAS_DELAY_SLOT next_PC = next_NPC; @@ -544,8 +545,9 @@ DefaultFetch::lookupAndUpdateNextPC(DynInstPtr &inst, Addr &next_PC, next_PC += instSize; next_NPC = next_PC + instSize; #endif -/* DPRINTF(Fetch, "[tid:%i]: Branch predicted to go to %#x and then %#x.\n", - tid, next_PC, next_NPC);*/ + + DPRINTF(Fetch, "[tid:%i]: [sn:%i] Branch predicted to go to %#x and then %#x.\n", + tid, inst->seqNum, next_PC, next_NPC); inst->setPredTarg(next_PC, next_NPC, next_MicroPC); inst->setPredTaken(predict_taken); diff --git a/src/cpu/o3/iew_impl.hh b/src/cpu/o3/iew_impl.hh index 1daecd669..778ff444f 100644 --- a/src/cpu/o3/iew_impl.hh +++ b/src/cpu/o3/iew_impl.hh @@ -1282,7 +1282,7 @@ DefaultIEW::executeInsts() fetchRedirect[tid] = true; DPRINTF(IEW, "Execute: Branch mispredict detected.\n"); - DPRINTF(IEW, "Predicted target was %#x, %#x.\n", + DPRINTF(IEW, "Predicted target was PC:%#x, NPC:%#x.\n", inst->readPredPC(), inst->readPredNPC()); DPRINTF(IEW, "Execute: Redirecting fetch to PC: %#x," " NPC: %#x.\n", inst->readNextPC(), -- cgit v1.2.3