From 0fd4bb7f12d8a633f3ff0abe61d4f3a78bca6f84 Mon Sep 17 00:00:00 2001 From: Mitch Hayenga Date: Tue, 5 Apr 2016 11:48:37 -0500 Subject: cpu: Add an indirect branch target predictor This patch adds a configurable indirect branch predictor that can be indexed by a combination of GHR and path history hashes. Implements the functionality described in: "Target prediction for indirect jumps" by Chang, Hao, and Patt http://dl.acm.org/citation.cfm?id=264209 This is a re-spin of fb9d142 after the revert (bd1c6789). --- src/cpu/pred/BranchPredictor.py | 10 +++ src/cpu/pred/SConscript | 2 + src/cpu/pred/bi_mode.cc | 6 ++ src/cpu/pred/bi_mode.hh | 1 + src/cpu/pred/bpred_unit.cc | 124 +++++++++++++++++++++------ src/cpu/pred/bpred_unit.hh | 24 +++++- src/cpu/pred/indirect.cc | 185 ++++++++++++++++++++++++++++++++++++++++ src/cpu/pred/indirect.hh | 97 +++++++++++++++++++++ src/cpu/pred/tournament.cc | 6 ++ src/cpu/pred/tournament.hh | 2 + 10 files changed, 428 insertions(+), 29 deletions(-) create mode 100644 src/cpu/pred/indirect.cc create mode 100644 src/cpu/pred/indirect.hh (limited to 'src/cpu') diff --git a/src/cpu/pred/BranchPredictor.py b/src/cpu/pred/BranchPredictor.py index 5c52fb65e..2d7d0d0e2 100644 --- a/src/cpu/pred/BranchPredictor.py +++ b/src/cpu/pred/BranchPredictor.py @@ -42,6 +42,16 @@ class BranchPredictor(SimObject): RASSize = Param.Unsigned(16, "RAS size") instShiftAmt = Param.Unsigned(2, "Number of bits to shift instructions by") + useIndirect = Param.Bool(True, "Use indirect branch predictor") + indirectHashGHR = Param.Bool(True, "Hash branch predictor GHR") + indirectHashTargets = Param.Bool(True, "Hash path history targets") + indirectSets = Param.Unsigned(256, "Cache sets for indirect predictor") + indirectWays = Param.Unsigned(2, "Ways for indirect predictor") + indirectTagSize = Param.Unsigned(16, "Indirect target cache tag bits") + indirectPathLength = Param.Unsigned(3, + "Previous indirect targets to use for path history") + + class 
LocalBP(BranchPredictor): type = 'LocalBP' diff --git a/src/cpu/pred/SConscript b/src/cpu/pred/SConscript index 1bf94712d..dca5e8d88 100644 --- a/src/cpu/pred/SConscript +++ b/src/cpu/pred/SConscript @@ -35,9 +35,11 @@ if env['TARGET_ISA'] == 'null': SimObject('BranchPredictor.py') +DebugFlag('Indirect') Source('bpred_unit.cc') Source('2bit_local.cc') Source('btb.cc') +Source('indirect.cc') Source('ras.cc') Source('tournament.cc') Source ('bi_mode.cc') diff --git a/src/cpu/pred/bi_mode.cc b/src/cpu/pred/bi_mode.cc index c2a41cd4d..bc974bac8 100644 --- a/src/cpu/pred/bi_mode.cc +++ b/src/cpu/pred/bi_mode.cc @@ -236,6 +236,12 @@ BiModeBP::retireSquashed(void *bp_history) delete history; } +unsigned +BiModeBP::getGHR(void *bp_history) const +{ + return static_cast<BPHistory*>(bp_history)->globalHistoryReg; +} + void BiModeBP::updateGlobalHistReg(bool taken) { diff --git a/src/cpu/pred/bi_mode.hh b/src/cpu/pred/bi_mode.hh index da7c49f46..120a6ffd9 100644 --- a/src/cpu/pred/bi_mode.hh +++ b/src/cpu/pred/bi_mode.hh @@ -63,6 +63,7 @@ class BiModeBP : public BPredUnit void btbUpdate(Addr branch_addr, void * &bp_history); void update(Addr branch_addr, bool taken, void *bp_history, bool squashed); void retireSquashed(void *bp_history); + unsigned getGHR(void *bp_history) const; private: void updateGlobalHistReg(bool taken); diff --git a/src/cpu/pred/bpred_unit.cc b/src/cpu/pred/bpred_unit.cc index c38927c8d..d12e9f9f7 100644 --- a/src/cpu/pred/bpred_unit.cc +++ b/src/cpu/pred/bpred_unit.cc @@ -62,6 +62,15 @@ BPredUnit::BPredUnit(const Params *params) params->instShiftAmt, params->numThreads), RAS(numThreads), + useIndirect(params->useIndirect), + iPred(params->indirectHashGHR, + params->indirectHashTargets, + params->indirectSets, + params->indirectWays, + params->indirectTagSize, + params->indirectPathLength, + params->instShiftAmt, + params->numThreads), instShiftAmt(params->instShiftAmt) { for (auto& r : RAS) @@ -117,6 +126,27 @@ BPredUnit::regStats() .name(name() + ".RASInCorrect") 
.desc("Number of incorrect RAS predictions.") ; + + indirectLookups + .name(name() + ".indirectLookups") + .desc("Number of indirect predictor lookups.") + ; + + indirectHits + .name(name() + ".indirectHits") + .desc("Number of indirect target hits.") + ; + + indirectMisses + .name(name() + ".indirectMisses") + .desc("Number of indirect misses.") + ; + + indirectMispredicted + .name(name() + ".indirectMispredicted") + .desc("Number of mispredicted indirect branches.") + ; + } ProbePoints::PMUUPtr @@ -216,31 +246,59 @@ BPredUnit::predict(const StaticInstPtr &inst, const InstSeqNum &seqNum, tid, pc, pc, RAS[tid].topIdx()); } - if (BTB.valid(pc.instAddr(), tid)) { - ++BTBHits; - - // If it's not a return, use the BTB to get the target addr. - target = BTB.lookup(pc.instAddr(), tid); - - DPRINTF(Branch, "[tid:%i]: Instruction %s predicted" - " target is %s.\n", tid, pc, target); - + if (inst->isDirectCtrl() || !useIndirect) { + // Check BTB on direct branches + if (BTB.valid(pc.instAddr(), tid)) { + ++BTBHits; + + // If it's not a return, use the BTB to get target addr. 
+ target = BTB.lookup(pc.instAddr(), tid); + + DPRINTF(Branch, "[tid:%i]: Instruction %s predicted" + " target is %s.\n", tid, pc, target); + + } else { + DPRINTF(Branch, "[tid:%i]: BTB doesn't have a " + "valid entry.\n",tid); + pred_taken = false; + // The Direction of the branch predictor is altered + // because the BTB did not have an entry + // The predictor needs to be updated accordingly + if (!inst->isCall() && !inst->isReturn()) { + btbUpdate(pc.instAddr(), bp_history); + DPRINTF(Branch, "[tid:%i]:[sn:%i] btbUpdate" + " called for %s\n", tid, seqNum, pc); + } else if (inst->isCall() && !inst->isUncondCtrl()) { + RAS[tid].pop(); + predict_record.pushedRAS = false; + } + TheISA::advancePC(target, inst); + } } else { - DPRINTF(Branch, "[tid:%i]: BTB doesn't have a " - "valid entry.\n",tid); - pred_taken = false; - // The Direction of the branch predictor is altered because the - // BTB did not have an entry - // The predictor needs to be updated accordingly - if (!inst->isCall() && !inst->isReturn()) { - btbUpdate(pc.instAddr(), bp_history); - DPRINTF(Branch, "[tid:%i]:[sn:%i] btbUpdate" - " called for %s\n", tid, seqNum, pc); - } else if (inst->isCall() && !inst->isUncondCtrl()) { - RAS[tid].pop(); - predict_record.pushedRAS = false; + predict_record.wasIndirect = true; + ++indirectLookups; + //Consult indirect predictor on indirect control + if (iPred.lookup(pc.instAddr(), getGHR(bp_history), target, + tid)) { + // Indirect predictor hit + ++indirectHits; + DPRINTF(Branch, "[tid:%i]: Instruction %s predicted " + "indirect target is %s.\n", tid, pc, target); + } else { + ++indirectMisses; + pred_taken = false; + DPRINTF(Branch, "[tid:%i]: Instruction %s no indirect " + "target.\n", tid, pc); + if (!inst->isCall() && !inst->isReturn()) { + + } else if (inst->isCall() && !inst->isUncondCtrl()) { + RAS[tid].pop(); + predict_record.pushedRAS = false; + } + TheISA::advancePC(target, inst); } - TheISA::advancePC(target, inst); + iPred.recordIndirect(pc.instAddr(), 
target.instAddr(), seqNum, + tid); } } } else { @@ -388,6 +446,7 @@ BPredUnit::update(const InstSeqNum &done_sn, ThreadID tid) DPRINTF(Branch, "[tid:%i]: Committing branches until " "[sn:%lli].\n", tid, done_sn); + iPred.commit(done_sn, tid); while (!predHist[tid].empty() && predHist[tid].back().seqNum <= done_sn) { // Update the branch predictor with the correct results. @@ -407,6 +466,7 @@ BPredUnit::squash(const InstSeqNum &squashed_sn, ThreadID tid) { History &pred_hist = predHist[tid]; + iPred.squash(squashed_sn, tid); while (!pred_hist.empty() && pred_hist.front().seqNum > squashed_sn) { if (pred_hist.front().usedRAS) { @@ -485,8 +545,13 @@ BPredUnit::squash(const InstSeqNum &squashed_sn, if ((*hist_it).usedRAS) { ++RASIncorrect; + DPRINTF(Branch, "[tid:%i]: Incorrect RAS [sn:%i]\n", + tid, hist_it->seqNum); } + // Have to get GHR here because the update deletes bpHistory + unsigned ghr = getGHR(hist_it->bpHistory); + update((*hist_it).pc, actually_taken, pred_hist.front().bpHistory, true); hist_it->wasSquashed = true; @@ -499,12 +564,15 @@ BPredUnit::squash(const InstSeqNum &squashed_sn, RAS[tid].pop(); hist_it->usedRAS = true; } + if (hist_it->wasIndirect) { + ++indirectMispredicted; + iPred.recordTarget(hist_it->seqNum, ghr, corrTarget, tid); + } else { + DPRINTF(Branch,"[tid: %i] BTB Update called for [sn:%i]" + " PC: %s\n", tid,hist_it->seqNum, hist_it->pc); - DPRINTF(Branch,"[tid: %i] BTB Update called for [sn:%i]" - " PC: %s\n", tid,hist_it->seqNum, hist_it->pc); - - BTB.update((*hist_it).pc, corrTarget, tid); - + BTB.update((*hist_it).pc, corrTarget, tid); + } } else { //Actually not Taken if (hist_it->usedRAS) { diff --git a/src/cpu/pred/bpred_unit.hh b/src/cpu/pred/bpred_unit.hh index bef8cb949..fb10a8bee 100644 --- a/src/cpu/pred/bpred_unit.hh +++ b/src/cpu/pred/bpred_unit.hh @@ -52,6 +52,7 @@ #include "base/statistics.hh" #include "base/types.hh" #include "cpu/pred/btb.hh" +#include "cpu/pred/indirect.hh" #include "cpu/pred/ras.hh" #include 
"cpu/inst_seq.hh" #include "cpu/static_inst.hh" @@ -197,6 +198,9 @@ class BPredUnit : public SimObject void BTBUpdate(Addr instPC, const TheISA::PCState &target) { BTB.update(instPC, target, 0); } + + virtual unsigned getGHR(void* bp_history) const { return 0; } + void dump(); private: @@ -210,7 +214,7 @@ class BPredUnit : public SimObject ThreadID _tid) : seqNum(seq_num), pc(instPC), bpHistory(bp_history), RASTarget(0), RASIndex(0), tid(_tid), predTaken(pred_taken), usedRAS(0), pushedRAS(0), - wasCall(0), wasReturn(0), wasSquashed(0) + wasCall(0), wasReturn(0), wasSquashed(0), wasIndirect(0) {} bool operator==(const PredictorHistory &entry) const { @@ -255,6 +259,9 @@ class BPredUnit : public SimObject /** Whether this instruction has already mispredicted/updated bp */ bool wasSquashed; + + /** Whether this instruction was an indirect branch */ + bool wasIndirect; }; typedef std::deque<PredictorHistory> History; @@ -276,6 +283,12 @@ class BPredUnit : public SimObject /** The per-thread return address stack. */ std::vector<ReturnAddrStack> RAS; + /** Option to disable indirect predictor. */ + const bool useIndirect; + + /** The indirect target predictor. */ + IndirectPredictor iPred; + /** Stat for number of BP lookups. */ Stats::Scalar lookups; /** Stat for number of conditional branches predicted. */ @@ -295,6 +308,15 @@ class BPredUnit : public SimObject /** Stat for number of times the RAS is incorrect. */ Stats::Scalar RASIncorrect; + /** Stat for the number of indirect target lookups.*/ + Stats::Scalar indirectLookups; + /** Stat for the number of indirect target hits.*/ + Stats::Scalar indirectHits; + /** Stat for the number of indirect target misses.*/ + Stats::Scalar indirectMisses; + /** Stat for the number of indirect target mispredictions.*/ + Stats::Scalar indirectMispredicted; + protected: /** Number of bits to shift instructions by for predictor addresses. 
*/ const unsigned instShiftAmt; diff --git a/src/cpu/pred/indirect.cc b/src/cpu/pred/indirect.cc new file mode 100644 index 000000000..a8934d55e --- /dev/null +++ b/src/cpu/pred/indirect.cc @@ -0,0 +1,185 @@ +/* + * Copyright (c) 2014 ARM Limited + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * Authors: Mitch Hayenga + */ + +#include "cpu/pred/indirect.hh" + +#include "base/intmath.hh" +#include "debug/Indirect.hh" + +IndirectPredictor::IndirectPredictor(bool hash_ghr, bool hash_targets, + unsigned num_sets, unsigned num_ways, + unsigned tag_bits, unsigned path_len, unsigned inst_shift, + unsigned num_threads) + : hashGHR(hash_ghr), hashTargets(hash_targets), + numSets(num_sets), numWays(num_ways), tagBits(tag_bits), + pathLength(path_len), instShift(inst_shift) +{ + if (!isPowerOf2(numSets)) { + panic("Indirect predictor requires power of 2 number of sets"); + } + + threadInfo.resize(num_threads); + + targetCache.resize(numSets); + for (unsigned i = 0; i < numSets; i++) { + targetCache[i].resize(numWays); + } +} + +bool +IndirectPredictor::lookup(Addr br_addr, unsigned ghr, TheISA::PCState& target, + ThreadID tid) +{ + Addr set_index = getSetIndex(br_addr, ghr, tid); + Addr tag = getTag(br_addr); + + assert(set_index < numSets); + + DPRINTF(Indirect, "Looking up %x (set:%d)\n", br_addr, set_index); + const auto &iset = targetCache[set_index]; + for (auto way = iset.begin(); way != iset.end(); ++way) { + if (way->tag == tag) { + DPRINTF(Indirect, "Hit %x (target:%s)\n", br_addr, way->target); + target = way->target; + return true; + } + } + DPRINTF(Indirect, "Miss %x\n", br_addr); + return false; +} + +void +IndirectPredictor::recordIndirect(Addr br_addr, Addr tgt_addr, + InstSeqNum seq_num, ThreadID tid) +{ + DPRINTF(Indirect, "Recording %x seq:%d\n", br_addr, seq_num); + HistoryEntry entry(br_addr, tgt_addr, seq_num); + threadInfo[tid].pathHist.push_back(entry); +} + +void +IndirectPredictor::commit(InstSeqNum seq_num, ThreadID tid) +{ + DPRINTF(Indirect, "Committing seq:%d\n", seq_num); + ThreadInfo &t_info = threadInfo[tid]; + + if (t_info.pathHist.empty()) return; + + if (t_info.headHistEntry < t_info.pathHist.size() && + t_info.pathHist[t_info.headHistEntry].seqNum <= seq_num) { + if (t_info.headHistEntry >= pathLength) { + 
t_info.pathHist.pop_front(); + } else { + ++t_info.headHistEntry; + } + } +} + +void +IndirectPredictor::squash(InstSeqNum seq_num, ThreadID tid) +{ + DPRINTF(Indirect, "Squashing seq:%d\n", seq_num); + ThreadInfo &t_info = threadInfo[tid]; + auto squash_itr = t_info.pathHist.begin(); + while (squash_itr != t_info.pathHist.end()) { + if (squash_itr->seqNum > seq_num) { + break; + } + ++squash_itr; + } + if (squash_itr != t_info.pathHist.end()) { + DPRINTF(Indirect, "Squashing series starting with sn:%d\n", + squash_itr->seqNum); + } + t_info.pathHist.erase(squash_itr, t_info.pathHist.end()); +} + + +void +IndirectPredictor::recordTarget(InstSeqNum seq_num, unsigned ghr, + const TheISA::PCState& target, ThreadID tid) +{ + ThreadInfo &t_info = threadInfo[tid]; + + // Should have just squashed so this branch should be the oldest + auto hist_entry = *(t_info.pathHist.rbegin()); + // Temporarily pop it off the history so we can calculate the set + t_info.pathHist.pop_back(); + Addr set_index = getSetIndex(hist_entry.pcAddr, ghr, tid); + Addr tag = getTag(hist_entry.pcAddr); + hist_entry.targetAddr = target.instAddr(); + t_info.pathHist.push_back(hist_entry); + + assert(set_index < numSets); + + auto &iset = targetCache[set_index]; + for (auto way = iset.begin(); way != iset.end(); ++way) { + if (way->tag == tag) { + DPRINTF(Indirect, "Updating Target (seq: %d br:%x set:%d target:" + "%s)\n", seq_num, hist_entry.pcAddr, set_index, target); + way->target = target; + return; + } + } + + DPRINTF(Indirect, "Allocating Target (seq: %d br:%x set:%d target:%s)\n", + seq_num, hist_entry.pcAddr, set_index, target); + // Did not find entry, random replacement + auto &way = iset[rand() % numWays]; + way.tag = tag; + way.target = target; +} + + +inline Addr +IndirectPredictor::getSetIndex(Addr br_addr, unsigned ghr, ThreadID tid) +{ + ThreadInfo &t_info = threadInfo[tid]; + + Addr hash = br_addr >> instShift; + if (hashGHR) { + hash ^= ghr; + } + if (hashTargets) { + unsigned 
hash_shift = floorLog2(numSets) / pathLength; + for (int i = t_info.pathHist.size()-1, p = 0; + i >= 0 && p < pathLength; i--, p++) { + hash ^= (t_info.pathHist[i].targetAddr >> + (instShift + p*hash_shift)); + } + } + return hash & (numSets-1); +} + +inline Addr +IndirectPredictor::getTag(Addr br_addr) +{ + return (br_addr >> instShift) & ((0x1< + +#include "arch/isa_traits.hh" +#include "config/the_isa.hh" +#include "cpu/inst_seq.hh" + +class IndirectPredictor +{ + public: + IndirectPredictor(bool hash_ghr, bool hash_targets, + unsigned num_sets, unsigned num_ways, + unsigned tag_bits, unsigned path_len, + unsigned inst_shift, unsigned num_threads); + bool lookup(Addr br_addr, unsigned ghr, TheISA::PCState& br_target, + ThreadID tid); + void recordIndirect(Addr br_addr, Addr tgt_addr, InstSeqNum seq_num, + ThreadID tid); + void commit(InstSeqNum seq_num, ThreadID tid); + void squash(InstSeqNum seq_num, ThreadID tid); + void recordTarget(InstSeqNum seq_num, unsigned ghr, + const TheISA::PCState& target, ThreadID tid); + + private: + const bool hashGHR; + const bool hashTargets; + const unsigned numSets; + const unsigned numWays; + const unsigned tagBits; + const unsigned pathLength; + const unsigned instShift; + + struct IPredEntry + { + IPredEntry() : tag(0), target(0) { } + Addr tag; + TheISA::PCState target; + }; + + std::vector<std::vector<IPredEntry> > targetCache; + + Addr getSetIndex(Addr br_addr, unsigned ghr, ThreadID tid); + Addr getTag(Addr br_addr); + + struct HistoryEntry + { + HistoryEntry(Addr br_addr, Addr tgt_addr, InstSeqNum seq_num) + : pcAddr(br_addr), targetAddr(tgt_addr), seqNum(seq_num) { } + Addr pcAddr; + Addr targetAddr; + InstSeqNum seqNum; + }; + + + struct ThreadInfo { + ThreadInfo() : headHistEntry(0) { } + + std::deque<HistoryEntry> pathHist; + unsigned headHistEntry; + }; + + std::vector<ThreadInfo> threadInfo; +}; + +#endif // __CPU_PRED_INDIRECT_HH__ diff --git a/src/cpu/pred/tournament.cc b/src/cpu/pred/tournament.cc index ea6be46fa..2dd48a09c 100644 --- 
a/src/cpu/pred/tournament.cc +++ b/src/cpu/pred/tournament.cc @@ -388,6 +388,12 @@ TournamentBPParams::create() return new TournamentBP(this); } +unsigned +TournamentBP::getGHR(void *bp_history) const +{ + return static_cast<BPHistory *>(bp_history)->globalHistory; +} + #ifdef DEBUG int TournamentBP::BPHistory::newCount = 0; diff --git a/src/cpu/pred/tournament.hh b/src/cpu/pred/tournament.hh index 8fb5b515c..82a1daa64 100644 --- a/src/cpu/pred/tournament.hh +++ b/src/cpu/pred/tournament.hh @@ -114,6 +114,8 @@ class TournamentBP : public BPredUnit */ void squash(void *bp_history); + unsigned getGHR(void *bp_history) const; + /** Returns the global history. */ inline unsigned readGlobalHist() { return globalHistory; } -- cgit v1.2.3