diff options
-rw-r--r-- | src/cpu/pred/BranchPredictor.py | 40 | ||||
-rw-r--r-- | src/cpu/pred/SConscript | 1 | ||||
-rw-r--r-- | src/cpu/pred/loop_predictor.cc | 361 | ||||
-rw-r--r-- | src/cpu/pred/loop_predictor.hh | 272 | ||||
-rw-r--r-- | src/cpu/pred/ltage.cc | 286 | ||||
-rw-r--r-- | src/cpu/pred/ltage.hh | 128 |
6 files changed, 703 insertions, 385 deletions
diff --git a/src/cpu/pred/BranchPredictor.py b/src/cpu/pred/BranchPredictor.py index 06856865a..1d5fd5eec 100644 --- a/src/cpu/pred/BranchPredictor.py +++ b/src/cpu/pred/BranchPredictor.py @@ -145,16 +145,10 @@ class LTAGE_TAGE(TAGEBase): logTagTableSizes = [14, 10, 10, 11, 11, 11, 11, 10, 10, 10, 10, 9, 9] logUResetPeriod = 19 -# LTAGE branch predictor as described in -# https://www.irisa.fr/caps/people/seznec/L-TAGE.pdf -# It is basically a TAGE predictor plus a loop predictor -# The differnt TAGE sizes are updated according to the paper values (256 Kbits) -class LTAGE(TAGE): - type = 'LTAGE' - cxx_class = 'LTAGE' - cxx_header = "cpu/pred/ltage.hh" - - tage = LTAGE_TAGE() +class LoopPredictor(SimObject): + type = 'LoopPredictor' + cxx_class = 'LoopPredictor' + cxx_header = 'cpu/pred/loop_predictor.hh' logSizeLoopPred = Param.Unsigned(8, "Log size of the loop predictor") withLoopBits = Param.Unsigned(7, "Size of the WITHLOOP counter") @@ -166,8 +160,8 @@ class LTAGE(TAGE): logLoopTableAssoc = Param.Unsigned(2, "Log loop predictor associativity") # Parameters for enabling modifications to the loop predictor - # They have been copied from ISL-TAGE - # (https://www.jilp.org/jwac-2/program/03_seznec.tgz) + # They have been copied from TAGE-GSC-IMLI + # (http://www.irisa.fr/alf/downloads/seznec/TAGE-GSC-IMLI.tar) # # All of them should be disabled to match the original LTAGE implementation # (http://hpca23.cse.tamu.edu/taco/camino/cbp2/cbp-src/realistic-seznec.h) @@ -181,3 +175,25 @@ class LTAGE(TAGE): # Add a direction bit to the loop table entries useDirectionBit = Param.Bool(False, "Use direction info") + # If true, use random to decide whether to allocate or not, and only try + # with one entry + restrictAllocation = Param.Bool(False, + "Restrict the allocation conditions") + + initialLoopIter = Param.Unsigned(1, "Initial iteration number") + initialLoopAge = Param.Unsigned(255, "Initial age value") + optionalAgeReset = Param.Bool(True, + "Reset age bits optionally in some cases") + + +# LTAGE branch predictor as described in +# https://www.irisa.fr/caps/people/seznec/L-TAGE.pdf +# It is basically a TAGE predictor plus a loop predictor +# The differnt TAGE sizes are updated according to the paper values (256 Kbits) +class LTAGE(TAGE): + type = 'LTAGE' + cxx_class = 'LTAGE' + cxx_header = "cpu/pred/ltage.hh" + + tage = LTAGE_TAGE() + loop_predictor = Param.LoopPredictor(LoopPredictor(), "Loop predictor") diff --git a/src/cpu/pred/SConscript b/src/cpu/pred/SConscript index 96451f253..9e7e166ea 100644 --- a/src/cpu/pred/SConscript +++ b/src/cpu/pred/SConscript @@ -45,6 +45,7 @@ Source('tournament.cc') Source ('bi_mode.cc') Source('tage_base.cc') Source('tage.cc') +Source('loop_predictor.cc') Source('ltage.cc') DebugFlag('FreeList') DebugFlag('Branch') diff --git a/src/cpu/pred/loop_predictor.cc b/src/cpu/pred/loop_predictor.cc new file mode 100644 index 000000000..2e709f392 --- /dev/null +++ b/src/cpu/pred/loop_predictor.cc @@ -0,0 +1,361 @@ +/* + * Copyright (c) 2014 The University of Wisconsin + * + * Copyright (c) 2006 INRIA (Institut National de Recherche en + * Informatique et en Automatique / French National Research Institute + * for Computer Science and Applied Mathematics) + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Vignyan Reddy, Dibakar Gope and Arthur Perais, + * from André Seznec's code. + */ + +#include "cpu/pred/loop_predictor.hh" + +#include "params/LoopPredictor.hh" + +LoopPredictor::LoopPredictor(LoopPredictorParams *p) + : SimObject(p), logSizeLoopPred(p->logSizeLoopPred), + loopTableAgeBits(p->loopTableAgeBits), + loopTableConfidenceBits(p->loopTableConfidenceBits), + loopTableTagBits(p->loopTableTagBits), + loopTableIterBits(p->loopTableIterBits), + logLoopTableAssoc(p->logLoopTableAssoc), + confidenceThreshold((1 << loopTableConfidenceBits) - 1), + loopTagMask((1 << loopTableTagBits) - 1), + loopNumIterMask((1 << loopTableIterBits) - 1), + loopSetMask((1 << (logSizeLoopPred - logLoopTableAssoc)) - 1), + loopUseCounter(-1), + withLoopBits(p->withLoopBits), + useDirectionBit(p->useDirectionBit), + useSpeculation(p->useSpeculation), + useHashing(p->useHashing), + restrictAllocation(p->restrictAllocation), + initialLoopIter(p->initialLoopIter), + initialLoopAge(p->initialLoopAge), + optionalAgeReset(p->optionalAgeReset) +{ + assert(initialLoopAge <= ((1 << loopTableAgeBits) - 1)); +} + +void +LoopPredictor::init() +{ + // we use uint16_t type for these vales, so they cannot be more than + // 16 bits + assert(loopTableTagBits <= 16); + assert(loopTableIterBits <= 16); + + assert(logSizeLoopPred >= logLoopTableAssoc); + + ltable = new LoopEntry[ULL(1) << logSizeLoopPred]; +} + +LoopPredictor::BranchInfo* +LoopPredictor::makeBranchInfo() +{ + return new BranchInfo(); +} + +int +LoopPredictor::lindex(Addr pc_in, unsigned instShiftAmt) const +{ + // The loop table is implemented as a linear table + // If associativity is N (N being 1 << logLoopTableAssoc), + // the first N entries are for set 0, the next N entries are for set 1, + // and so on. + // Thus, this function calculates the set and then it gets left shifted + // by logLoopTableAssoc in order to return the index of the first of the + // N entries of the set + Addr pc = pc_in >> instShiftAmt; + if (useHashing) { + pc ^= pc_in; + } + return ((pc & loopSetMask) << logLoopTableAssoc); +} + +int +LoopPredictor::finallindex(int index, int lowPcBits, int way) const +{ + return (useHashing ? (index ^ ((lowPcBits >> way) << logLoopTableAssoc)) : + (index)) + + way; +} + +//loop prediction: only used if high confidence +bool +LoopPredictor::getLoop(Addr pc, BranchInfo* bi, bool speculative, + unsigned instShiftAmt) const +{ + bi->loopHit = -1; + bi->loopPredValid = false; + bi->loopIndex = lindex(pc, instShiftAmt); + + if (useHashing) { + unsigned pcShift = logSizeLoopPred - logLoopTableAssoc; + bi->loopIndexB = (pc >> pcShift) & loopSetMask; + bi->loopTag = (pc >> pcShift) ^ (pc >> (pcShift + loopTableTagBits)); + bi->loopTag &= loopTagMask; + } else { + unsigned pcShift = instShiftAmt + logSizeLoopPred - logLoopTableAssoc; + bi->loopTag = (pc >> pcShift) & loopTagMask; + // bi->loopIndexB is not used without hash + } + + for (int i = 0; i < (1 << logLoopTableAssoc); i++) { + int idx = finallindex(bi->loopIndex, bi->loopIndexB, i); + if (ltable[idx].tag == bi->loopTag) { + bi->loopHit = i; + bi->loopPredValid = calcConf(idx); + + uint16_t iter = speculative ? ltable[idx].currentIterSpec + : ltable[idx].currentIter; + + if ((iter + 1) == ltable[idx].numIter) { + return useDirectionBit ? !(ltable[idx].dir) : false; + } else { + return useDirectionBit ? (ltable[idx].dir) : true; + } + } + } + return false; +} + +bool +LoopPredictor::calcConf(int index) const +{ + return ltable[index].confidence == confidenceThreshold; +} + +void +LoopPredictor::specLoopUpdate(bool taken, BranchInfo* bi) +{ + if (bi->loopHit>=0) { + int index = finallindex(bi->loopIndex, bi->loopIndexB, bi->loopHit); + if (taken != ltable[index].dir) { + ltable[index].currentIterSpec = 0; + } else { + ltable[index].currentIterSpec = + (ltable[index].currentIterSpec + 1) & loopNumIterMask; + } + } +} + +bool +LoopPredictor::optionalAgeInc(int nrand) const +{ + return false; +} + +void +LoopPredictor::loopUpdate(Addr pc, bool taken, BranchInfo* bi, bool tage_pred, + int random0, int random1, int random2) +{ + int idx = finallindex(bi->loopIndex, bi->loopIndexB, bi->loopHit); + if (bi->loopHit >= 0) { + //already a hit + if (bi->loopPredValid) { + if (taken != bi->loopPred) { + // free the entry + ltable[idx].numIter = 0; + ltable[idx].age = 0; + ltable[idx].confidence = 0; + ltable[idx].currentIter = 0; + return; + } else if (bi->loopPred != tage_pred || optionalAgeInc(random0)) { + unsignedCtrUpdate(ltable[idx].age, true, loopTableAgeBits); + } + } + + ltable[idx].currentIter = + (ltable[idx].currentIter + 1) & loopNumIterMask; + if (ltable[idx].currentIter > ltable[idx].numIter) { + ltable[idx].confidence = 0; + if (ltable[idx].numIter != 0) { + // free the entry + ltable[idx].numIter = 0; + if (optionalAgeReset) { + ltable[idx].age = 0; + } + } + } + + if (taken != (useDirectionBit ? ltable[idx].dir : true)) { + if (ltable[idx].currentIter == ltable[idx].numIter) { + unsignedCtrUpdate(ltable[idx].confidence, true, + loopTableConfidenceBits); + //just do not predict when the loop count is 1 or 2 + if (ltable[idx].numIter < 3) { + // free the entry + ltable[idx].dir = taken; // ignored if no useDirectionBit + ltable[idx].numIter = 0; + ltable[idx].age = 0; + ltable[idx].confidence = 0; + } + } else { + if (ltable[idx].numIter == 0) { + // first complete nest; + ltable[idx].confidence = 0; + ltable[idx].numIter = ltable[idx].currentIter; + } else { + //not the same number of iterations as last time: free the + //entry + ltable[idx].numIter = 0; + if (optionalAgeReset) { + ltable[idx].age = 0; + } + ltable[idx].confidence = 0; + } + } + ltable[idx].currentIter = 0; + } + + } else if (useDirectionBit ? (bi->predTaken != taken) : taken) { + if ((random2 & 3) == 0 || !restrictAllocation) { + //try to allocate an entry on taken branch + int nrand = random1; + for (int i = 0; i < (1 << logLoopTableAssoc); i++) { + int loop_hit = (nrand + i) & ((1 << logLoopTableAssoc) - 1); + idx = finallindex(bi->loopIndex, bi->loopIndexB, loop_hit); + if (ltable[idx].age == 0) { + ltable[idx].dir = !taken; // ignored if no useDirectionBit + ltable[idx].tag = bi->loopTag; + ltable[idx].numIter = 0; + ltable[idx].age = initialLoopAge; + ltable[idx].confidence = 0; + ltable[idx].currentIter = initialLoopIter; + break; + + } else { + ltable[idx].age--; + } + if (restrictAllocation) { + break; + } + } + } + } +} + +bool +LoopPredictor::loopPredict(ThreadID tid, Addr branch_pc, bool cond_branch, + BranchInfo* bi, bool prev_pred_taken, unsigned instShiftAmt) +{ + bool pred_taken = prev_pred_taken; + if (cond_branch) { + // loop prediction + bi->loopPred = getLoop(branch_pc, bi, useSpeculation, instShiftAmt); + + if ((loopUseCounter >= 0) && bi->loopPredValid) { + pred_taken = bi->loopPred; + bi->loopPredUsed = true; + } + + if (useSpeculation) { + specLoopUpdate(pred_taken, bi); + } + } + + return pred_taken; +} + +void +LoopPredictor::squash(ThreadID tid, BranchInfo *bi) +{ + if (bi->loopHit >= 0) { + int idx = finallindex(bi->loopIndex, + bi->loopIndexB, + bi->loopHit); + ltable[idx].currentIterSpec = bi->currentIter; + } +} + +void +LoopPredictor::squashLoop(BranchInfo* bi) +{ + if (bi->loopHit >= 0) { + int idx = finallindex(bi->loopIndex, + bi->loopIndexB, + bi->loopHit); + ltable[idx].currentIterSpec = bi->currentIter; + } +} + +void +LoopPredictor::updateStats(bool taken, BranchInfo* bi) +{ + if (taken == bi->loopPred) { + loopPredictorCorrect++; + } else { + loopPredictorWrong++; + } +} + +void +LoopPredictor::condBranchUpdate(ThreadID tid, Addr branch_pc, bool taken, + bool tage_pred, BranchInfo* bi, + unsigned instShiftAmt, int rand0, int rand1, + int rand2) +{ + if (useSpeculation) { + // recalculate loop prediction without speculation + // It is ok to overwrite the loop prediction fields in bi + // as the stats have already been updated with the previous + // values + bi->loopPred = getLoop(branch_pc, bi, false, instShiftAmt); + } + + if (bi->loopPredValid) { + if (bi->predTaken != bi->loopPred) { + signedCtrUpdate(loopUseCounter, + (bi->loopPred == taken), + withLoopBits); + } + } + + loopUpdate(branch_pc, taken, bi, tage_pred, rand0, rand1, rand2); +} + +void +LoopPredictor::regStats() +{ + loopPredictorCorrect + .name(name() + ".loopPredictorCorrect") + .desc("Number of times the loop predictor is the provider and " + "the prediction is correct"); + + loopPredictorWrong + .name(name() + ".loopPredictorWrong") + .desc("Number of times the loop predictor is the provider and " + "the prediction is wrong"); +} + +LoopPredictor * +LoopPredictorParams::create() +{ + return new LoopPredictor(this); +} diff --git a/src/cpu/pred/loop_predictor.hh b/src/cpu/pred/loop_predictor.hh new file mode 100644 index 000000000..3579c5763 --- /dev/null +++ b/src/cpu/pred/loop_predictor.hh @@ -0,0 +1,272 @@ +/* + * Copyright (c) 2014 The University of Wisconsin + * + * Copyright (c) 2006 INRIA (Institut National de Recherche en + * Informatique et en Automatique / French National Research Institute + * for Computer Science and Applied Mathematics) + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Vignyan Reddy, Dibakar Gope and Arthur Perais, + * from André Seznec's code. + */ + +#ifndef __CPU_PRED_LOOP_PREDICTOR_HH__ +#define __CPU_PRED_LOOP_PREDICTOR_HH__ + +#include "base/statistics.hh" +#include "base/types.hh" +#include "sim/sim_object.hh" + +struct LoopPredictorParams; + +class LoopPredictor : public SimObject +{ + protected: + const unsigned logSizeLoopPred; + const unsigned loopTableAgeBits; + const unsigned loopTableConfidenceBits; + const unsigned loopTableTagBits; + const unsigned loopTableIterBits; + const unsigned logLoopTableAssoc; + const uint8_t confidenceThreshold; + const uint16_t loopTagMask; + const uint16_t loopNumIterMask; + const int loopSetMask; + + // Prediction Structures + // Loop Predictor Entry + struct LoopEntry + { + uint16_t numIter; + uint16_t currentIter; + uint16_t currentIterSpec; // only for useSpeculation + uint8_t confidence; + uint16_t tag; + uint8_t age; + bool dir; // only for useDirectionBit + + LoopEntry() : numIter(0), currentIter(0), currentIterSpec(0), + confidence(0), tag(0), age(0), dir(0) { } + }; + + LoopEntry *ltable; + + int8_t loopUseCounter; + unsigned withLoopBits; + + const bool useDirectionBit; + const bool useSpeculation; + const bool useHashing; + const bool restrictAllocation; + const unsigned initialLoopIter; + const unsigned initialLoopAge; + const bool optionalAgeReset; + + // stats + Stats::Scalar loopPredictorCorrect; + Stats::Scalar loopPredictorWrong; + + /** + * Updates an unsigned counter based on up/down parameter + * @param ctr Reference to counter to update. + * @param up Boolean indicating if the counter is incremented/decremented + * If true it is incremented, if false it is decremented + * @param nbits Counter width. + */ + static inline void unsignedCtrUpdate(uint8_t &ctr, bool up, unsigned nbits) + { + assert(nbits <= sizeof(uint8_t) << 3); + if (up) { + if (ctr < ((1 << nbits) - 1)) + ctr++; + } else { + if (ctr) + ctr--; + } + } + static inline void signedCtrUpdate(int8_t &ctr, bool up, unsigned nbits) + { + if (up) { + if (ctr < ((1 << (nbits - 1)) - 1)) + ctr++; + } else { + if (ctr > -(1 << (nbits - 1))) + ctr--; + } + } + public: + // Primary branch history entry + struct BranchInfo + { + uint16_t loopTag; + uint16_t currentIter; + + bool loopPred; + bool loopPredValid; + bool loopPredUsed; + int loopIndex; + int loopIndexB; // only for useHashing + int loopHit; + bool predTaken; + + BranchInfo() + : loopTag(0), currentIter(0), + loopPred(false), + loopPredValid(false), loopIndex(0), loopIndexB(0), loopHit(0), + predTaken(false) + {} + }; + + /** + * Computes the index used to access the + * loop predictor. + * @param pc_in The unshifted branch PC. + * @param instShiftAmt Shift the pc by as many bits + */ + int lindex(Addr pc_in, unsigned instShiftAmt) const; + + /** + * Computes the index used to access the + * ltable structures. + * It may take hashing into account + * @param index Result of lindex function + * @param lowPcBits PC bits masked with set size + * @param way Way to be used + */ + int finallindex(int lindex, int lowPcBits, int way) const; + + /** + * Get a branch prediction from the loop + * predictor. + * @param pc The unshifted branch PC. + * @param bi Pointer to information on the + * prediction. + * @param speculative Use speculative number of iterations + * @param instShiftAmt Shift the pc by as many bits (if hashing is not + * used) + * @result the result of the prediction, if it could be predicted + */ + bool getLoop(Addr pc, BranchInfo* bi, bool speculative, + unsigned instShiftAmt) const; + + /** + * Updates the loop predictor. + * @param pc The unshifted branch PC. + * @param taken The actual branch outcome. + * @param bi Pointer to information on the + * prediction recorded at prediction time. + * @param tage_pred tage prediction of the branch + * @param random0 random value + * @param random1 random value + * @param random2 random value + */ + void loopUpdate(Addr pc, bool Taken, BranchInfo* bi, bool tage_pred, + int random0, int random1, int random2); + + /** + * Speculatively updates the loop predictor + * iteration count (only for useSpeculation). + * @param taken The predicted branch outcome. + * @param bi Pointer to information on the prediction + * recorded at prediction time. + */ + void specLoopUpdate(bool taken, BranchInfo* bi); + + /** + * Update LTAGE for conditional branches. + * @param branch_pc The unshifted branch PC. + * @param taken Actual branch outcome. + * @param tage_pred Prediction from TAGE + * @param bi Pointer to information on the prediction + * recorded at prediction time. + * @param instShiftAmt Number of bits to shift instructions + * @param rand0 Random integer value + * @param rand1 Random integer value + * @param rand2 Random integer value + */ + void condBranchUpdate( + ThreadID tid, Addr branch_pc, bool taken, bool tage_pred, + BranchInfo* bi, unsigned instShiftAmt, int rand0, int rand1, + int rand2); + + /** + * Get the loop prediction + * @param tid The thread ID to select the global + * histories to use. + * @param branch_pc The unshifted branch PC. + * @param cond_branch True if the branch is conditional. + * @param bi Reference to wrapping pointer to allow storing + * derived class prediction information in the base class. + * @param prev_pred_taken Result of the TAGE prediction + * @param instShiftAmt Shift the pc by as many bits + * @param instShiftAmt Shift the pc by as many bits + * @result the prediction, true if taken + */ + bool loopPredict( + ThreadID tid, Addr branch_pc, bool cond_branch, + BranchInfo* bi, bool prev_pred_taken, unsigned instShiftAmt); + + /** + * Update the stats + * @param taken Actual branch outcome + * @param bi Pointer to information on the prediction + * recorded at prediction time. + */ + void updateStats(bool taken, BranchInfo* bi); + + void squashLoop(BranchInfo * bi); + + void squash(ThreadID tid, BranchInfo *bi); + + virtual bool calcConf(int index) const; + + virtual bool optionalAgeInc(int nrand) const; + + virtual BranchInfo *makeBranchInfo(); + + /** + * Gets the value of the loop use counter + * @return the loop use counter value + */ + int8_t getLoopUseCounter() const + { + return loopUseCounter; + } + + /** + * Initialize the loop predictor + */ + void init() override; + + /** + * Register stats for this object + */ + void regStats() override; + + LoopPredictor(LoopPredictorParams *p); +}; +#endif//__CPU_PRED_LOOP_PREDICTOR_HH__ diff --git a/src/cpu/pred/ltage.cc b/src/cpu/pred/ltage.cc index 22945ecda..cab86e459 100644 --- a/src/cpu/pred/ltage.cc +++ b/src/cpu/pred/ltage.cc @@ -42,239 +42,43 @@ #include "base/intmath.hh" #include "base/logging.hh" -#include "base/random.hh" #include "base/trace.hh" #include "debug/Fetch.hh" #include "debug/LTage.hh" LTAGE::LTAGE(const LTAGEParams *params) - : TAGE(params), - logSizeLoopPred(params->logSizeLoopPred), - loopTableAgeBits(params->loopTableAgeBits), - loopTableConfidenceBits(params->loopTableConfidenceBits), - loopTableTagBits(params->loopTableTagBits), - loopTableIterBits(params->loopTableIterBits), - logLoopTableAssoc(params->logLoopTableAssoc), - confidenceThreshold((1 << loopTableConfidenceBits) - 1), - loopTagMask((1 << loopTableTagBits) - 1), - loopNumIterMask((1 << loopTableIterBits) - 1), - loopSetMask((1 << (logSizeLoopPred - logLoopTableAssoc)) - 1), - loopUseCounter(0), - withLoopBits(params->withLoopBits), - useDirectionBit(params->useDirectionBit), - useSpeculation(params->useSpeculation), - useHashing(params->useHashing) + : TAGE(params), loopPredictor(params->loop_predictor) { - // we use uint16_t type for these vales, so they cannot be more than - // 16 bits - assert(loopTableTagBits <= 16); - assert(loopTableIterBits <= 16); - - assert(logSizeLoopPred >= logLoopTableAssoc); - - ltable = new LoopEntry[ULL(1) << logSizeLoopPred]; -} - -int -LTAGE::lindex(Addr pc_in) const -{ - // The loop table is implemented as a linear table - // If associativity is N (N being 1 << logLoopTableAssoc), - // the first N entries are for set 0, the next N entries are for set 1, - // and so on. - // Thus, this function calculates the set and then it gets left shifted - // by logLoopTableAssoc in order to return the index of the first of the - // N entries of the set - Addr mask = (ULL(1) << (logSizeLoopPred - logLoopTableAssoc)) - 1; - Addr pc = pc_in >> instShiftAmt; - if (useHashing) { - // copied from TAGE-SC-L - // (http://www.jilp.org/cbp2016/code/AndreSeznecLimited.tar.gz) - pc ^= (pc_in >> (instShiftAmt + logLoopTableAssoc)); - } - return ((pc & mask) << logLoopTableAssoc); -} - -int -LTAGE::finallindex(int index, int lowPcBits, int way) const -{ - // copied from TAGE-SC-L - // (http://www.jilp.org/cbp2016/code/AndreSeznecLimited.tar.gz) - return (useHashing ? (index ^ ((lowPcBits >> way) << logLoopTableAssoc)) : - (index)) - + way; -} - -//loop prediction: only used if high confidence -bool -LTAGE::getLoop(Addr pc, LTageBranchInfo* bi, bool speculative) const -{ - bi->loopHit = -1; - bi->loopPredValid = false; - bi->loopIndex = lindex(pc); - unsigned pcShift = instShiftAmt + logSizeLoopPred - logLoopTableAssoc; - bi->loopTag = ((pc) >> pcShift) & loopTagMask; - - if (useHashing) { - bi->loopTag ^= ((pc >> (pcShift + logSizeLoopPred)) & loopTagMask); - bi->loopLowPcBits = (pc >> pcShift) & loopSetMask; - } - - for (int i = 0; i < (1 << logLoopTableAssoc); i++) { - int idx = finallindex(bi->loopIndex, bi->loopLowPcBits, i); - if (ltable[idx].tag == bi->loopTag) { - bi->loopHit = i; - bi->loopPredValid = - ltable[idx].confidence == confidenceThreshold; - - uint16_t iter = speculative ? ltable[idx].currentIterSpec - : ltable[idx].currentIter; - - if ((iter + 1) == ltable[idx].numIter) { - return useDirectionBit ? !(ltable[idx].dir) : false; - } else { - return useDirectionBit ? (ltable[idx].dir) : true; - } - } - } - return false; -} - -void -LTAGE::specLoopUpdate(bool taken, LTageBranchInfo* bi) -{ - if (bi->loopHit>=0) { - int index = finallindex(bi->loopIndex, bi->loopLowPcBits, bi->loopHit); - if (taken != ltable[index].dir) { - ltable[index].currentIterSpec = 0; - } else { - ltable[index].currentIterSpec = - (ltable[index].currentIterSpec + 1) & loopNumIterMask; - } - } -} - -void -LTAGE::loopUpdate(Addr pc, bool taken, LTageBranchInfo* bi) -{ - int idx = finallindex(bi->loopIndex, bi->loopLowPcBits, bi->loopHit); - if (bi->loopHit >= 0) { - //already a hit - if (bi->loopPredValid) { - if (taken != bi->loopPred) { - // free the entry - ltable[idx].numIter = 0; - ltable[idx].age = 0; - ltable[idx].confidence = 0; - ltable[idx].currentIter = 0; - return; - } else if (bi->loopPred != bi->tageBranchInfo->tagePred) { - DPRINTF(LTage, "Loop Prediction success:%lx\n",pc); - TAGEBase::unsignedCtrUpdate(ltable[idx].age, true, - loopTableAgeBits); - } - } - - ltable[idx].currentIter = - (ltable[idx].currentIter + 1) & loopNumIterMask; - if (ltable[idx].currentIter > ltable[idx].numIter) { - ltable[idx].confidence = 0; - if (ltable[idx].numIter != 0) { - // free the entry - ltable[idx].numIter = 0; - ltable[idx].age = 0; - ltable[idx].confidence = 0; - } - } - - if (taken != (useDirectionBit ? ltable[idx].dir : true)) { - if (ltable[idx].currentIter == ltable[idx].numIter) { - DPRINTF(LTage, "Loop End predicted successfully:%lx\n", pc); - - TAGEBase::unsignedCtrUpdate(ltable[idx].confidence, true, - loopTableConfidenceBits); - //just do not predict when the loop count is 1 or 2 - if (ltable[idx].numIter < 3) { - // free the entry - ltable[idx].dir = taken; // ignored if no useDirectionBit - ltable[idx].numIter = 0; - ltable[idx].age = 0; - ltable[idx].confidence = 0; - } - } else { - DPRINTF(LTage, "Loop End predicted incorrectly:%lx\n", pc); - if (ltable[idx].numIter == 0) { - // first complete nest; - ltable[idx].confidence = 0; - ltable[idx].numIter = ltable[idx].currentIter; - } else { - //not the same number of iterations as last time: free the - //entry - ltable[idx].numIter = 0; - ltable[idx].age = 0; - ltable[idx].confidence = 0; - } - } - ltable[idx].currentIter = 0; - } - - } else if (useDirectionBit ? - ((bi->loopPredValid ? - bi->loopPred : bi->tageBranchInfo->tagePred) != taken) : - taken) { - //try to allocate an entry on taken branch - int nrand = TAGEBase::getRandom(); - for (int i = 0; i < (1 << logLoopTableAssoc); i++) { - int loop_hit = (nrand + i) & ((1 << logLoopTableAssoc) - 1); - idx = bi->loopIndex + loop_hit; - if (ltable[idx].age == 0) { - DPRINTF(LTage, "Allocating loop pred entry for branch %lx\n", - pc); - ltable[idx].dir = !taken; // ignored if no useDirectionBit - ltable[idx].tag = bi->loopTag; - ltable[idx].numIter = 0; - ltable[idx].age = (1 << loopTableAgeBits) - 1; - ltable[idx].confidence = 0; - ltable[idx].currentIter = 1; - break; - - } - else - ltable[idx].age--; - } - } - } //prediction bool LTAGE::predict(ThreadID tid, Addr branch_pc, bool cond_branch, void* &b) { - LTageBranchInfo *bi = new LTageBranchInfo(*tage); + LTageBranchInfo *bi = new LTageBranchInfo(*tage, *loopPredictor); b = (void*)(bi); bool pred_taken = tage->tagePredict(tid, branch_pc, cond_branch, bi->tageBranchInfo); + pred_taken = loopPredictor->loopPredict(tid, branch_pc, cond_branch, + bi->lpBranchInfo, pred_taken, + instShiftAmt); if (cond_branch) { - // loop prediction - bi->loopPred = getLoop(branch_pc, bi, useSpeculation); - - if ((loopUseCounter >= 0) && bi->loopPredValid) { - pred_taken = bi->loopPred; + if (bi->lpBranchInfo->loopPredUsed) { bi->tageBranchInfo->provider = LOOP; } DPRINTF(LTage, "Predict for %lx: taken?:%d, loopTaken?:%d, " "loopValid?:%d, loopUseCounter:%d, tagePred:%d, altPred:%d\n", - branch_pc, pred_taken, bi->loopPred, bi->loopPredValid, - loopUseCounter, bi->tageBranchInfo->tagePred, - bi->tageBranchInfo->altTaken); - - if (useSpeculation) { - specLoopUpdate(pred_taken, bi); - } + branch_pc, pred_taken, bi->lpBranchInfo->loopPred, + bi->lpBranchInfo->loopPredValid, + loopPredictor->getLoopUseCounter(), + bi->tageBranchInfo->tagePred, bi->tageBranchInfo->altTaken); } + // record final prediction + bi->lpBranchInfo->predTaken = pred_taken; + return pred_taken; } @@ -286,12 +90,17 @@ LTAGE::update(ThreadID tid, Addr branch_pc, bool taken, void* bp_history, LTageBranchInfo* bi = static_cast<LTageBranchInfo*>(bp_history); + assert(corrTarget != MaxAddr); + if (squashed) { if (tage->isSpeculativeUpdateEnabled()) { // This restores the global history, then update it // and recomputes the folded histories. tage->squash(tid, taken, bi->tageBranchInfo, corrTarget); - squashLoop(bi); + + if (bi->tageBranchInfo->condBranch) { + loopPredictor->squashLoop(bi->lpBranchInfo); + } } return; } @@ -301,31 +110,13 @@ LTAGE::update(ThreadID tid, Addr branch_pc, bool taken, void* bp_history, DPRINTF(LTage, "Updating tables for branch:%lx; taken?:%d\n", branch_pc, taken); tage->updateStats(taken, bi->tageBranchInfo); - // update stats - if (bi->tageBranchInfo->provider == LOOP) { - if (taken == bi->loopPred) { - loopPredictorCorrect++; - } else { - loopPredictorWrong++; - } - } - // cond Branch Update - if (useSpeculation) { - // recalculate loop prediction without speculation - // It is ok to overwrite the loop prediction fields in bi - // as the stats have already been updated with the previous - // values - bi->loopPred = getLoop(branch_pc, bi, false); - } - if (bi->loopPredValid) { - if (bi->tageBranchInfo->tagePred != bi->loopPred) { - TAGEBase::ctrUpdate(loopUseCounter, - (bi->loopPred == taken), - withLoopBits); - } - } - loopUpdate(branch_pc, taken, bi); + loopPredictor->updateStats(taken, bi->lpBranchInfo); + + loopPredictor->condBranchUpdate(tid, branch_pc, taken, + bi->tageBranchInfo->tagePred, bi->lpBranchInfo, instShiftAmt, + TAGEBase::getRandom(), TAGEBase::getRandom(), + TAGEBase::getRandom()); tage->condBranchUpdate(tid, branch_pc, taken, bi->tageBranchInfo, nrand, corrTarget); @@ -338,25 +129,12 @@ LTAGE::update(ThreadID tid, Addr branch_pc, bool taken, void* bp_history, } void -LTAGE::squashLoop(LTageBranchInfo* bi) -{ - if (bi->tageBranchInfo->condBranch) { - if (bi->loopHit >= 0) { - int idx = finallindex(bi->loopIndex, - bi->loopLowPcBits, - bi->loopHit); - ltable[idx].currentIterSpec = bi->currentIter; - } - } -} - -void LTAGE::squash(ThreadID tid, void *bp_history) { LTageBranchInfo* bi = (LTageBranchInfo*)(bp_history); if (bi->tageBranchInfo->condBranch) { - squashLoop(bi); + loopPredictor->squash(tid, bi->lpBranchInfo); } TAGE::squash(tid, bp_history); @@ -366,20 +144,8 @@ void LTAGE::regStats() { TAGE::regStats(); - - loopPredictorCorrect - .name(name() + ".loopPredictorCorrect") - .desc("Number of times the loop predictor is the provider and " - "the prediction is correct"); - - loopPredictorWrong - .name(name() + ".loopPredictorWrong") - .desc("Number of times the loop predictor is the provider and " - "the prediction is wrong"); } - - LTAGE* LTAGEParams::create() { diff --git a/src/cpu/pred/ltage.hh b/src/cpu/pred/ltage.hh index c749cba2c..caddbc6a6 100644 --- a/src/cpu/pred/ltage.hh +++ b/src/cpu/pred/ltage.hh @@ -56,10 +56,11 @@ #include <vector> #include "base/types.hh" +#include "cpu/pred/loop_predictor.hh" #include "cpu/pred/tage.hh" #include "params/LTAGE.hh" -class LTAGE: public TAGE +class LTAGE : public TAGE { public: LTAGE(const LTAGEParams *params); @@ -68,101 +69,34 @@ class LTAGE: public TAGE void squash(ThreadID tid, void *bp_history) override; void update(ThreadID tid, Addr branch_addr, bool taken, void *bp_history, bool squashed, const StaticInstPtr & inst, - Addr corrTarget) override; + Addr corrTarget = MaxAddr) override; + virtual void regStats() override; - void regStats() override; - - private: - // Prediction Structures - // Loop Predictor Entry - struct LoopEntry - { - uint16_t numIter; - uint16_t currentIter; - uint16_t currentIterSpec; // only for useSpeculation - uint8_t confidence; - uint16_t tag; - uint8_t age; - bool dir; // only for useDirectionBit - - LoopEntry() : numIter(0), currentIter(0), currentIterSpec(0), - confidence(0), tag(0), age(0), dir(0) { } - }; + protected: + /** The loop predictor object */ + LoopPredictor *loopPredictor; // more provider types enum { - LOOP = TAGEBase::LAST_TAGE_PROVIDER_TYPE + 1 + LOOP = TAGEBase::LAST_TAGE_PROVIDER_TYPE + 1, + LAST_LTAGE_PROVIDER_TYPE = LOOP }; // Primary branch history entry struct LTageBranchInfo : public TageBranchInfo { - uint16_t loopTag; - uint16_t currentIter; - - bool loopPred; - bool loopPredValid; - int loopIndex; - int loopLowPcBits; // only for useHashing - int loopHit; - - LTageBranchInfo(TAGEBase &tage) - : TageBranchInfo(tage), - loopTag(0), currentIter(0), - loopPred(false), - loopPredValid(false), loopIndex(0), loopLowPcBits(0), loopHit(0) + LoopPredictor::BranchInfo *lpBranchInfo; + LTageBranchInfo(TAGEBase &tage, LoopPredictor &lp) + : TageBranchInfo(tage), lpBranchInfo(lp.makeBranchInfo()) {} virtual ~LTageBranchInfo() - {} + { + delete lpBranchInfo; + } }; /** - * Computes the index used to access the - * loop predictor. - * @param pc_in The unshifted branch PC. - */ - int lindex(Addr pc_in) const; - - /** - * Computes the index used to access the - * ltable structures. - * It may take hashing into account - * @param index Result of lindex function - * @param lowPcBits PC bits masked with set size - * @param way Way to be used - */ - int finallindex(int lindex, int lowPcBits, int way) const; - - /** - * Get a branch prediction from the loop - * predictor. - * @param pc The unshifted branch PC. - * @param bi Pointer to information on the - * prediction. - * @param speculative Use speculative number of iterations - */ - bool getLoop(Addr pc, LTageBranchInfo* bi, bool speculative) const; - - /** - * Updates the loop predictor. - * @param pc The unshifted branch PC. - * @param taken The actual branch outcome. - * @param bi Pointer to information on the - * prediction recorded at prediction time. - */ - void loopUpdate(Addr pc, bool Taken, LTageBranchInfo* bi); - - /** - * Speculatively updates the loop predictor - * iteration count (only for useSpeculation). - * @param taken The predicted branch outcome. - * @param bi Pointer to information on the prediction - * recorded at prediction time. - */ - void specLoopUpdate(bool taken, LTageBranchInfo* bi); - - /** * Get a branch prediction from LTAGE. *NOT* an override of * BpredUnit::predict(). * @param tid The thread ID to select the global @@ -174,38 +108,6 @@ class LTAGE: public TAGE */ bool predict( ThreadID tid, Addr branch_pc, bool cond_branch, void* &b) override; - - /** - * Restores speculatively updated path and direction histories. - * Also recomputes compressed (folded) histories based on the - * correct branch outcome. - * @param bi Branch information. - */ - void squashLoop(LTageBranchInfo * bi); - - const unsigned logSizeLoopPred; - const unsigned loopTableAgeBits; - const unsigned loopTableConfidenceBits; - const unsigned loopTableTagBits; - const unsigned loopTableIterBits; - const unsigned logLoopTableAssoc; - const uint8_t confidenceThreshold; - const uint16_t loopTagMask; - const uint16_t loopNumIterMask; - const int loopSetMask; - - LoopEntry *ltable; - - int8_t loopUseCounter; - unsigned withLoopBits; - - const bool useDirectionBit; - const bool useSpeculation; - const bool useHashing; - - // stats - Stats::Scalar loopPredictorCorrect; - Stats::Scalar loopPredictorWrong; }; #endif // __CPU_PRED_LTAGE |