diff options
author | Javier Bueno <javier.bueno@metempsy.com> | 2019-01-28 23:59:45 +0100 |
---|---|---|
committer | Pau Cabre <pau.cabre@metempsy.com> | 2019-02-05 10:12:56 +0000 |
commit | 02d2d7b1e0fcbdc96012c31a9be0ae2a187d4d44 (patch) | |
tree | f5c658a89333c2d042a39759b7913560b43ebf5a | |
parent | 4ba89236f00e1d18e7785da57941f25abfed6033 (diff) | |
download | gem5-02d2d7b1e0fcbdc96012c31a9be0ae2a187d4d44.tar.xz |
cpu: Made the Loop Predictor a SimObject
The Loop Predictor implementation is now a SimObject so that other branch
predictors can easily use it (including LTAGE, which is now using it).
It has also been updated with the latest
available loop predictor implementation from Andre Seznec:
http://www.irisa.fr/alf/downloads/seznec/TAGE-GSC-IMLI.tar
Change-Id: I60ad079a2c49b00a1f84d5cfd3611631883a4b57
Reviewed-on: https://gem5-review.googlesource.com/c/15775
Reviewed-by: Andreas Sandberg <andreas.sandberg@arm.com>
Maintainer: Andreas Sandberg <andreas.sandberg@arm.com>
-rw-r--r-- | src/cpu/pred/BranchPredictor.py | 40 | ||||
-rw-r--r-- | src/cpu/pred/SConscript | 1 | ||||
-rw-r--r-- | src/cpu/pred/loop_predictor.cc | 361 | ||||
-rw-r--r-- | src/cpu/pred/loop_predictor.hh | 272 | ||||
-rw-r--r-- | src/cpu/pred/ltage.cc | 286 | ||||
-rw-r--r-- | src/cpu/pred/ltage.hh | 128 |
6 files changed, 703 insertions, 385 deletions
diff --git a/src/cpu/pred/BranchPredictor.py b/src/cpu/pred/BranchPredictor.py index 06856865a..1d5fd5eec 100644 --- a/src/cpu/pred/BranchPredictor.py +++ b/src/cpu/pred/BranchPredictor.py @@ -145,16 +145,10 @@ class LTAGE_TAGE(TAGEBase): logTagTableSizes = [14, 10, 10, 11, 11, 11, 11, 10, 10, 10, 10, 9, 9] logUResetPeriod = 19 -# LTAGE branch predictor as described in -# https://www.irisa.fr/caps/people/seznec/L-TAGE.pdf -# It is basically a TAGE predictor plus a loop predictor -# The differnt TAGE sizes are updated according to the paper values (256 Kbits) -class LTAGE(TAGE): - type = 'LTAGE' - cxx_class = 'LTAGE' - cxx_header = "cpu/pred/ltage.hh" - - tage = LTAGE_TAGE() +class LoopPredictor(SimObject): + type = 'LoopPredictor' + cxx_class = 'LoopPredictor' + cxx_header = 'cpu/pred/loop_predictor.hh' logSizeLoopPred = Param.Unsigned(8, "Log size of the loop predictor") withLoopBits = Param.Unsigned(7, "Size of the WITHLOOP counter") @@ -166,8 +160,8 @@ class LTAGE(TAGE): logLoopTableAssoc = Param.Unsigned(2, "Log loop predictor associativity") # Parameters for enabling modifications to the loop predictor - # They have been copied from ISL-TAGE - # (https://www.jilp.org/jwac-2/program/03_seznec.tgz) + # They have been copied from TAGE-GSC-IMLI + # (http://www.irisa.fr/alf/downloads/seznec/TAGE-GSC-IMLI.tar) # # All of them should be disabled to match the original LTAGE implementation # (http://hpca23.cse.tamu.edu/taco/camino/cbp2/cbp-src/realistic-seznec.h) @@ -181,3 +175,25 @@ class LTAGE(TAGE): # Add a direction bit to the loop table entries useDirectionBit = Param.Bool(False, "Use direction info") + # If true, use random to decide whether to allocate or not, and only try + # with one entry + restrictAllocation = Param.Bool(False, + "Restrict the allocation conditions") + + initialLoopIter = Param.Unsigned(1, "Initial iteration number") + initialLoopAge = Param.Unsigned(255, "Initial age value") + optionalAgeReset = Param.Bool(True, + "Reset age bits optionally in some cases") + + +# LTAGE branch predictor as described in +# https://www.irisa.fr/caps/people/seznec/L-TAGE.pdf +# It is basically a TAGE predictor plus a loop predictor +# The differnt TAGE sizes are updated according to the paper values (256 Kbits) +class LTAGE(TAGE): + type = 'LTAGE' + cxx_class = 'LTAGE' + cxx_header = "cpu/pred/ltage.hh" + + tage = LTAGE_TAGE() + loop_predictor = Param.LoopPredictor(LoopPredictor(), "Loop predictor") diff --git a/src/cpu/pred/SConscript b/src/cpu/pred/SConscript index 96451f253..9e7e166ea 100644 --- a/src/cpu/pred/SConscript +++ b/src/cpu/pred/SConscript @@ -45,6 +45,7 @@ Source('tournament.cc') Source ('bi_mode.cc') Source('tage_base.cc') Source('tage.cc') +Source('loop_predictor.cc') Source('ltage.cc') DebugFlag('FreeList') DebugFlag('Branch') diff --git a/src/cpu/pred/loop_predictor.cc b/src/cpu/pred/loop_predictor.cc new file mode 100644 index 000000000..2e709f392 --- /dev/null +++ b/src/cpu/pred/loop_predictor.cc @@ -0,0 +1,361 @@ +/* + * Copyright (c) 2014 The University of Wisconsin + * + * Copyright (c) 2006 INRIA (Institut National de Recherche en + * Informatique et en Automatique / French National Research Institute + * for Computer Science and Applied Mathematics) + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Vignyan Reddy, Dibakar Gope and Arthur Perais, + * from André Seznec's code. + */ + +#include "cpu/pred/loop_predictor.hh" + +#include "params/LoopPredictor.hh" + +LoopPredictor::LoopPredictor(LoopPredictorParams *p) + : SimObject(p), logSizeLoopPred(p->logSizeLoopPred), + loopTableAgeBits(p->loopTableAgeBits), + loopTableConfidenceBits(p->loopTableConfidenceBits), + loopTableTagBits(p->loopTableTagBits), + loopTableIterBits(p->loopTableIterBits), + logLoopTableAssoc(p->logLoopTableAssoc), + confidenceThreshold((1 << loopTableConfidenceBits) - 1), + loopTagMask((1 << loopTableTagBits) - 1), + loopNumIterMask((1 << loopTableIterBits) - 1), + loopSetMask((1 << (logSizeLoopPred - logLoopTableAssoc)) - 1), + loopUseCounter(-1), + withLoopBits(p->withLoopBits), + useDirectionBit(p->useDirectionBit), + useSpeculation(p->useSpeculation), + useHashing(p->useHashing), + restrictAllocation(p->restrictAllocation), + initialLoopIter(p->initialLoopIter), + initialLoopAge(p->initialLoopAge), + optionalAgeReset(p->optionalAgeReset) +{ + assert(initialLoopAge <= ((1 << loopTableAgeBits) - 1)); +} + +void +LoopPredictor::init() +{ + // we use uint16_t type for these vales, so they cannot be more than + // 16 bits + assert(loopTableTagBits <= 16); + assert(loopTableIterBits <= 16); + + assert(logSizeLoopPred >= logLoopTableAssoc); + + ltable = new LoopEntry[ULL(1) << logSizeLoopPred]; +} + +LoopPredictor::BranchInfo* +LoopPredictor::makeBranchInfo() +{ + return new BranchInfo(); +} + +int +LoopPredictor::lindex(Addr pc_in, unsigned instShiftAmt) const +{ + // The loop table is implemented as a linear table + // If associativity is N (N being 1 << logLoopTableAssoc), + // the first N entries are for set 0, the next N entries are for set 1, + // and so on. + // Thus, this function calculates the set and then it gets left shifted + // by logLoopTableAssoc in order to return the index of the first of the + // N entries of the set + Addr pc = pc_in >> instShiftAmt; + if (useHashing) { + pc ^= pc_in; + } + return ((pc & loopSetMask) << logLoopTableAssoc); +} + +int +LoopPredictor::finallindex(int index, int lowPcBits, int way) const +{ + return (useHashing ? (index ^ ((lowPcBits >> way) << logLoopTableAssoc)) : + (index)) + + way; +} + +//loop prediction: only used if high confidence +bool +LoopPredictor::getLoop(Addr pc, BranchInfo* bi, bool speculative, + unsigned instShiftAmt) const +{ + bi->loopHit = -1; + bi->loopPredValid = false; + bi->loopIndex = lindex(pc, instShiftAmt); + + if (useHashing) { + unsigned pcShift = logSizeLoopPred - logLoopTableAssoc; + bi->loopIndexB = (pc >> pcShift) & loopSetMask; + bi->loopTag = (pc >> pcShift) ^ (pc >> (pcShift + loopTableTagBits)); + bi->loopTag &= loopTagMask; + } else { + unsigned pcShift = instShiftAmt + logSizeLoopPred - logLoopTableAssoc; + bi->loopTag = (pc >> pcShift) & loopTagMask; + // bi->loopIndexB is not used without hash + } + + for (int i = 0; i < (1 << logLoopTableAssoc); i++) { + int idx = finallindex(bi->loopIndex, bi->loopIndexB, i); + if (ltable[idx].tag == bi->loopTag) { + bi->loopHit = i; + bi->loopPredValid = calcConf(idx); + + uint16_t iter = speculative ? ltable[idx].currentIterSpec + : ltable[idx].currentIter; + + if ((iter + 1) == ltable[idx].numIter) { + return useDirectionBit ? !(ltable[idx].dir) : false; + } else { + return useDirectionBit ? (ltable[idx].dir) : true; + } + } + } + return false; +} + +bool +LoopPredictor::calcConf(int index) const +{ + return ltable[index].confidence == confidenceThreshold; +} + +void +LoopPredictor::specLoopUpdate(bool taken, BranchInfo* bi) +{ + if (bi->loopHit>=0) { + int index = finallindex(bi->loopIndex, bi->loopIndexB, bi->loopHit); + if (taken != ltable[index].dir) { + ltable[index].currentIterSpec = 0; + } else { + ltable[index].currentIterSpec = + (ltable[index].currentIterSpec + 1) & loopNumIterMask; + } + } +} + +bool +LoopPredictor::optionalAgeInc(int nrand) const +{ + return false; +} + +void +LoopPredictor::loopUpdate(Addr pc, bool taken, BranchInfo* bi, bool tage_pred, + int random0, int random1, int random2) +{ + int idx = finallindex(bi->loopIndex, bi->loopIndexB, bi->loopHit); + if (bi->loopHit >= 0) { + //already a hit + if (bi->loopPredValid) { + if (taken != bi->loopPred) { + // free the entry + ltable[idx].numIter = 0; + ltable[idx].age = 0; + ltable[idx].confidence = 0; + ltable[idx].currentIter = 0; + return; + } else if (bi->loopPred != tage_pred || optionalAgeInc(random0)) { + unsignedCtrUpdate(ltable[idx].age, true, loopTableAgeBits); + } + } + + ltable[idx].currentIter = + (ltable[idx].currentIter + 1) & loopNumIterMask; + if (ltable[idx].currentIter > ltable[idx].numIter) { + ltable[idx].confidence = 0; + if (ltable[idx].numIter != 0) { + // free the entry + ltable[idx].numIter = 0; + if (optionalAgeReset) { + ltable[idx].age = 0; + } + } + } + + if (taken != (useDirectionBit ? ltable[idx].dir : true)) { + if (ltable[idx].currentIter == ltable[idx].numIter) { + unsignedCtrUpdate(ltable[idx].confidence, true, + loopTableConfidenceBits); + //just do not predict when the loop count is 1 or 2 + if (ltable[idx].numIter < 3) { + // free the entry + ltable[idx].dir = taken; // ignored if no useDirectionBit + ltable[idx].numIter = 0; + ltable[idx].age = 0; + ltable[idx].confidence = 0; + } + } else { + if (ltable[idx].numIter == 0) { + // first complete nest; + ltable[idx].confidence = 0; + ltable[idx].numIter = ltable[idx].currentIter; + } else { + //not the same number of iterations as last time: free the + //entry + ltable[idx].numIter = 0; + if (optionalAgeReset) { + ltable[idx].age = 0; + } + ltable[idx].confidence = 0; + } + } + ltable[idx].currentIter = 0; + } + + } else if (useDirectionBit ? (bi->predTaken != taken) : taken) { + if ((random2 & 3) == 0 || !restrictAllocation) { + //try to allocate an entry on taken branch + int nrand = random1; + for (int i = 0; i < (1 << logLoopTableAssoc); i++) { + int loop_hit = (nrand + i) & ((1 << logLoopTableAssoc) - 1); + idx = finallindex(bi->loopIndex, bi->loopIndexB, loop_hit); + if (ltable[idx].age == 0) { + ltable[idx].dir = !taken; // ignored if no useDirectionBit + ltable[idx].tag = bi->loopTag; + ltable[idx].numIter = 0; + ltable[idx].age = initialLoopAge; + ltable[idx].confidence = 0; + ltable[idx].currentIter = initialLoopIter; + break; + + } else { + ltable[idx].age--; + } + if (restrictAllocation) { + break; + } + } + } + } +} + +bool +LoopPredictor::loopPredict(ThreadID tid, Addr branch_pc, bool cond_branch, + BranchInfo* bi, bool prev_pred_taken, unsigned instShiftAmt) +{ + bool pred_taken = prev_pred_taken; + if (cond_branch) { + // loop prediction + bi->loopPred = getLoop(branch_pc, bi, useSpeculation, instShiftAmt); + + if ((loopUseCounter >= 0) && bi->loopPredValid) { + pred_taken = bi->loopPred; + bi->loopPredUsed = true; + } + + if (useSpeculation) { + specLoopUpdate(pred_taken, bi); + } + } + + return pred_taken; +} + +void +LoopPredictor::squash(ThreadID tid, BranchInfo *bi) +{ + if (bi->loopHit >= 0) { + int idx = finallindex(bi->loopIndex, + bi->loopIndexB, + bi->loopHit); + ltable[idx].currentIterSpec = bi->currentIter; + } +} + +void +LoopPredictor::squashLoop(BranchInfo* bi) +{ + if (bi->loopHit >= 0) { + int idx = finallindex(bi->loopIndex, + bi->loopIndexB, + bi->loopHit); + ltable[idx].currentIterSpec = bi->currentIter; + } +} + +void +LoopPredictor::updateStats(bool taken, BranchInfo* bi) +{ + if (taken == bi->loopPred) { + loopPredictorCorrect++; + } else { + loopPredictorWrong++; + } +} + +void +LoopPredictor::condBranchUpdate(ThreadID tid, Addr branch_pc, bool taken, + bool tage_pred, BranchInfo* bi, + unsigned instShiftAmt, int rand0, int rand1, + int rand2) +{ + if (useSpeculation) { + // recalculate loop prediction without speculation + // It is ok to overwrite the loop prediction fields in bi + // as the stats have already been updated with the previous + // values + bi->loopPred = getLoop(branch_pc, bi, false, instShiftAmt); + } + + if (bi->loopPredValid) { + if (bi->predTaken != bi->loopPred) { + signedCtrUpdate(loopUseCounter, + (bi->loopPred == taken), + withLoopBits); + } + } + + loopUpdate(branch_pc, taken, bi, tage_pred, rand0, rand1, rand2); +} + +void +LoopPredictor::regStats() +{ + loopPredictorCorrect + .name(name() + ".loopPredictorCorrect") + .desc("Number of times the loop predictor is the provider and " + "the prediction is correct"); + + loopPredictorWrong + .name(name() + ".loopPredictorWrong") + .desc("Number of times the loop predictor is the provider and " + "the prediction is wrong"); +} + +LoopPredictor * +LoopPredictorParams::create() +{ + return new LoopPredictor(this); +} diff --git a/src/cpu/pred/loop_predictor.hh b/src/cpu/pred/loop_predictor.hh new file mode 100644 index 000000000..3579c5763 --- /dev/null +++ b/src/cpu/pred/loop_predictor.hh @@ -0,0 +1,272 @@ +/* + * Copyright (c) 2014 The University of Wisconsin + * + * Copyright (c) 2006 INRIA (Institut National de Recherche en + * Informatique et en Automatique / French National Research Institute + * for Computer Science and Applied Mathematics) + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Vignyan Reddy, Dibakar Gope and Arthur Perais, + * from André Seznec's code. + */ + +#ifndef __CPU_PRED_LOOP_PREDICTOR_HH__ +#define __CPU_PRED_LOOP_PREDICTOR_HH__ + +#include "base/statistics.hh" +#include "base/types.hh" +#include "sim/sim_object.hh" + +struct LoopPredictorParams; + +class LoopPredictor : public SimObject +{ + protected: + const unsigned logSizeLoopPred; + const unsigned loopTableAgeBits; + const unsigned loopTableConfidenceBits; + const unsigned loopTableTagBits; + const unsigned loopTableIterBits; + const unsigned logLoopTableAssoc; + const uint8_t confidenceThreshold; + const uint16_t loopTagMask; + const uint16_t loopNumIterMask; + const int loopSetMask; + + // Prediction Structures + // Loop Predictor Entry + struct LoopEntry + { + uint16_t numIter; + uint16_t currentIter; + uint16_t currentIterSpec; // only for useSpeculation + uint8_t confidence; + uint16_t tag; + uint8_t age; + bool dir; // only for useDirectionBit + + LoopEntry() : numIter(0), currentIter(0), currentIterSpec(0), + confidence(0), tag(0), age(0), dir(0) { } + }; + + LoopEntry *ltable; + + int8_t loopUseCounter; + unsigned withLoopBits; + + const bool useDirectionBit; + const bool useSpeculation; + const bool useHashing; + const bool restrictAllocation; + const unsigned initialLoopIter; + const unsigned initialLoopAge; + const bool optionalAgeReset; + + // stats + Stats::Scalar loopPredictorCorrect; + Stats::Scalar loopPredictorWrong; + + /** + * Updates an unsigned counter based on up/down parameter + * @param ctr Reference to counter to update. + * @param up Boolean indicating if the counter is incremented/decremented + * If true it is incremented, if false it is decremented + * @param nbits Counter width. + */ + static inline void unsignedCtrUpdate(uint8_t &ctr, bool up, unsigned nbits) + { + assert(nbits <= sizeof(uint8_t) << 3); + if (up) { + if (ctr < ((1 << nbits) - 1)) + ctr++; + } else { + if (ctr) + ctr--; + } + } + static inline void signedCtrUpdate(int8_t &ctr, bool up, unsigned nbits) + { + if (up) { + if (ctr < ((1 << (nbits - 1)) - 1)) + ctr++; + } else { + if (ctr > -(1 << (nbits - 1))) + ctr--; + } + } + public: + // Primary branch history entry + struct BranchInfo + { + uint16_t loopTag; + uint16_t currentIter; + + bool loopPred; + bool loopPredValid; + bool loopPredUsed; + int loopIndex; + int loopIndexB; // only for useHashing + int loopHit; + bool predTaken; + + BranchInfo() + : loopTag(0), currentIter(0), + loopPred(false), + loopPredValid(false), loopIndex(0), loopIndexB(0), loopHit(0), + predTaken(false) + {} + }; + + /** + * Computes the index used to access the + * loop predictor. + * @param pc_in The unshifted branch PC. + * @param instShiftAmt Shift the pc by as many bits + */ + int lindex(Addr pc_in, unsigned instShiftAmt) const; + + /** + * Computes the index used to access the + * ltable structures. + * It may take hashing into account + * @param index Result of lindex function + * @param lowPcBits PC bits masked with set size + * @param way Way to be used + */ + int finallindex(int lindex, int lowPcBits, int way) const; + + /** + * Get a branch prediction from the loop + * predictor. + * @param pc The unshifted branch PC. + * @param bi Pointer to information on the + * prediction. + * @param speculative Use speculative number of iterations + * @param instShiftAmt Shift the pc by as many bits (if hashing is not + * used) + * @result the result of the prediction, if it could be predicted + */ + bool getLoop(Addr pc, BranchInfo* bi, bool speculative, + unsigned instShiftAmt) const; + + /** + * Updates the loop predictor. + * @param pc The unshifted branch PC. + * @param taken The actual branch outcome. + * @param bi Pointer to information on the + * prediction recorded at prediction time. + * @param tage_pred tage prediction of the branch + * @param random0 random value + * @param random1 random value + * @param random2 random value + */ + void loopUpdate(Addr pc, bool Taken, BranchInfo* bi, bool tage_pred, + int random0, int random1, int random2); + + /** + * Speculatively updates the loop predictor + * iteration count (only for useSpeculation). + * @param taken The predicted branch outcome. + * @param bi Pointer to information on the prediction + * recorded at prediction time. + */ + void specLoopUpdate(bool taken, BranchInfo* bi); + + /** + * Update LTAGE for conditional branches. + * @param branch_pc The unshifted branch PC. + * @param taken Actual branch outcome. + * @param tage_pred Prediction from TAGE + * @param bi Pointer to information on the prediction + * recorded at prediction time. + * @param instShiftAmt Number of bits to shift instructions + * @param rand0 Random integer value + * @param rand1 Random integer value + * @param rand2 Random integer value + */ + void condBranchUpdate( + ThreadID tid, Addr branch_pc, bool taken, bool tage_pred, + BranchInfo* bi, unsigned instShiftAmt, int rand0, int rand1, + int rand2); + + /** + * Get the loop prediction + * @param tid The thread ID to select the global + * histories to use. + * @param branch_pc The unshifted branch PC. + * @param cond_branch True if the branch is conditional. + * @param bi Reference to wrapping pointer to allow storing + * derived class prediction information in the base class. + * @param prev_pred_taken Result of the TAGE prediction + * @param instShiftAmt Shift the pc by as many bits + * @param instShiftAmt Shift the pc by as many bits + * @result the prediction, true if taken + */ + bool loopPredict( + ThreadID tid, Addr branch_pc, bool cond_branch, + BranchInfo* bi, bool prev_pred_taken, unsigned instShiftAmt); + + /** + * Update the stats + * @param taken Actual branch outcome + * @param bi Pointer to information on the prediction + * recorded at prediction time. + */ + void updateStats(bool taken, BranchInfo* bi); + + void squashLoop(BranchInfo * bi); + + void squash(ThreadID tid, BranchInfo *bi); + + virtual bool calcConf(int index) const; + + virtual bool optionalAgeInc(int nrand) const; + + virtual BranchInfo *makeBranchInfo(); + + /** + * Gets the value of the loop use counter + * @return the loop use counter value + */ + int8_t getLoopUseCounter() const + { + return loopUseCounter; + } + + /** + * Initialize the loop predictor + */ + void init() override; + + /** + * Register stats for this object + */ + void regStats() override; + + LoopPredictor(LoopPredictorParams *p); +}; +#endif//__CPU_PRED_LOOP_PREDICTOR_HH__ diff --git a/src/cpu/pred/ltage.cc b/src/cpu/pred/ltage.cc index 22945ecda..cab86e459 100644 --- a/src/cpu/pred/ltage.cc +++ b/src/cpu/pred/ltage.cc @@ -42,239 +42,43 @@ #include "base/intmath.hh" #include "base/logging.hh" -#include "base/random.hh" #include "base/trace.hh" #include "debug/Fetch.hh" #include "debug/LTage.hh" LTAGE::LTAGE(const LTAGEParams *params) - : TAGE(params), - logSizeLoopPred(params->logSizeLoopPred), - loopTableAgeBits(params->loopTableAgeBits), - loopTableConfidenceBits(params->loopTableConfidenceBits), - loopTableTagBits(params->loopTableTagBits), - loopTableIterBits(params->loopTableIterBits), - logLoopTableAssoc(params->logLoopTableAssoc), - confidenceThreshold((1 << loopTableConfidenceBits) - 1), - loopTagMask((1 << loopTableTagBits) - 1), - loopNumIterMask((1 << loopTableIterBits) - 1), - loopSetMask((1 << (logSizeLoopPred - logLoopTableAssoc)) - 1), - loopUseCounter(0), - withLoopBits(params->withLoopBits), - useDirectionBit(params->useDirectionBit), - useSpeculation(params->useSpeculation), - useHashing(params->useHashing) + : TAGE(params), loopPredictor(params->loop_predictor) { - // we use uint16_t type for these vales, so they cannot be more than - // 16 bits - assert(loopTableTagBits <= 16); - assert(loopTableIterBits <= 16); - - assert(logSizeLoopPred >= logLoopTableAssoc); - - ltable = new LoopEntry[ULL(1) << logSizeLoopPred]; -} - -int -LTAGE::lindex(Addr pc_in) const -{ - // The loop table is implemented as a linear table - // If associativity is N (N being 1 << logLoopTableAssoc), - // the first N entries are for set 0, the next N entries are for set 1, - // and so on. - // Thus, this function calculates the set and then it gets left shifted - // by logLoopTableAssoc in order to return the index of the first of the - // N entries of the set - Addr mask = (ULL(1) << (logSizeLoopPred - logLoopTableAssoc)) - 1; - Addr pc = pc_in >> instShiftAmt; - if (useHashing) { - // copied from TAGE-SC-L - // (http://www.jilp.org/cbp2016/code/AndreSeznecLimited.tar.gz) - pc ^= (pc_in >> (instShiftAmt + logLoopTableAssoc)); - } - return ((pc & mask) << logLoopTableAssoc); -} - -int -LTAGE::finallindex(int index, int lowPcBits, int way) const -{ - // copied from TAGE-SC-L - // (http://www.jilp.org/cbp2016/code/AndreSeznecLimited.tar.gz) - return (useHashing ? (index ^ ((lowPcBits >> way) << logLoopTableAssoc)) : - (index)) - + way; -} - -//loop prediction: only used if high confidence -bool -LTAGE::getLoop(Addr pc, LTageBranchInfo* bi, bool speculative) const -{ - bi->loopHit = -1; - bi->loopPredValid = false; - bi->loopIndex = lindex(pc); - unsigned pcShift = instShiftAmt + logSizeLoopPred - logLoopTableAssoc; - bi->loopTag = ((pc) >> pcShift) & loopTagMask; - - if (useHashing) { - bi->loopTag ^= ((pc >> (pcShift + logSizeLoopPred)) & loopTagMask); - bi->loopLowPcBits = (pc >> pcShift) & loopSetMask; - } - - for (int i = 0; i < (1 << logLoopTableAssoc); i++) { - int idx = finallindex(bi->loopIndex, bi->loopLowPcBits, i); - if (ltable[idx].tag == bi->loopTag) { - bi->loopHit = i; - bi->loopPredValid = - ltable[idx].confidence == confidenceThreshold; - - uint16_t iter = speculative ? ltable[idx].currentIterSpec - : ltable[idx].currentIter; - - if ((iter + 1) == ltable[idx].numIter) { - return useDirectionBit ? !(ltable[idx].dir) : false; - } else { - return useDirectionBit ? (ltable[idx].dir) : true; - } - } - } - return false; -} - -void -LTAGE::specLoopUpdate(bool taken, LTageBranchInfo* bi) -{ - if (bi->loopHit>=0) { - int index = finallindex(bi->loopIndex, bi->loopLowPcBits, bi->loopHit); - if (taken != ltable[index].dir) { - ltable[index].currentIterSpec = 0; - } else { - ltable[index].currentIterSpec = - (ltable[index].currentIterSpec + 1) & loopNumIterMask; - } - } -} - -void -LTAGE::loopUpdate(Addr pc, bool taken, LTageBranchInfo* bi) -{ - int idx = finallindex(bi->loopIndex, bi->loopLowPcBits, bi->loopHit); - if (bi->loopHit >= 0) { - //already a hit - if (bi->loopPredValid) { - if (taken != bi->loopPred) { - // free the entry - ltable[idx].numIter = 0; - ltable[idx].age = 0; - ltable[idx].confidence = 0; - ltable[idx].currentIter = 0; - return; - } else if (bi->loopPred != bi->tageBranchInfo->tagePred) { - DPRINTF(LTage, "Loop Prediction success:%lx\n",pc); - TAGEBase::unsignedCtrUpdate(ltable[idx].age, true, - loopTableAgeBits); - } - } - - ltable[idx].currentIter = - (ltable[idx].currentIter + 1) & loopNumIterMask; - if (ltable[idx].currentIter > ltable[idx].numIter) { - ltable[idx].confidence = 0; - if (ltable[idx].numIter != 0) { - // free the entry - ltable[idx].numIter = 0; - ltable[idx].age = 0; - ltable[idx].confidence = 0; - } - } - - if (taken != (useDirectionBit ? ltable[idx].dir : true)) { - if (ltable[idx].currentIter == ltable[idx].numIter) { - DPRINTF(LTage, "Loop End predicted successfully:%lx\n", pc); - - TAGEBase::unsignedCtrUpdate(ltable[idx].confidence, true, - loopTableConfidenceBits); - //just do not predict when the loop count is 1 or 2 - if (ltable[idx].numIter < 3) { - // free the entry - ltable[idx].dir = taken; // ignored if no useDirectionBit - ltable[idx].numIter = 0; - ltable[idx].age = 0; - ltable[idx].confidence = 0; - } - } else { - DPRINTF(LTage, "Loop End predicted incorrectly:%lx\n", pc); - if (ltable[idx].numIter == 0) { - // first complete nest; - ltable[idx].confidence = 0; - ltable[idx].numIter = ltable[idx].currentIter; - } else { - //not the same number of iterations as last time: free the - //entry - ltable[idx].numIter = 0; - ltable[idx].age = 0; - ltable[idx].confidence = 0; - } - } - ltable[idx].currentIter = 0; - } - - } else if (useDirectionBit ? - ((bi->loopPredValid ? - bi->loopPred : bi->tageBranchInfo->tagePred) != taken) : - taken) { - //try to allocate an entry on taken branch - int nrand = TAGEBase::getRandom(); - for (int i = 0; i < (1 << logLoopTableAssoc); i++) { - int loop_hit = (nrand + i) & ((1 << logLoopTableAssoc) - 1); - idx = bi->loopIndex + loop_hit; - if (ltable[idx].age == 0) { - DPRINTF(LTage, "Allocating loop pred entry for branch %lx\n", - pc); - ltable[idx].dir = !taken; // ignored if no useDirectionBit - ltable[idx].tag = bi->loopTag; - ltable[idx].numIter = 0; - ltable[idx].age = (1 << loopTableAgeBits) - 1; - ltable[idx].confidence = 0; - ltable[idx].currentIter = 1; - break; - - } - else - ltable[idx].age--; - } - } - } //prediction bool LTAGE::predict(ThreadID tid, Addr branch_pc, bool cond_branch, void* &b) { - LTageBranchInfo *bi = new LTageBranchInfo(*tage); + LTageBranchInfo *bi = new LTageBranchInfo(*tage, *loopPredictor); b = (void*)(bi); bool pred_taken = tage->tagePredict(tid, branch_pc, cond_branch, bi->tageBranchInfo); + pred_taken = loopPredictor->loopPredict(tid, branch_pc, cond_branch, + bi->lpBranchInfo, pred_taken, + instShiftAmt); if (cond_branch) { - // loop prediction - bi->loopPred = getLoop(branch_pc, bi, useSpeculation); - - if ((loopUseCounter >= 0) && bi->loopPredValid) { - pred_taken = bi->loopPred; + if (bi->lpBranchInfo->loopPredUsed) { bi->tageBranchInfo->provider = LOOP; } DPRINTF(LTage, "Predict for %lx: taken?:%d, loopTaken?:%d, " "loopValid?:%d, loopUseCounter:%d, tagePred:%d, altPred:%d\n", - branch_pc, pred_taken, bi->loopPred, bi->loopPredValid, - loopUseCounter, bi->tageBranchInfo->tagePred, - bi->tageBranchInfo->altTaken); - - if (useSpeculation) { - specLoopUpdate(pred_taken, bi); - } + branch_pc, pred_taken, bi->lpBranchInfo->loopPred, + bi->lpBranchInfo->loopPredValid, + loopPredictor->getLoopUseCounter(), + bi->tageBranchInfo->tagePred, bi->tageBranchInfo->altTaken); } + // record final prediction + bi->lpBranchInfo->predTaken = pred_taken; + return pred_taken; } @@ -286,12 +90,17 @@ LTAGE::update(ThreadID tid, Addr branch_pc, bool taken, void* bp_history, LTageBranchInfo* bi = static_cast<LTageBranchInfo*>(bp_history); + assert(corrTarget != MaxAddr); + if (squashed) { if (tage->isSpeculativeUpdateEnabled()) { // This restores the global history, then update it // and recomputes the folded histories. tage->squash(tid, taken, bi->tageBranchInfo, corrTarget); - squashLoop(bi); + + if (bi->tageBranchInfo->condBranch) { + loopPredictor->squashLoop(bi->lpBranchInfo); + } } return; } @@ -301,31 +110,13 @@ LTAGE::update(ThreadID tid, Addr branch_pc, bool taken, void* bp_history, DPRINTF(LTage, "Updating tables for branch:%lx; taken?:%d\n", branch_pc, taken); tage->updateStats(taken, bi->tageBranchInfo); - // update stats - if (bi->tageBranchInfo->provider == LOOP) { - if (taken == bi->loopPred) { - loopPredictorCorrect++; - } else { - loopPredictorWrong++; - } - } - // cond Branch Update - if (useSpeculation) { - // recalculate loop prediction without speculation - // It is ok to overwrite the loop prediction fields in bi - // as the stats have already been updated with the previous - // values - bi->loopPred = getLoop(branch_pc, bi, false); - } - if (bi->loopPredValid) { - if (bi->tageBranchInfo->tagePred != bi->loopPred) { - TAGEBase::ctrUpdate(loopUseCounter, - (bi->loopPred == taken), - withLoopBits); - } - } - loopUpdate(branch_pc, taken, bi); + loopPredictor->updateStats(taken, bi->lpBranchInfo); + + loopPredictor->condBranchUpdate(tid, branch_pc, taken, + bi->tageBranchInfo->tagePred, bi->lpBranchInfo, instShiftAmt, + TAGEBase::getRandom(), TAGEBase::getRandom(), + TAGEBase::getRandom()); tage->condBranchUpdate(tid, branch_pc, taken, bi->tageBranchInfo, nrand, corrTarget); @@ -338,25 +129,12 @@ LTAGE::update(ThreadID tid, Addr branch_pc, bool taken, void* bp_history, } void -LTAGE::squashLoop(LTageBranchInfo* bi) -{ - if (bi->tageBranchInfo->condBranch) { - if (bi->loopHit >= 0) { - int idx = finallindex(bi->loopIndex, - bi->loopLowPcBits, - bi->loopHit); - ltable[idx].currentIterSpec = bi->currentIter; - } - } -} - -void LTAGE::squash(ThreadID tid, void *bp_history) { LTageBranchInfo* bi = (LTageBranchInfo*)(bp_history); if (bi->tageBranchInfo->condBranch) { - squashLoop(bi); + loopPredictor->squash(tid, bi->lpBranchInfo); } TAGE::squash(tid, bp_history); @@ -366,20 +144,8 @@ void LTAGE::regStats() { TAGE::regStats(); - - loopPredictorCorrect - .name(name() + ".loopPredictorCorrect") - .desc("Number of times the loop predictor is the provider and " - "the prediction is correct"); - - loopPredictorWrong - .name(name() + ".loopPredictorWrong") - .desc("Number of times the loop predictor is the provider and " - "the prediction is wrong"); } - - LTAGE* LTAGEParams::create() { diff --git a/src/cpu/pred/ltage.hh b/src/cpu/pred/ltage.hh index c749cba2c..caddbc6a6 100644 --- a/src/cpu/pred/ltage.hh +++ b/src/cpu/pred/ltage.hh @@ -56,10 +56,11 @@ #include <vector> #include "base/types.hh" +#include "cpu/pred/loop_predictor.hh" #include "cpu/pred/tage.hh" #include "params/LTAGE.hh" -class LTAGE: public TAGE +class LTAGE : public TAGE { public: LTAGE(const LTAGEParams *params); @@ -68,101 +69,34 @@ class LTAGE: public TAGE void squash(ThreadID tid, void *bp_history) override; void update(ThreadID tid, Addr branch_addr, bool taken, void *bp_history, bool squashed, const StaticInstPtr & inst, - Addr corrTarget) override; + Addr corrTarget = MaxAddr) override; + virtual void regStats() override; - void regStats() override; - - private: - // Prediction Structures - // Loop Predictor Entry - struct LoopEntry - { - uint16_t numIter; - uint16_t currentIter; - uint16_t currentIterSpec; // only for useSpeculation - uint8_t confidence; - uint16_t tag; - uint8_t age; - bool dir; // only for useDirectionBit - - LoopEntry() : numIter(0), currentIter(0), currentIterSpec(0), - confidence(0), tag(0), age(0), dir(0) { } - }; + protected: + /** The loop predictor object */ + LoopPredictor *loopPredictor; // more provider types enum { - LOOP = TAGEBase::LAST_TAGE_PROVIDER_TYPE + 1 + LOOP = TAGEBase::LAST_TAGE_PROVIDER_TYPE + 1, + LAST_LTAGE_PROVIDER_TYPE = LOOP }; // Primary branch history entry struct LTageBranchInfo : public TageBranchInfo { - uint16_t loopTag; - uint16_t currentIter; - - bool loopPred; - bool loopPredValid; - int loopIndex; - int loopLowPcBits; // only for useHashing - int loopHit; - - LTageBranchInfo(TAGEBase &tage) - : TageBranchInfo(tage), - loopTag(0), currentIter(0), - loopPred(false), - loopPredValid(false), loopIndex(0), loopLowPcBits(0), loopHit(0) + LoopPredictor::BranchInfo *lpBranchInfo; + LTageBranchInfo(TAGEBase &tage, LoopPredictor &lp) + : TageBranchInfo(tage), lpBranchInfo(lp.makeBranchInfo()) {} virtual ~LTageBranchInfo() - {} + { + delete lpBranchInfo; + } }; /** - * Computes the index used to access the - * loop predictor. - * @param pc_in The unshifted branch PC. - */ - int lindex(Addr pc_in) const; - - /** - * Computes the index used to access the - * ltable structures. - * It may take hashing into account - * @param index Result of lindex function - * @param lowPcBits PC bits masked with set size - * @param way Way to be used - */ - int finallindex(int lindex, int lowPcBits, int way) const; - - /** - * Get a branch prediction from the loop - * predictor. - * @param pc The unshifted branch PC. - * @param bi Pointer to information on the - * prediction. - * @param speculative Use speculative number of iterations - */ - bool getLoop(Addr pc, LTageBranchInfo* bi, bool speculative) const; - - /** - * Updates the loop predictor. - * @param pc The unshifted branch PC. - * @param taken The actual branch outcome. - * @param bi Pointer to information on the - * prediction recorded at prediction time. - */ - void loopUpdate(Addr pc, bool Taken, LTageBranchInfo* bi); - - /** - * Speculatively updates the loop predictor - * iteration count (only for useSpeculation). - * @param taken The predicted branch outcome. - * @param bi Pointer to information on the prediction - * recorded at prediction time. - */ - void specLoopUpdate(bool taken, LTageBranchInfo* bi); - - /** * Get a branch prediction from LTAGE. *NOT* an override of * BpredUnit::predict(). * @param tid The thread ID to select the global @@ -174,38 +108,6 @@ class LTAGE: public TAGE */ bool predict( ThreadID tid, Addr branch_pc, bool cond_branch, void* &b) override; - - /** - * Restores speculatively updated path and direction histories. - * Also recomputes compressed (folded) histories based on the - * correct branch outcome. - * @param bi Branch information. - */ - void squashLoop(LTageBranchInfo * bi); - - const unsigned logSizeLoopPred; - const unsigned loopTableAgeBits; - const unsigned loopTableConfidenceBits; - const unsigned loopTableTagBits; - const unsigned loopTableIterBits; - const unsigned logLoopTableAssoc; - const uint8_t confidenceThreshold; - const uint16_t loopTagMask; - const uint16_t loopNumIterMask; - const int loopSetMask; - - LoopEntry *ltable; - - int8_t loopUseCounter; - unsigned withLoopBits; - - const bool useDirectionBit; - const bool useSpeculation; - const bool useHashing; - - // stats - Stats::Scalar loopPredictorCorrect; - Stats::Scalar loopPredictorWrong; }; #endif // __CPU_PRED_LTAGE |