diff options
author | Pau Cabre <pau.cabre@metempsy.com> | 2018-11-23 23:29:35 +0100 |
---|---|---|
committer | Pau Cabre <pau.cabre@metempsy.com> | 2018-12-11 22:21:56 +0000 |
commit | 71f6fd3df47989f91829a261bba751ce40531795 (patch) | |
tree | a8fdd6db13165e9cdabc24c16139e4bdb3a8ba29 /src | |
parent | 866b200c202dded37fdd857a1a42ec149bd109c9 (diff) | |
download | gem5-71f6fd3df47989f91829a261bba751ce40531795.tar.xz |
cpu: Added parameters to enable/disable features in LTAGE
The new parameters enable or disable the following features of the LTAGE loop predictor:
- Hashing when calculating the loop table index
- Direction information (a direction bit in the loop table entries)
- Speculative iteration number information
Change-Id: I395f4526163ee0d0229d1e87cde2bb046f1dd43a
Signed-off-by: Pau Cabre <pau.cabre@metempsy.com>
Reviewed-on: https://gem5-review.googlesource.com/c/14597
Reviewed-by: Ilias Vougioukas <ilias.vougioukas@arm.com>
Reviewed-by: Jason Lowe-Power <jason@lowepower.com>
Reviewed-by: Louis Delhez <ldelhez@ucla.edu>
Maintainer: Jason Lowe-Power <jason@lowepower.com>
Diffstat (limited to 'src')
-rw-r--r-- | src/cpu/pred/BranchPredictor.py | 16 | ||||
-rw-r--r-- | src/cpu/pred/ltage.cc | 93 | ||||
-rw-r--r-- | src/cpu/pred/ltage.hh | 30 |
3 files changed, 108 insertions, 31 deletions
diff --git a/src/cpu/pred/BranchPredictor.py b/src/cpu/pred/BranchPredictor.py index 2c622cd02..0c1e9c284 100644 --- a/src/cpu/pred/BranchPredictor.py +++ b/src/cpu/pred/BranchPredictor.py @@ -143,3 +143,19 @@ class LTAGE(TAGE): loopTableIterBits = Param.Unsigned(14, "Nuber of iteration bits per loop") logLoopTableAssoc = Param.Unsigned(2, "Log loop predictor associativity") + # Parameters for enabling modifications to the loop predictor + # They have been copied from ISL-TAGE + # (https://www.jilp.org/jwac-2/program/03_seznec.tgz) + # + # All of them should be disabled to match the original LTAGE implementation + # (http://hpca23.cse.tamu.edu/taco/camino/cbp2/cbp-src/realistic-seznec.h) + + # Add speculation + useSpeculation = Param.Bool(False, "Use speculation") + + # Add hashing for calculating the loop table index + useHashing = Param.Bool(False, "Use hashing") + + # Add a direction bit to the loop table entries + useDirectionBit = Param.Bool(False, "Use direction info") + diff --git a/src/cpu/pred/ltage.cc b/src/cpu/pred/ltage.cc index 73f477745..8f332b3b5 100644 --- a/src/cpu/pred/ltage.cc +++ b/src/cpu/pred/ltage.cc @@ -58,8 +58,12 @@ LTAGE::LTAGE(const LTAGEParams *params) confidenceThreshold((1 << loopTableConfidenceBits) - 1), loopTagMask((1 << loopTableTagBits) - 1), loopNumIterMask((1 << loopTableIterBits) - 1), + loopSetMask((1 << (logSizeLoopPred - logLoopTableAssoc)) - 1), loopUseCounter(0), - withLoopBits(params->withLoopBits) + withLoopBits(params->withLoopBits), + useDirectionBit(params->useDirectionBit), + useSpeculation(params->useSpeculation), + useHashing(params->useHashing) { // we use uint16_t type for these vales, so they cannot be more than // 16 bits @@ -82,12 +86,28 @@ LTAGE::lindex(Addr pc_in) const // by logLoopTableAssoc in order to return the index of the first of the // N entries of the set Addr mask = (ULL(1) << (logSizeLoopPred - logLoopTableAssoc)) - 1; - return (((pc_in >> instShiftAmt) & mask) << logLoopTableAssoc); + Addr pc = 
pc_in >> instShiftAmt; + if (useHashing) { + // copied from TAGE-SC-L + // (http://www.jilp.org/cbp2016/code/AndreSeznecLimited.tar.gz) + pc ^= (pc_in >> (instShiftAmt + logLoopTableAssoc)); + } + return ((pc & mask) << logLoopTableAssoc); +} + +int +LTAGE::finallindex(int index, int lowPcBits, int way) const +{ + // copied from TAGE-SC-L + // (http://www.jilp.org/cbp2016/code/AndreSeznecLimited.tar.gz) + return (useHashing ? (index ^ ((lowPcBits >> way) << logLoopTableAssoc)) : + (index)) + + way; } //loop prediction: only used if high confidence bool -LTAGE::getLoop(Addr pc, LTageBranchInfo* bi) const +LTAGE::getLoop(Addr pc, LTageBranchInfo* bi, bool speculative) const { bi->loopHit = -1; bi->loopPredValid = false; @@ -95,17 +115,25 @@ LTAGE::getLoop(Addr pc, LTageBranchInfo* bi) const unsigned pcShift = instShiftAmt + logSizeLoopPred - logLoopTableAssoc; bi->loopTag = ((pc) >> pcShift) & loopTagMask; + if (useHashing) { + bi->loopTag ^= ((pc >> (pcShift + logSizeLoopPred)) & loopTagMask); + bi->loopLowPcBits = (pc >> pcShift) & loopSetMask; + } + for (int i = 0; i < (1 << logLoopTableAssoc); i++) { - if (ltable[bi->loopIndex + i].tag == bi->loopTag) { + int idx = finallindex(bi->loopIndex, bi->loopLowPcBits, i); + if (ltable[idx].tag == bi->loopTag) { bi->loopHit = i; bi->loopPredValid = - ltable[bi->loopIndex + i].confidence == confidenceThreshold; - bi->currentIter = ltable[bi->loopIndex + i].currentIterSpec; - if (ltable[bi->loopIndex + i].currentIterSpec + 1 == - ltable[bi->loopIndex + i].numIter) { - return !(ltable[bi->loopIndex + i].dir); - }else { - return (ltable[bi->loopIndex + i].dir); + ltable[idx].confidence == confidenceThreshold; + + uint16_t iter = speculative ? ltable[idx].currentIterSpec + : ltable[idx].currentIter; + + if ((iter + 1) == ltable[idx].numIter) { + return useDirectionBit ? !(ltable[idx].dir) : false; + } else { + return useDirectionBit ? 
(ltable[idx].dir) : true; } } } @@ -113,10 +141,10 @@ LTAGE::getLoop(Addr pc, LTageBranchInfo* bi) const } void -LTAGE::specLoopUpdate(Addr pc, bool taken, LTageBranchInfo* bi) +LTAGE::specLoopUpdate(bool taken, LTageBranchInfo* bi) { if (bi->loopHit>=0) { - int index = lindex(pc); + int index = finallindex(bi->loopIndex, bi->loopLowPcBits, bi->loopHit); if (taken != ltable[index].dir) { ltable[index].currentIterSpec = 0; } else { @@ -129,7 +157,7 @@ LTAGE::specLoopUpdate(Addr pc, bool taken, LTageBranchInfo* bi) void LTAGE::loopUpdate(Addr pc, bool taken, LTageBranchInfo* bi) { - int idx = bi->loopIndex + bi->loopHit; + int idx = finallindex(bi->loopIndex, bi->loopLowPcBits, bi->loopHit); if (bi->loopHit >= 0) { //already a hit if (bi->loopPredValid) { @@ -158,7 +186,7 @@ LTAGE::loopUpdate(Addr pc, bool taken, LTageBranchInfo* bi) } } - if (taken != ltable[idx].dir) { + if (taken != (useDirectionBit ? ltable[idx].dir : true)) { if (ltable[idx].currentIter == ltable[idx].numIter) { DPRINTF(LTage, "Loop End predicted successfully:%lx\n", pc); @@ -167,7 +195,7 @@ LTAGE::loopUpdate(Addr pc, bool taken, LTageBranchInfo* bi) //just do not predict when the loop count is 1 or 2 if (ltable[idx].numIter < 3) { // free the entry - ltable[idx].dir = taken; + ltable[idx].dir = taken; // ignored if no useDirectionBit ltable[idx].numIter = 0; ltable[idx].age = 0; ltable[idx].confidence = 0; @@ -189,7 +217,9 @@ LTAGE::loopUpdate(Addr pc, bool taken, LTageBranchInfo* bi) ltable[idx].currentIter = 0; } - } else if (taken) { + } else if (useDirectionBit ? + ((bi->loopPredValid ? 
bi->loopPred : bi->tagePred) != taken) : + taken) { //try to allocate an entry on taken branch int nrand = random_mt.random<int>(); for (int i = 0; i < (1 << logLoopTableAssoc); i++) { @@ -198,7 +228,7 @@ LTAGE::loopUpdate(Addr pc, bool taken, LTageBranchInfo* bi) if (ltable[idx].age == 0) { DPRINTF(LTage, "Allocating loop pred entry for branch %lx\n", pc); - ltable[idx].dir = !taken; + ltable[idx].dir = !taken; // ignored if no useDirectionBit ltable[idx].tag = bi->loopTag; ltable[idx].numIter = 0; ltable[idx].age = (1 << loopTableAgeBits) - 1; @@ -224,7 +254,8 @@ LTAGE::predict(ThreadID tid, Addr branch_pc, bool cond_branch, void* &b) bool pred_taken = tagePredict(tid, branch_pc, cond_branch, bi); if (cond_branch) { - bi->loopPred = getLoop(branch_pc, bi); // loop prediction + // loop prediction + bi->loopPred = getLoop(branch_pc, bi, useSpeculation); if ((loopUseCounter >= 0) && bi->loopPredValid) { pred_taken = bi->loopPred; @@ -234,9 +265,12 @@ LTAGE::predict(ThreadID tid, Addr branch_pc, bool cond_branch, void* &b) "loopValid?:%d, loopUseCounter:%d, tagePred:%d, altPred:%d\n", branch_pc, pred_taken, bi->loopPred, bi->loopPredValid, loopUseCounter, bi->tagePred, bi->altTaken); + + if (useSpeculation) { + specLoopUpdate(pred_taken, bi); + } } - specLoopUpdate(branch_pc, pred_taken, bi); return pred_taken; } @@ -246,8 +280,13 @@ LTAGE::condBranchUpdate(Addr branch_pc, bool taken, { LTageBranchInfo* bi = static_cast<LTageBranchInfo*>(tage_bi); - // first update the loop predictor - loopUpdate(branch_pc, taken, bi); + if (useSpeculation) { + // recalculate loop prediction without speculation + // It is ok to overwrite the loop prediction fields in bi + // as the stats have already been updated with the previous + // values + bi->loopPred = getLoop(branch_pc, bi, false); + } if (bi->loopPredValid) { if (bi->tagePred != bi->loopPred) { @@ -257,6 +296,8 @@ LTAGE::condBranchUpdate(Addr branch_pc, bool taken, } } + loopUpdate(branch_pc, taken, bi); + 
TAGE::condBranchUpdate(branch_pc, taken, bi, nrand); } @@ -269,7 +310,9 @@ LTAGE::squash(ThreadID tid, bool taken, void *bp_history) if (bi->condBranch) { if (bi->loopHit >= 0) { - int idx = bi->loopIndex + bi->loopHit; + int idx = finallindex(bi->loopIndex, + bi->loopLowPcBits, + bi->loopHit); ltable[idx].currentIterSpec = bi->currentIter; } } @@ -281,7 +324,9 @@ LTAGE::squash(ThreadID tid, void *bp_history) LTageBranchInfo* bi = (LTageBranchInfo*)(bp_history); if (bi->condBranch) { if (bi->loopHit >= 0) { - int idx = bi->loopIndex + bi->loopHit; + int idx = finallindex(bi->loopIndex, + bi->loopLowPcBits, + bi->loopHit); ltable[idx].currentIterSpec = bi->currentIter; } } diff --git a/src/cpu/pred/ltage.hh b/src/cpu/pred/ltage.hh index e9e34b75d..94ff96832 100644 --- a/src/cpu/pred/ltage.hh +++ b/src/cpu/pred/ltage.hh @@ -76,11 +76,11 @@ class LTAGE: public TAGE { uint16_t numIter; uint16_t currentIter; - uint16_t currentIterSpec; + uint16_t currentIterSpec; // only for useSpeculation uint8_t confidence; uint16_t tag; uint8_t age; - bool dir; + bool dir; // only for useDirectionBit LoopEntry() : numIter(0), currentIter(0), currentIterSpec(0), confidence(0), tag(0), age(0), dir(0) { } @@ -100,13 +100,14 @@ class LTAGE: public TAGE bool loopPred; bool loopPredValid; int loopIndex; + int loopLowPcBits; // only for useHashing int loopHit; LTageBranchInfo(int sz) : TageBranchInfo(sz), loopTag(0), currentIter(0), loopPred(false), - loopPredValid(false), loopIndex(0), loopHit(0) + loopPredValid(false), loopIndex(0), loopLowPcBits(0), loopHit(0) {} }; @@ -118,13 +119,24 @@ class LTAGE: public TAGE int lindex(Addr pc_in) const; /** + * Computes the index used to access the + * ltable structures. 
+ * It may take hashing into account + * @param index Result of lindex function + * @param lowPcBits PC bits masked with set size + * @param way Way to be used + */ + int finallindex(int lindex, int lowPcBits, int way) const; + + /** * Get a branch prediction from the loop * predictor. * @param pc The unshifted branch PC. * @param bi Pointer to information on the * prediction. + * @param speculative Use speculative number of iterations */ - bool getLoop(Addr pc, LTageBranchInfo* bi) const; + bool getLoop(Addr pc, LTageBranchInfo* bi, bool speculative) const; /** * Updates the loop predictor. @@ -137,13 +149,12 @@ class LTAGE: public TAGE /** * Speculatively updates the loop predictor - * iteration count. - * @param pc The unshifted branch PC. + * iteration count (only for useSpeculation). * @param taken The predicted branch outcome. * @param bi Pointer to information on the prediction * recorded at prediction time. */ - void specLoopUpdate(Addr pc, bool taken, LTageBranchInfo* bi); + void specLoopUpdate(bool taken, LTageBranchInfo* bi); /** * Update LTAGE for conditional branches. @@ -201,12 +212,17 @@ class LTAGE: public TAGE const uint8_t confidenceThreshold; const uint16_t loopTagMask; const uint16_t loopNumIterMask; + const int loopSetMask; LoopEntry *ltable; int8_t loopUseCounter; unsigned withLoopBits; + const bool useDirectionBit; + const bool useSpeculation; + const bool useHashing; + // stats Stats::Scalar loopPredictorCorrect; Stats::Scalar loopPredictorWrong; |