diff options
author | Pau Cabre <pau.cabre@metempsy.com> | 2018-11-20 01:04:56 +0100 |
---|---|---|
committer | Pau Cabre <pau.cabre@metempsy.com> | 2018-11-28 17:25:50 +0000 |
commit | b2078cef37f10ebe1822e3f2a372c780eab91a7e (patch) | |
tree | 68317bc5e226baa1620496ff08dfcfff1f4a4c68 | |
parent | 3bb49cb2b01e55e33cd2ca7a872be65c49fabfc6 (diff) | |
download | gem5-b2078cef37f10ebe1822e3f2a372c780eab91a7e.tar.xz |
cpu: split LTAGE implementation into a base TAGE and a derived LTAGE
The new derived LTAGE is equivalent to the original LTAGE implementation
The default values of the TAGE branch predictor match the settings of the
8C-TAGE configuration described in https://www.jilp.org/vol8/v8paper1.pdf
Change-Id: I8323adbfd5c9a45db23cfff234218280e639f9ed
Signed-off-by: Pau Cabre <pau.cabre@metempsy.com>
Reviewed-on: https://gem5-review.googlesource.com/c/14435
Reviewed-by: Sudhanshu Jha <sudhanshu.jha@arm.com>
Reviewed-by: Jason Lowe-Power <jason@lowepower.com>
Maintainer: Jason Lowe-Power <jason@lowepower.com>
-rw-r--r-- | src/cpu/pred/BranchPredictor.py | 53 | ||||
-rw-r--r-- | src/cpu/pred/SConscript | 2 | ||||
-rw-r--r-- | src/cpu/pred/ltage.cc | 556 | ||||
-rw-r--r-- | src/cpu/pred/ltage.hh | 300 | ||||
-rw-r--r-- | src/cpu/pred/tage.cc | 609 | ||||
-rw-r--r-- | src/cpu/pred/tage.hh | 374 |
6 files changed, 1088 insertions, 806 deletions
diff --git a/src/cpu/pred/BranchPredictor.py b/src/cpu/pred/BranchPredictor.py index 9d83abb0b..2c622cd02 100644 --- a/src/cpu/pred/BranchPredictor.py +++ b/src/cpu/pred/BranchPredictor.py @@ -87,34 +87,55 @@ class BiModeBP(BranchPredictor): choicePredictorSize = Param.Unsigned(8192, "Size of choice predictor") choiceCtrBits = Param.Unsigned(2, "Bits of choice counters") -class LTAGE(BranchPredictor): - type = 'LTAGE' - cxx_class = 'LTAGE' - cxx_header = "cpu/pred/ltage.hh" +# TAGE branch predictor as described in https://www.jilp.org/vol8/v8paper1.pdf +# The default sizes below are for the 8C-TAGE configuration (63.5 Kbits) +class TAGE(BranchPredictor): + type = 'TAGE' + cxx_class = 'TAGE' + cxx_header = "cpu/pred/tage.hh" + nHistoryTables = Param.Unsigned(7, "Number of history tables") + minHist = Param.Unsigned(5, "Minimum history size of LTAGE") + maxHist = Param.Unsigned(130, "Maximum history size of LTAGE") + + tagTableTagWidths = VectorParam.Unsigned( + [0, 9, 9, 10, 10, 11, 11, 12], "Tag size in TAGE tag tables") + logTagTableSizes = VectorParam.Int( + [13, 9, 9, 9, 9, 9, 9, 9], "Log2 of TAGE table sizes") logRatioBiModalHystEntries = Param.Unsigned(2, "Log num of prediction entries for a shared hysteresis bit " \ "for the Bimodal") - logSizeLoopPred = Param.Unsigned(8, "Log size of the loop predictor") - nHistoryTables = Param.Unsigned(12, "Number of history tables") + tagTableCounterBits = Param.Unsigned(3, "Number of tag table counter bits") tagTableUBits = Param.Unsigned(2, "Number of tag table u bits") + histBufferSize = Param.Unsigned(2097152, "A large number to track all branch histories(2MEntries default)") - minHist = Param.Unsigned(4, "Minimum history size of LTAGE") - maxHist = Param.Unsigned(640, "Maximum history size of LTAGE") + pathHistBits = Param.Unsigned(16, "Path history size") - tagTableTagWidths = VectorParam.Unsigned( - [0, 7, 7, 8, 8, 9, 10, 11, 12, 12, 13, 14, 15], - "Tag size in TAGE tag tables") - logTagTableSizes = VectorParam.Int( - [14, 10, 10, 11, 11, 11, 11, 10, 10, 10, 10, 9, 9], - "Log2 of TAGE table sizes") - logUResetPeriod = Param.Unsigned(19, + logUResetPeriod = Param.Unsigned(18, "Log period in number of branches to reset TAGE useful counters") useAltOnNaBits = Param.Unsigned(4, "Size of the USE_ALT_ON_NA counter") - withLoopBits = Param.Unsigned(7, "Size of the WITHLOOP counter") + +# LTAGE branch predictor as described in +# https://www.irisa.fr/caps/people/seznec/L-TAGE.pdf +# It is basically a TAGE predictor plus a loop predictor +# The differnt TAGE sizes are updated according to the paper values (256 Kbits) +class LTAGE(TAGE): + type = 'LTAGE' + cxx_class = 'LTAGE' + cxx_header = "cpu/pred/ltage.hh" + + nHistoryTables = 12 + minHist = 4 + maxHist = 640 + tagTableTagWidths = [0, 7, 7, 8, 8, 9, 10, 11, 12, 12, 13, 14, 15] + logTagTableSizes = [14, 10, 10, 11, 11, 11, 11, 10, 10, 10, 10, 9, 9] + logUResetPeriod = 19 + + logSizeLoopPred = Param.Unsigned(8, "Log size of the loop predictor") + withLoopBits = Param.Unsigned(7, "Size of the WITHLOOP counter") loopTableAgeBits = Param.Unsigned(8, "Number of age bits per loop entry") loopTableConfidenceBits = Param.Unsigned(2, "Number of confidence bits per loop entry") diff --git a/src/cpu/pred/SConscript b/src/cpu/pred/SConscript index 1cdf7bbd7..b0e63096a 100644 --- a/src/cpu/pred/SConscript +++ b/src/cpu/pred/SConscript @@ -43,7 +43,9 @@ Source('indirect.cc') Source('ras.cc') Source('tournament.cc') Source ('bi_mode.cc') +Source('tage.cc') Source('ltage.cc') DebugFlag('FreeList') DebugFlag('Branch') +DebugFlag('Tage') DebugFlag('LTage') diff --git a/src/cpu/pred/ltage.cc b/src/cpu/pred/ltage.cc index d6cc087e6..3be01936e 100644 --- a/src/cpu/pred/ltage.cc +++ b/src/cpu/pred/ltage.cc @@ -48,16 +48,8 @@ #include "debug/LTage.hh" LTAGE::LTAGE(const LTAGEParams *params) - : BPredUnit(params), - logRatioBiModalHystEntries(params->logRatioBiModalHystEntries), + : TAGE(params), logSizeLoopPred(params->logSizeLoopPred), - nHistoryTables(params->nHistoryTables), - tagTableCounterBits(params->tagTableCounterBits), - tagTableUBits(params->tagTableUBits), - histBufferSize(params->histBufferSize), - minHist(params->minHist), - maxHist(params->maxHist), - pathHistBits(params->pathHistBits), loopTableAgeBits(params->loopTableAgeBits), loopTableConfidenceBits(params->loopTableConfidenceBits), loopTableTagBits(params->loopTableTagBits), @@ -66,18 +58,9 @@ LTAGE::LTAGE(const LTAGEParams *params) confidenceThreshold((1 << loopTableConfidenceBits) - 1), loopTagMask((1 << loopTableTagBits) - 1), loopNumIterMask((1 << loopTableIterBits) - 1), - tagTableTagWidths(params->tagTableTagWidths), - logTagTableSizes(params->logTagTableSizes), - threadHistory(params->numThreads), - logUResetPeriod(params->logUResetPeriod), - useAltOnNaBits(params->useAltOnNaBits), + loopUseCounter(0), withLoopBits(params->withLoopBits) { - // Current method for periodically resetting the u counter bits only - // works for 1 or 2 bits - // Also make sure that it is not 0 - assert(tagTableUBits <= 2 && (tagTableUBits > 0)); - // we use uint16_t type for these vales, so they cannot be more than // 16 bits assert(loopTableTagBits <= 16); @@ -85,81 +68,7 @@ LTAGE::LTAGE(const LTAGEParams *params) assert(logSizeLoopPred >= logLoopTableAssoc); - // we use int type for the path history, so it cannot be more than - // its size - assert(pathHistBits <= (sizeof(int)*8)); - - // initialize the counter to half of the period - assert(logUResetPeriod != 0); - tCounter = ULL(1) << (logUResetPeriod - 1); - - assert(params->histBufferSize > params->maxHist * 2); - useAltPredForNewlyAllocated = 0; - - for (auto& history : threadHistory) { - history.pathHist = 0; - history.globalHistory = new uint8_t[histBufferSize]; - history.gHist = history.globalHistory; - memset(history.gHist, 0, histBufferSize); - history.ptGhist = 0; - } - - histLengths = new int [nHistoryTables+1]; - histLengths[1] = minHist; - histLengths[nHistoryTables] = maxHist; - - for (int i = 2; i <= nHistoryTables; i++) { - histLengths[i] = (int) (((double) minHist * - pow ((double) (maxHist) / (double) minHist, - (double) (i - 1) / (double) ((nHistoryTables- 1)))) - + 0.5); - } - - assert(tagTableTagWidths.size() == (nHistoryTables+1)); - assert(logTagTableSizes.size() == (nHistoryTables+1)); - - // First entry is for the Bimodal table and it is untagged in this - // implementation - assert(tagTableTagWidths[0] == 0); - - for (auto& history : threadHistory) { - history.computeIndices = new FoldedHistory[nHistoryTables+1]; - history.computeTags[0] = new FoldedHistory[nHistoryTables+1]; - history.computeTags[1] = new FoldedHistory[nHistoryTables+1]; - - for (int i = 1; i <= nHistoryTables; i++) { - history.computeIndices[i].init( - histLengths[i], (logTagTableSizes[i])); - history.computeTags[0][i].init( - history.computeIndices[i].origLength, tagTableTagWidths[i]); - history.computeTags[1][i].init( - history.computeIndices[i].origLength, tagTableTagWidths[i]-1); - DPRINTF(LTage, "HistLength:%d, TTSize:%d, TTTWidth:%d\n", - histLengths[i], logTagTableSizes[i], tagTableTagWidths[i]); - } - } - - const uint64_t bimodalTableSize = ULL(1) << logTagTableSizes[0]; - btablePrediction.resize(bimodalTableSize, false); - btableHysteresis.resize(bimodalTableSize >> logRatioBiModalHystEntries, - true); - ltable = new LoopEntry[ULL(1) << logSizeLoopPred]; - gtable = new TageEntry*[nHistoryTables + 1]; - for (int i = 1; i <= nHistoryTables; i++) { - gtable[i] = new TageEntry[1<<(logTagTableSizes[i])]; - } - - tableIndices = new int [nHistoryTables+1]; - tableTags = new int [nHistoryTables+1]; - - loopUseCounter = 0; -} - -int -LTAGE::bindex(Addr pc_in) const -{ - return ((pc_in >> instShiftAmt) & ((ULL(1) << (logTagTableSizes[0])) - 1)); } int @@ -176,113 +85,9 @@ LTAGE::lindex(Addr pc_in) const return (((pc_in >> instShiftAmt) & mask) << logLoopTableAssoc); } -int -LTAGE::F(int A, int size, int bank) const -{ - int A1, A2; - - A = A & ((ULL(1) << size) - 1); - A1 = (A & ((ULL(1) << logTagTableSizes[bank]) - 1)); - A2 = (A >> logTagTableSizes[bank]); - A2 = ((A2 << bank) & ((ULL(1) << logTagTableSizes[bank]) - 1)) - + (A2 >> (logTagTableSizes[bank] - bank)); - A = A1 ^ A2; - A = ((A << bank) & ((ULL(1) << logTagTableSizes[bank]) - 1)) - + (A >> (logTagTableSizes[bank] - bank)); - return (A); -} - - -// gindex computes a full hash of pc, ghist and pathHist -int -LTAGE::gindex(ThreadID tid, Addr pc, int bank) const -{ - int index; - int hlen = (histLengths[bank] > pathHistBits) ? pathHistBits : - histLengths[bank]; - const Addr shiftedPc = pc >> instShiftAmt; - index = - shiftedPc ^ - (shiftedPc >> ((int) abs(logTagTableSizes[bank] - bank) + 1)) ^ - threadHistory[tid].computeIndices[bank].comp ^ - F(threadHistory[tid].pathHist, hlen, bank); - - return (index & ((ULL(1) << (logTagTableSizes[bank])) - 1)); -} - - -// Tag computation -uint16_t -LTAGE::gtag(ThreadID tid, Addr pc, int bank) const -{ - int tag = (pc >> instShiftAmt) ^ - threadHistory[tid].computeTags[0][bank].comp ^ - (threadHistory[tid].computeTags[1][bank].comp << 1); - - return (tag & ((ULL(1) << tagTableTagWidths[bank]) - 1)); -} - - -// Up-down saturating counter -void -LTAGE::ctrUpdate(int8_t & ctr, bool taken, int nbits) -{ - assert(nbits <= sizeof(int8_t) << 3); - if (taken) { - if (ctr < ((1 << (nbits - 1)) - 1)) - ctr++; - } else { - if (ctr > -(1 << (nbits - 1))) - ctr--; - } -} - -// Up-down unsigned saturating counter -void -LTAGE::unsignedCtrUpdate(uint8_t & ctr, bool up, unsigned nbits) -{ - assert(nbits <= sizeof(uint8_t) << 3); - if (up) { - if (ctr < ((1 << nbits) - 1)) - ctr++; - } else { - if (ctr) - ctr--; - } -} - -// Bimodal prediction -bool -LTAGE::getBimodePred(Addr pc, BranchInfo* bi) const -{ - return btablePrediction[bi->bimodalIndex]; -} - - -// Update the bimodal predictor: a hysteresis bit is shared among N prediction -// bits (N = 2 ^ logRatioBiModalHystEntries) -void -LTAGE::baseUpdate(Addr pc, bool taken, BranchInfo* bi) -{ - int inter = (btablePrediction[bi->bimodalIndex] << 1) - + btableHysteresis[bi->bimodalIndex >> logRatioBiModalHystEntries]; - if (taken) { - if (inter < 3) - inter++; - } else if (inter > 0) { - inter--; - } - const bool pred = inter >> 1; - const bool hyst = inter & 1; - btablePrediction[bi->bimodalIndex] = pred; - btableHysteresis[bi->bimodalIndex >> logRatioBiModalHystEntries] = hyst; - DPRINTF(LTage, "Updating branch %lx, pred:%d, hyst:%d\n", pc, pred, hyst); -} - - //loop prediction: only used if high confidence bool -LTAGE::getLoop(Addr pc, BranchInfo* bi) const +LTAGE::getLoop(Addr pc, LTageBranchInfo* bi) const { bi->loopHit = -1; bi->loopPredValid = false; @@ -308,7 +113,7 @@ LTAGE::getLoop(Addr pc, BranchInfo* bi) const } void -LTAGE::specLoopUpdate(Addr pc, bool taken, BranchInfo* bi) +LTAGE::specLoopUpdate(Addr pc, bool taken, LTageBranchInfo* bi) { if (bi->loopHit>=0) { int index = lindex(pc); @@ -322,7 +127,7 @@ LTAGE::specLoopUpdate(Addr pc, bool taken, BranchInfo* bi) } void -LTAGE::loopUpdate(Addr pc, bool taken, BranchInfo* bi) +LTAGE::loopUpdate(Addr pc, bool taken, LTageBranchInfo* bi) { int idx = bi->loopIndex + bi->loopHit; if (bi->loopHit >= 0) { @@ -409,319 +214,57 @@ LTAGE::loopUpdate(Addr pc, bool taken, BranchInfo* bi) } -// shifting the global history: we manage the history in a big table in order -// to reduce simulation time -void -LTAGE::updateGHist(uint8_t * &h, bool dir, uint8_t * tab, int &pt) -{ - if (pt == 0) { - DPRINTF(LTage, "Rolling over the histories\n"); - // Copy beginning of globalHistoryBuffer to end, such that - // the last maxHist outcomes are still reachable - // through pt[0 .. maxHist - 1]. - for (int i = 0; i < maxHist; i++) - tab[histBufferSize - maxHist + i] = tab[i]; - pt = histBufferSize - maxHist; - h = &tab[pt]; - } - pt--; - h--; - h[0] = (dir) ? 1 : 0; -} - -// Get GHR for hashing indirect predictor -// Build history backwards from pointer in -// bp_history. -unsigned -LTAGE::getGHR(ThreadID tid, void *bp_history) const -{ - BranchInfo* bi = static_cast<BranchInfo*>(bp_history); - unsigned val = 0; - for (unsigned i = 0; i < 32; i++) { - // Make sure we don't go out of bounds - int gh_offset = bi->ptGhist + i; - assert(&(threadHistory[tid].globalHistory[gh_offset]) < - threadHistory[tid].globalHistory + histBufferSize); - val |= ((threadHistory[tid].globalHistory[gh_offset] & 0x1) << i); - } - - return val; -} - //prediction bool LTAGE::predict(ThreadID tid, Addr branch_pc, bool cond_branch, void* &b) { - BranchInfo *bi = new BranchInfo(nHistoryTables+1); + LTageBranchInfo *bi = new LTageBranchInfo(nHistoryTables+1); b = (void*)(bi); - Addr pc = branch_pc; - bool pred_taken = true; - bi->loopHit = -1; - if (cond_branch) { - // TAGE prediction - - // computes the table addresses and the partial tags - for (int i = 1; i <= nHistoryTables; i++) { - tableIndices[i] = gindex(tid, pc, i); - bi->tableIndices[i] = tableIndices[i]; - tableTags[i] = gtag(tid, pc, i); - bi->tableTags[i] = tableTags[i]; - } + bool pred_taken = tagePredict(tid, branch_pc, cond_branch, bi); - bi->bimodalIndex = bindex(pc); - - bi->hitBank = 0; - bi->altBank = 0; - //Look for the bank with longest matching history - for (int i = nHistoryTables; i > 0; i--) { - if (gtable[i][tableIndices[i]].tag == tableTags[i]) { - bi->hitBank = i; - bi->hitBankIndex = tableIndices[bi->hitBank]; - break; - } - } - //Look for the alternate bank - for (int i = bi->hitBank - 1; i > 0; i--) { - if (gtable[i][tableIndices[i]].tag == tableTags[i]) { - bi->altBank = i; - bi->altBankIndex = tableIndices[bi->altBank]; - break; - } - } - //computes the prediction and the alternate prediction - if (bi->hitBank > 0) { - if (bi->altBank > 0) { - bi->altTaken = - gtable[bi->altBank][tableIndices[bi->altBank]].ctr >= 0; - }else { - bi->altTaken = getBimodePred(pc, bi); - } + if (cond_branch) { + bi->loopPred = getLoop(branch_pc, bi); // loop prediction - bi->longestMatchPred = - gtable[bi->hitBank][tableIndices[bi->hitBank]].ctr >= 0; - bi->pseudoNewAlloc = - abs(2 * gtable[bi->hitBank][bi->hitBankIndex].ctr + 1) <= 1; - - //if the entry is recognized as a newly allocated entry and - //useAltPredForNewlyAllocated is positive use the alternate - //prediction - if ((useAltPredForNewlyAllocated < 0) - || abs(2 * - gtable[bi->hitBank][tableIndices[bi->hitBank]].ctr + 1) > 1) - bi->tagePred = bi->longestMatchPred; - else - bi->tagePred = bi->altTaken; - } else { - bi->altTaken = getBimodePred(pc, bi); - bi->tagePred = bi->altTaken; - bi->longestMatchPred = bi->altTaken; + if ((loopUseCounter >= 0) && bi->loopPredValid) { + pred_taken = bi->loopPred; } - //end TAGE prediction - - bi->loopPred = getLoop(pc, bi); // loop prediction - - pred_taken = (((loopUseCounter >= 0) && bi->loopPredValid)) ? - (bi->loopPred): (bi->tagePred); DPRINTF(LTage, "Predict for %lx: taken?:%d, loopTaken?:%d, " "loopValid?:%d, loopUseCounter:%d, tagePred:%d, altPred:%d\n", branch_pc, pred_taken, bi->loopPred, bi->loopPredValid, loopUseCounter, bi->tagePred, bi->altTaken); } - bi->branchPC = branch_pc; - bi->condBranch = cond_branch; + specLoopUpdate(branch_pc, pred_taken, bi); return pred_taken; } -// PREDICTOR UPDATE void -LTAGE::update(ThreadID tid, Addr branch_pc, bool taken, void* bp_history, - bool squashed) +LTAGE::condBranchUpdate(Addr branch_pc, bool taken, + TageBranchInfo* tage_bi, int nrand) { - assert(bp_history); - - BranchInfo *bi = static_cast<BranchInfo*>(bp_history); - - if (squashed) { - // This restores the global history, then update it - // and recomputes the folded histories. - squash(tid, taken, bp_history); - return; - } - - int nrand = random_mt.random<int>(0,3); - Addr pc = branch_pc; - if (bi->condBranch) { - DPRINTF(LTage, "Updating tables for branch:%lx; taken?:%d\n", - branch_pc, taken); - // first update the loop predictor - loopUpdate(pc, taken, bi); - - if (bi->loopPredValid) { - if (bi->tagePred != bi->loopPred) { - ctrUpdate(loopUseCounter, - (bi->loopPred == taken), - withLoopBits); - } - } - - // TAGE UPDATE - // try to allocate a new entries only if prediction was wrong - bool longest_match_pred = false; - bool alloc = (bi->tagePred != taken) && (bi->hitBank < nHistoryTables); - if (bi->hitBank > 0) { - // Manage the selection between longest matching and alternate - // matching for "pseudo"-newly allocated longest matching entry - longest_match_pred = bi->longestMatchPred; - bool PseudoNewAlloc = bi->pseudoNewAlloc; - // an entry is considered as newly allocated if its prediction - // counter is weak - if (PseudoNewAlloc) { - if (longest_match_pred == taken) { - alloc = false; - } - // if it was delivering the correct prediction, no need to - // allocate new entry even if the overall prediction was false - if (longest_match_pred != bi->altTaken) { - ctrUpdate(useAltPredForNewlyAllocated, - bi->altTaken == taken, useAltOnNaBits); - } - } - } - - if (alloc) { - // is there some "unuseful" entry to allocate - uint8_t min = 1; - for (int i = nHistoryTables; i > bi->hitBank; i--) { - if (gtable[i][bi->tableIndices[i]].u < min) { - min = gtable[i][bi->tableIndices[i]].u; - } - } - - // we allocate an entry with a longer history - // to avoid ping-pong, we do not choose systematically the next - // entry, but among the 3 next entries - int Y = nrand & - ((ULL(1) << (nHistoryTables - bi->hitBank - 1)) - 1); - int X = bi->hitBank + 1; - if (Y & 1) { - X++; - if (Y & 2) - X++; - } - // No entry available, forces one to be available - if (min > 0) { - gtable[X][bi->tableIndices[X]].u = 0; - } - - - //Allocate only one entry - for (int i = X; i <= nHistoryTables; i++) { - if ((gtable[i][bi->tableIndices[i]].u == 0)) { - gtable[i][bi->tableIndices[i]].tag = bi->tableTags[i]; - gtable[i][bi->tableIndices[i]].ctr = (taken) ? 0 : -1; - break; - } - } - } - //periodic reset of u: reset is not complete but bit by bit - tCounter++; - if ((tCounter & ((ULL(1) << logUResetPeriod) - 1)) == 0) { - // reset least significant bit - // most significant bit becomes least significant bit - for (int i = 1; i <= nHistoryTables; i++) { - for (int j = 0; j < (ULL(1) << logTagTableSizes[i]); j++) { - gtable[i][j].u = gtable[i][j].u >> 1; - } - } - } + LTageBranchInfo* bi = static_cast<LTageBranchInfo*>(tage_bi); - if (bi->hitBank > 0) { - DPRINTF(LTage, "Updating tag table entry (%d,%d) for branch %lx\n", - bi->hitBank, bi->hitBankIndex, branch_pc); - ctrUpdate(gtable[bi->hitBank][bi->hitBankIndex].ctr, taken, - tagTableCounterBits); - // if the provider entry is not certified to be useful also update - // the alternate prediction - if (gtable[bi->hitBank][bi->hitBankIndex].u == 0) { - if (bi->altBank > 0) { - ctrUpdate(gtable[bi->altBank][bi->altBankIndex].ctr, taken, - tagTableCounterBits); - DPRINTF(LTage, "Updating tag table entry (%d,%d) for" - " branch %lx\n", bi->hitBank, bi->hitBankIndex, - branch_pc); - } - if (bi->altBank == 0) { - baseUpdate(pc, taken, bi); - } - } + // first update the loop predictor + loopUpdate(branch_pc, taken, bi); - // update the u counter - if (bi->tagePred != bi->altTaken) { - unsignedCtrUpdate(gtable[bi->hitBank][bi->hitBankIndex].u, - bi->tagePred == taken, tagTableUBits); - } - } else { - baseUpdate(pc, taken, bi); + if (bi->loopPredValid) { + if (bi->tagePred != bi->loopPred) { + ctrUpdate(loopUseCounter, + (bi->loopPred == taken), + withLoopBits); } - - //END PREDICTOR UPDATE } - if (!squashed) { - delete bi; - } -} -void -LTAGE::updateHistories(ThreadID tid, Addr branch_pc, bool taken, void* b) -{ - BranchInfo* bi = (BranchInfo*)(b); - ThreadHistory& tHist = threadHistory[tid]; - // UPDATE HISTORIES - bool pathbit = ((branch_pc >> instShiftAmt) & 1); - //on a squash, return pointers to this and recompute indices. - //update user history - updateGHist(tHist.gHist, taken, tHist.globalHistory, tHist.ptGhist); - tHist.pathHist = (tHist.pathHist << 1) + pathbit; - tHist.pathHist = (tHist.pathHist & ((ULL(1) << pathHistBits) - 1)); - - bi->ptGhist = tHist.ptGhist; - bi->pathHist = tHist.pathHist; - //prepare next index and tag computations for user branchs - for (int i = 1; i <= nHistoryTables; i++) - { - bi->ci[i] = tHist.computeIndices[i].comp; - bi->ct0[i] = tHist.computeTags[0][i].comp; - bi->ct1[i] = tHist.computeTags[1][i].comp; - tHist.computeIndices[i].update(tHist.gHist); - tHist.computeTags[0][i].update(tHist.gHist); - tHist.computeTags[1][i].update(tHist.gHist); - } - DPRINTF(LTage, "Updating global histories with branch:%lx; taken?:%d, " - "path Hist: %x; pointer:%d\n", branch_pc, taken, tHist.pathHist, - tHist.ptGhist); + TAGE::condBranchUpdate(branch_pc, taken, bi, nrand); } void LTAGE::squash(ThreadID tid, bool taken, void *bp_history) { - BranchInfo* bi = (BranchInfo*)(bp_history); - ThreadHistory& tHist = threadHistory[tid]; - DPRINTF(LTage, "Restoring branch info: %lx; taken? %d; PathHistory:%x, " - "pointer:%d\n", bi->branchPC,taken, bi->pathHist, bi->ptGhist); - tHist.pathHist = bi->pathHist; - tHist.ptGhist = bi->ptGhist; - tHist.gHist = &(tHist.globalHistory[tHist.ptGhist]); - tHist.gHist[0] = (taken ? 1 : 0); - for (int i = 1; i <= nHistoryTables; i++) { - tHist.computeIndices[i].comp = bi->ci[i]; - tHist.computeTags[0][i].comp = bi->ct0[i]; - tHist.computeTags[1][i].comp = bi->ct1[i]; - tHist.computeIndices[i].update(tHist.gHist); - tHist.computeTags[0][i].update(tHist.gHist); - tHist.computeTags[1][i].update(tHist.gHist); - } + TAGE::squash(tid, taken, bp_history); + + LTageBranchInfo* bi = (LTageBranchInfo*)(bp_history); if (bi->condBranch) { if (bi->loopHit >= 0) { @@ -729,14 +272,12 @@ LTAGE::squash(ThreadID tid, bool taken, void *bp_history) ltable[idx].currentIterSpec = bi->currentIter; } } - } void LTAGE::squash(ThreadID tid, void *bp_history) { - BranchInfo* bi = (BranchInfo*)(bp_history); - DPRINTF(LTage, "Deleting branch info: %lx\n", bi->branchPC); + LTageBranchInfo* bi = (LTageBranchInfo*)(bp_history); if (bi->condBranch) { if (bi->loopHit >= 0) { int idx = bi->loopIndex + bi->loopHit; @@ -744,48 +285,7 @@ LTAGE::squash(ThreadID tid, void *bp_history) } } - delete bi; -} - -bool -LTAGE::lookup(ThreadID tid, Addr branch_pc, void* &bp_history) -{ - bool retval = predict(tid, branch_pc, true, bp_history); - - DPRINTF(LTage, "Lookup branch: %lx; predict:%d\n", branch_pc, retval); - updateHistories(tid, branch_pc, retval, bp_history); - assert(threadHistory[tid].gHist == - &threadHistory[tid].globalHistory[threadHistory[tid].ptGhist]); - - return retval; -} - -void -LTAGE::btbUpdate(ThreadID tid, Addr branch_pc, void* &bp_history) -{ - BranchInfo* bi = (BranchInfo*) bp_history; - ThreadHistory& tHist = threadHistory[tid]; - DPRINTF(LTage, "BTB miss resets prediction: %lx\n", branch_pc); - assert(tHist.gHist == &tHist.globalHistory[tHist.ptGhist]); - tHist.gHist[0] = 0; - for (int i = 1; i <= nHistoryTables; i++) { - tHist.computeIndices[i].comp = bi->ci[i]; - tHist.computeTags[0][i].comp = bi->ct0[i]; - tHist.computeTags[1][i].comp = bi->ct1[i]; - tHist.computeIndices[i].update(tHist.gHist); - tHist.computeTags[0][i].update(tHist.gHist); - tHist.computeTags[1][i].update(tHist.gHist); - } -} - -void -LTAGE::uncondBranch(ThreadID tid, Addr br_pc, void* &bp_history) -{ - DPRINTF(LTage, "UnConditionalBranch: %lx\n", br_pc); - predict(tid, br_pc, false, bp_history); - updateHistories(tid, br_pc, true, bp_history); - assert(threadHistory[tid].gHist == - &threadHistory[tid].globalHistory[threadHistory[tid].ptGhist]); + TAGE::squash(tid, bp_history); } LTAGE* diff --git a/src/cpu/pred/ltage.hh b/src/cpu/pred/ltage.hh index 8b417d44d..d614026d2 100644 --- a/src/cpu/pred/ltage.hh +++ b/src/cpu/pred/ltage.hh @@ -52,25 +52,20 @@ #ifndef __CPU_PRED_LTAGE #define __CPU_PRED_LTAGE + #include <vector> #include "base/types.hh" -#include "cpu/pred/bpred_unit.hh" +#include "cpu/pred/tage.hh" #include "params/LTAGE.hh" -class LTAGE: public BPredUnit +class LTAGE: public TAGE { public: LTAGE(const LTAGEParams *params); // Base class methods. - void uncondBranch(ThreadID tid, Addr br_pc, void* &bp_history) override; - bool lookup(ThreadID tid, Addr branch_addr, void* &bp_history) override; - void btbUpdate(ThreadID tid, Addr branch_addr, void* &bp_history) override; - void update(ThreadID tid, Addr branch_addr, bool taken, void *bp_history, - bool squashed) override; void squash(ThreadID tid, void *bp_history) override; - unsigned getGHR(ThreadID tid, void *bp_history) const override; private: // Prediction Structures @@ -89,189 +84,40 @@ class LTAGE: public BPredUnit confidence(0), tag(0), age(0), dir(0) { } }; - // Tage Entry - struct TageEntry - { - int8_t ctr; - uint16_t tag; - uint8_t u; - TageEntry() : ctr(0), tag(0), u(0) { } - }; - - // Folded History Table - compressed history - // to mix with instruction PC to index partially - // tagged tables. - struct FoldedHistory - { - unsigned comp; - int compLength; - int origLength; - int outpoint; - - void init(int original_length, int compressed_length) - { - comp = 0; - origLength = original_length; - compLength = compressed_length; - outpoint = original_length % compressed_length; - } - - void update(uint8_t * h) - { - comp = (comp << 1) | h[0]; - comp ^= h[origLength] << outpoint; - comp ^= (comp >> compLength); - comp &= (ULL(1) << compLength) - 1; - } - }; - // Primary branch history entry - struct BranchInfo + struct LTageBranchInfo : public TageBranchInfo { - int pathHist; - int ptGhist; - int hitBank; - int hitBankIndex; - int altBank; - int altBankIndex; - int bimodalIndex; uint16_t loopTag; uint16_t currentIter; - bool tagePred; - bool altTaken; bool loopPred; bool loopPredValid; int loopIndex; int loopHit; - bool condBranch; - bool longestMatchPred; - bool pseudoNewAlloc; - Addr branchPC; - - // Pointer to dynamically allocated storage - // to save table indices and folded histories. - // To do one call to new instead of five. - int *storage; - - // Pointers to actual saved array within the dynamically - // allocated storage. - int *tableIndices; - int *tableTags; - int *ci; - int *ct0; - int *ct1; - BranchInfo(int sz) - : pathHist(0), ptGhist(0), - hitBank(0), hitBankIndex(0), - altBank(0), altBankIndex(0), - bimodalIndex(0), loopTag(0), currentIter(0), - tagePred(false), altTaken(false), loopPred(false), - loopPredValid(false), loopIndex(0), loopHit(0), - condBranch(false), longestMatchPred(false), - pseudoNewAlloc(false), branchPC(0) - { - storage = new int [sz * 5]; - tableIndices = storage; - tableTags = storage + sz; - ci = tableTags + sz; - ct0 = ci + sz; - ct1 = ct0 + sz; - } - - ~BranchInfo() - { - delete[] storage; - } + LTageBranchInfo(int sz) + : TageBranchInfo(sz), + loopTag(0), currentIter(0), + loopPred(false), + loopPredValid(false), loopIndex(0), loopHit(0) + {} }; /** * Computes the index used to access the - * bimodal table. - * @param pc_in The unshifted branch PC. - */ - int bindex(Addr pc_in) const; - - /** - * Computes the index used to access the * loop predictor. * @param pc_in The unshifted branch PC. */ int lindex(Addr pc_in) const; /** - * Computes the index used to access a - * partially tagged table. - * @param tid The thread ID used to select the - * global histories to use. - * @param pc The unshifted branch PC. - * @param bank The partially tagged table to access. - */ - inline int gindex(ThreadID tid, Addr pc, int bank) const; - - /** - * Utility function to shuffle the path history - * depending on which tagged table we are accessing. - * @param phist The path history. - * @param size Number of path history bits to use. - * @param bank The partially tagged table to access. - */ - int F(int phist, int size, int bank) const; - - /** - * Computes the partial tag of a tagged table. - * @param tid the thread ID used to select the - * global histories to use. - * @param pc The unshifted branch PC. - * @param bank The partially tagged table to access. - */ - inline uint16_t gtag(ThreadID tid, Addr pc, int bank) const; - - /** - * Updates a direction counter based on the actual - * branch outcome. - * @param ctr Reference to counter to update. - * @param taken Actual branch outcome. - * @param nbits Counter width. - */ - void ctrUpdate(int8_t & ctr, bool taken, int nbits); - - /** - * Updates an unsigned counter based on up/down parameter - * @param ctr Reference to counter to update. - * @param up Boolean indicating if the counter is incremented/decremented - * If true it is incremented, if false it is decremented - * @param nbits Counter width. - */ - void unsignedCtrUpdate(uint8_t & ctr, bool up, unsigned nbits); - - /** - * Get a branch prediction from the bimodal - * predictor. - * @param pc The unshifted branch PC. - * @param bi Pointer to information on the - * prediction. - */ - bool getBimodePred(Addr pc, BranchInfo* bi) const; - - /** - * Updates the bimodal predictor. - * @param pc The unshifted branch PC. - * @param taken The actual branch outcome. - * @param bi Pointer to information on the prediction - * recorded at prediction time. - */ - void baseUpdate(Addr pc, bool taken, BranchInfo* bi); - - /** * Get a branch prediction from the loop * predictor. * @param pc The unshifted branch PC. * @param bi Pointer to information on the * prediction. */ - bool getLoop(Addr pc, BranchInfo* bi) const; + bool getLoop(Addr pc, LTageBranchInfo* bi) const; /** * Updates the loop predictor. @@ -280,55 +126,41 @@ class LTAGE: public BPredUnit * @param bi Pointer to information on the * prediction recorded at prediction time. */ - void loopUpdate(Addr pc, bool Taken, BranchInfo* bi); - - /** - * (Speculatively) updates the global branch history. - * @param h Reference to pointer to global branch history. - * @param dir (Predicted) outcome to update the histories - * with. - * @param tab - * @param PT Reference to path history. - */ - void updateGHist(uint8_t * &h, bool dir, uint8_t * tab, int &PT); + void loopUpdate(Addr pc, bool Taken, LTageBranchInfo* bi); /** - * Get a branch prediction from L-TAGE. *NOT* an override of - * BpredUnit::predict(). - * @param tid The thread ID to select the global - * histories to use. - * @param branch_pc The unshifted branch PC. - * @param cond_branch True if the branch is conditional. - * @param b Reference to wrapping pointer to allow storing - * derived class prediction information in the base class. + * Speculatively updates the loop predictor + * iteration count. + * @param pc The unshifted branch PC. + * @param taken The predicted branch outcome. + * @param bi Pointer to information on the prediction + * recorded at prediction time. */ - bool predict(ThreadID tid, Addr branch_pc, bool cond_branch, void* &b); + void specLoopUpdate(Addr pc, bool taken, LTageBranchInfo* bi); /** - * Update L-TAGE. Called at execute to repair histories on a misprediction - * and at commit to update the tables. - * @param tid The thread ID to select the global - * histories to use. + * Update LTAGE for conditional branches. * @param branch_pc The unshifted branch PC. * @param taken Actual branch outcome. * @param bi Pointer to information on the prediction * recorded at prediction time. + * @nrand Random int number from 0 to 3 */ - void update(ThreadID tid, Addr branch_pc, bool taken, BranchInfo* bi); + void condBranchUpdate( + Addr branch_pc, bool taken, TageBranchInfo* bi, int nrand) override; - /** - * (Speculatively) updates global histories (path and direction). - * Also recomputes compressed (folded) histories based on the - * branch direction. - * @param tid The thread ID to select the histories - * to update. - * @param branch_pc The unshifted branch PC. - * @param taken (Predicted) branch direction. - * @param b Wrapping pointer to BranchInfo (to allow - * storing derived class prediction information in the - * base class). - */ - void updateHistories(ThreadID tid, Addr branch_pc, bool taken, void* b); + /** + * Get a branch prediction from LTAGE. *NOT* an override of + * BpredUnit::predict(). + * @param tid The thread ID to select the global + * histories to use. + * @param branch_pc The unshifted branch PC. + * @param cond_branch True if the branch is conditional. + * @param b Reference to wrapping pointer to allow storing + * derived class prediction information in the base class. + */ + bool predict( + ThreadID tid, Addr branch_pc, bool cond_branch, void* &b) override; /** * Restores speculatively updated path and direction histories. @@ -337,32 +169,15 @@ class LTAGE: public BPredUnit * This version of squash() is called once on a branch misprediction. * @param tid The Thread ID to select the histories to rollback. * @param taken The correct branch outcome. - * @param bp_history Wrapping pointer to BranchInfo (to allow + * @param bp_history Wrapping pointer to TageBranchInfo (to allow * storing derived class prediction information in the * base class). * @post bp_history points to valid memory. */ - void squash(ThreadID tid, bool taken, void *bp_history); - - /** - * Speculatively updates the loop predictor - * iteration count. - * @param pc The unshifted branch PC. - * @param taken The predicted branch outcome. - * @param bi Pointer to information on the prediction - * recorded at prediction time. - */ - void specLoopUpdate(Addr pc, bool taken, BranchInfo* bi); + void squash( + ThreadID tid, bool taken, void *bp_history) override; - const unsigned logRatioBiModalHystEntries; const unsigned logSizeLoopPred; - const unsigned nHistoryTables; - const unsigned tagTableCounterBits; - const unsigned tagTableUBits; - const unsigned histBufferSize; - const unsigned minHist; - const unsigned maxHist; - const unsigned pathHistBits; const unsigned loopTableAgeBits; const unsigned loopTableConfidenceBits; const unsigned loopTableTagBits; @@ -372,48 +187,9 @@ class LTAGE: public BPredUnit const uint16_t loopTagMask; const uint16_t loopNumIterMask; - const std::vector<unsigned> tagTableTagWidths; - const std::vector<int> logTagTableSizes; - - std::vector<bool> btablePrediction; - std::vector<bool> btableHysteresis; - TageEntry **gtable; LoopEntry *ltable; - // Keep per-thread histories to - // support SMT. - struct ThreadHistory { - // Speculative path history - // (LSB of branch address) - int pathHist; - - // Speculative branch direction - // history (circular buffer) - // @TODO Convert to std::vector<bool> - uint8_t *globalHistory; - - // Pointer to most recent branch outcome - uint8_t* gHist; - - // Index to most recent branch outcome - int ptGhist; - - // Speculative folded histories. - FoldedHistory *computeIndices; - FoldedHistory *computeTags[2]; - }; - - std::vector<ThreadHistory> threadHistory; - - int *histLengths; - int *tableIndices; - int *tableTags; - int8_t loopUseCounter; - int8_t useAltPredForNewlyAllocated; - uint64_t tCounter; - uint64_t logUResetPeriod; - unsigned useAltOnNaBits; unsigned withLoopBits; }; diff --git a/src/cpu/pred/tage.cc b/src/cpu/pred/tage.cc new file mode 100644 index 000000000..c22e6b7a5 --- /dev/null +++ b/src/cpu/pred/tage.cc @@ -0,0 +1,609 @@ +/* + * Copyright (c) 2014 The University of Wisconsin + * + * Copyright (c) 2006 INRIA (Institut National de Recherche en + * Informatique et en Automatique / French National Research Institute + * for Computer Science and Applied Mathematics) + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Vignyan Reddy, Dibakar Gope and Arthur Perais, + * from André Seznec's code. + */ + +/* @file + * Implementation of a TAGE branch predictor + */ + +#include "cpu/pred/tage.hh" + +#include "base/intmath.hh" +#include "base/logging.hh" +#include "base/random.hh" +#include "base/trace.hh" +#include "debug/Fetch.hh" +#include "debug/Tage.hh" + +TAGE::TAGE(const TAGEParams *params) + : BPredUnit(params), + logRatioBiModalHystEntries(params->logRatioBiModalHystEntries), + nHistoryTables(params->nHistoryTables), + tagTableCounterBits(params->tagTableCounterBits), + tagTableUBits(params->tagTableUBits), + histBufferSize(params->histBufferSize), + minHist(params->minHist), + maxHist(params->maxHist), + pathHistBits(params->pathHistBits), + tagTableTagWidths(params->tagTableTagWidths), + logTagTableSizes(params->logTagTableSizes), + threadHistory(params->numThreads), + logUResetPeriod(params->logUResetPeriod), + useAltOnNaBits(params->useAltOnNaBits) +{ + // Current method for periodically resetting the u counter bits only + // works for 1 or 2 bits + // Also make sure that it is not 0 + assert(tagTableUBits <= 2 && (tagTableUBits > 0)); + + // we use int type for the path history, so it cannot be more than + // its size + assert(pathHistBits <= (sizeof(int)*8)); + + // initialize the counter to half of the period + assert(logUResetPeriod != 0); + tCounter = ULL(1) << (logUResetPeriod - 1); + + assert(params->histBufferSize > params->maxHist * 2); + useAltPredForNewlyAllocated = 0; + + for (auto& history : threadHistory) { + history.pathHist = 0; + history.globalHistory = new uint8_t[histBufferSize]; + history.gHist = history.globalHistory; + memset(history.gHist, 0, histBufferSize); + history.ptGhist = 0; + } + + histLengths = new int [nHistoryTables+1]; + histLengths[1] = minHist; + histLengths[nHistoryTables] = maxHist; + + for (int i = 2; i <= nHistoryTables; i++) { + histLengths[i] = (int) (((double) minHist * + pow ((double) (maxHist) / (double) minHist, + (double) (i - 1) / (double) ((nHistoryTables- 1)))) + + 0.5); + } + + assert(tagTableTagWidths.size() == (nHistoryTables+1)); + assert(logTagTableSizes.size() == (nHistoryTables+1)); + + // First entry is for the Bimodal table and it is untagged in this + // implementation + assert(tagTableTagWidths[0] == 0); + + for (auto& history : threadHistory) { + history.computeIndices = new FoldedHistory[nHistoryTables+1]; + history.computeTags[0] = new FoldedHistory[nHistoryTables+1]; + history.computeTags[1] = new FoldedHistory[nHistoryTables+1]; + + for (int i = 1; i <= nHistoryTables; i++) { + history.computeIndices[i].init( + histLengths[i], (logTagTableSizes[i])); + history.computeTags[0][i].init( + history.computeIndices[i].origLength, tagTableTagWidths[i]); + history.computeTags[1][i].init( + history.computeIndices[i].origLength, tagTableTagWidths[i]-1); + DPRINTF(Tage, "HistLength:%d, TTSize:%d, TTTWidth:%d\n", + histLengths[i], logTagTableSizes[i], tagTableTagWidths[i]); + } + } + + const uint64_t bimodalTableSize = ULL(1) << logTagTableSizes[0]; + btablePrediction.resize(bimodalTableSize, false); + btableHysteresis.resize(bimodalTableSize >> logRatioBiModalHystEntries, + true); + + gtable = new TageEntry*[nHistoryTables + 1]; + for (int i = 1; i <= nHistoryTables; i++) { + gtable[i] = new TageEntry[1<<(logTagTableSizes[i])]; + } + + tableIndices = new int [nHistoryTables+1]; + tableTags = new int [nHistoryTables+1]; +} + +int +TAGE::bindex(Addr pc_in) const +{ + return ((pc_in >> instShiftAmt) & ((ULL(1) << (logTagTableSizes[0])) - 1)); +} + +int +TAGE::F(int A, int size, int bank) const +{ + int A1, A2; + + A = A & ((ULL(1) << size) - 1); + A1 = (A & ((ULL(1) << logTagTableSizes[bank]) - 1)); + A2 = (A >> logTagTableSizes[bank]); + A2 = ((A2 << bank) & ((ULL(1) << logTagTableSizes[bank]) - 1)) + + (A2 >> (logTagTableSizes[bank] - bank)); + A = A1 ^ A2; + A = ((A << bank) & ((ULL(1) << logTagTableSizes[bank]) - 1)) + + (A >> (logTagTableSizes[bank] - bank)); + return (A); +} + + +// gindex computes a full hash of pc, ghist and pathHist +int +TAGE::gindex(ThreadID tid, Addr pc, int bank) const +{ + int index; + int hlen = (histLengths[bank] > pathHistBits) ? pathHistBits : + histLengths[bank]; + const Addr shiftedPc = pc >> instShiftAmt; + index = + shiftedPc ^ + (shiftedPc >> ((int) abs(logTagTableSizes[bank] - bank) + 1)) ^ + threadHistory[tid].computeIndices[bank].comp ^ + F(threadHistory[tid].pathHist, hlen, bank); + + return (index & ((ULL(1) << (logTagTableSizes[bank])) - 1)); +} + + +// Tag computation +uint16_t +TAGE::gtag(ThreadID tid, Addr pc, int bank) const +{ + int tag = (pc >> instShiftAmt) ^ + threadHistory[tid].computeTags[0][bank].comp ^ + (threadHistory[tid].computeTags[1][bank].comp << 1); + + return (tag & ((ULL(1) << tagTableTagWidths[bank]) - 1)); +} + + +// Up-down saturating counter +void +TAGE::ctrUpdate(int8_t & ctr, bool taken, int nbits) +{ + assert(nbits <= sizeof(int8_t) << 3); + if (taken) { + if (ctr < ((1 << (nbits - 1)) - 1)) + ctr++; + } else { + if (ctr > -(1 << (nbits - 1))) + ctr--; + } +} + +// Up-down unsigned saturating counter +void +TAGE::unsignedCtrUpdate(uint8_t & ctr, bool up, unsigned nbits) +{ + assert(nbits <= sizeof(uint8_t) << 3); + if (up) { + if (ctr < ((1 << nbits) - 1)) + ctr++; + } else { + if (ctr) + ctr--; + } +} + +// Bimodal prediction +bool +TAGE::getBimodePred(Addr pc, TageBranchInfo* bi) const +{ + return btablePrediction[bi->bimodalIndex]; +} + + +// Update the bimodal predictor: a hysteresis bit is shared among N prediction +// bits (N = 2 ^ logRatioBiModalHystEntries) +void +TAGE::baseUpdate(Addr pc, bool taken, TageBranchInfo* bi) +{ + int inter = (btablePrediction[bi->bimodalIndex] << 1) + + btableHysteresis[bi->bimodalIndex >> logRatioBiModalHystEntries]; + if (taken) { + if (inter < 3) + inter++; + } else if (inter > 0) { + inter--; + } + const bool pred = inter >> 1; + const bool hyst = inter & 1; + btablePrediction[bi->bimodalIndex] = pred; + btableHysteresis[bi->bimodalIndex >> logRatioBiModalHystEntries] = hyst; + DPRINTF(Tage, "Updating branch %lx, pred:%d, hyst:%d\n", pc, pred, hyst); +} + +// shifting the global history: we manage the history in a big table in order +// to reduce simulation time +void +TAGE::updateGHist(uint8_t * &h, bool dir, uint8_t * tab, int &pt) +{ + if (pt == 0) { + DPRINTF(Tage, "Rolling over the histories\n"); + // Copy beginning of globalHistoryBuffer to end, such that + // the last maxHist outcomes are still reachable + // through pt[0 .. maxHist - 1]. + for (int i = 0; i < maxHist; i++) + tab[histBufferSize - maxHist + i] = tab[i]; + pt = histBufferSize - maxHist; + h = &tab[pt]; + } + pt--; + h--; + h[0] = (dir) ? 1 : 0; +} + +// Get GHR for hashing indirect predictor +// Build history backwards from pointer in +// bp_history. +unsigned +TAGE::getGHR(ThreadID tid, void *bp_history) const +{ + TageBranchInfo* bi = static_cast<TageBranchInfo*>(bp_history); + unsigned val = 0; + for (unsigned i = 0; i < 32; i++) { + // Make sure we don't go out of bounds + int gh_offset = bi->ptGhist + i; + assert(&(threadHistory[tid].globalHistory[gh_offset]) < + threadHistory[tid].globalHistory + histBufferSize); + val |= ((threadHistory[tid].globalHistory[gh_offset] & 0x1) << i); + } + + return val; +} + +//prediction +bool +TAGE::predict(ThreadID tid, Addr branch_pc, bool cond_branch, void* &b) +{ + TageBranchInfo *bi = new TageBranchInfo(nHistoryTables+1); + b = (void*)(bi); + return tagePredict(tid, branch_pc, cond_branch, bi); +} + +bool +TAGE::tagePredict(ThreadID tid, Addr branch_pc, + bool cond_branch, TageBranchInfo* bi) +{ + Addr pc = branch_pc; + bool pred_taken = true; + + if (cond_branch) { + // TAGE prediction + + // computes the table addresses and the partial tags + for (int i = 1; i <= nHistoryTables; i++) { + tableIndices[i] = gindex(tid, pc, i); + bi->tableIndices[i] = tableIndices[i]; + tableTags[i] = gtag(tid, pc, i); + bi->tableTags[i] = tableTags[i]; + } + + bi->bimodalIndex = bindex(pc); + + bi->hitBank = 0; + bi->altBank = 0; + //Look for the bank with longest matching history + for (int i = nHistoryTables; i > 0; i--) { + if (gtable[i][tableIndices[i]].tag == tableTags[i]) { + bi->hitBank = i; + bi->hitBankIndex = tableIndices[bi->hitBank]; + break; + } + } + //Look for the alternate bank + for (int i = bi->hitBank - 1; i > 0; i--) { + if (gtable[i][tableIndices[i]].tag == tableTags[i]) { + bi->altBank = i; + bi->altBankIndex = tableIndices[bi->altBank]; + break; + } + } + //computes the prediction and the alternate prediction + if (bi->hitBank > 0) { + if (bi->altBank > 0) { + bi->altTaken = + gtable[bi->altBank][tableIndices[bi->altBank]].ctr >= 0; + }else { + bi->altTaken = getBimodePred(pc, bi); + } + + bi->longestMatchPred = + gtable[bi->hitBank][tableIndices[bi->hitBank]].ctr >= 0; + bi->pseudoNewAlloc = + abs(2 * gtable[bi->hitBank][bi->hitBankIndex].ctr + 1) <= 1; + + //if the entry is recognized as a newly allocated entry and + //useAltPredForNewlyAllocated is positive use the alternate + //prediction + if ((useAltPredForNewlyAllocated < 0) + || abs(2 * + gtable[bi->hitBank][tableIndices[bi->hitBank]].ctr + 1) > 1) + bi->tagePred = bi->longestMatchPred; + else + bi->tagePred = bi->altTaken; + } else { + bi->altTaken = getBimodePred(pc, bi); + bi->tagePred = bi->altTaken; + bi->longestMatchPred = bi->altTaken; + } + //end TAGE prediction + + pred_taken = (bi->tagePred); + DPRINTF(Tage, "Predict for %lx: taken?:%d, tagePred:%d, altPred:%d\n", + branch_pc, pred_taken, bi->tagePred, bi->altTaken); + } + bi->branchPC = branch_pc; + bi->condBranch = cond_branch; + return pred_taken; +} + +// PREDICTOR UPDATE +void +TAGE::update(ThreadID tid, Addr branch_pc, bool taken, void* bp_history, + bool squashed) +{ + assert(bp_history); + + TageBranchInfo *bi = static_cast<TageBranchInfo*>(bp_history); + + if (squashed) { + // This restores the global history, then update it + // and recomputes the folded histories. + squash(tid, taken, bp_history); + return; + } + + int nrand = random_mt.random<int>(0,3); + if (bi->condBranch) { + DPRINTF(Tage, "Updating tables for branch:%lx; taken?:%d\n", + branch_pc, taken); + condBranchUpdate(branch_pc, taken, bi, nrand); + } + if (!squashed) { + delete bi; + } +} + +void +TAGE::condBranchUpdate(Addr branch_pc, bool taken, + TageBranchInfo* bi, int nrand) +{ + // TAGE UPDATE + // try to allocate a new entries only if prediction was wrong + bool longest_match_pred = false; + bool alloc = (bi->tagePred != taken) && (bi->hitBank < nHistoryTables); + if (bi->hitBank > 0) { + // Manage the selection between longest matching and alternate + // matching for "pseudo"-newly allocated longest matching entry + longest_match_pred = bi->longestMatchPred; + bool PseudoNewAlloc = bi->pseudoNewAlloc; + // an entry is considered as newly allocated if its prediction + // counter is weak + if (PseudoNewAlloc) { + if (longest_match_pred == taken) { + alloc = false; + } + // if it was delivering the correct prediction, no need to + // allocate new entry even if the overall prediction was false + if (longest_match_pred != bi->altTaken) { + ctrUpdate(useAltPredForNewlyAllocated, + bi->altTaken == taken, useAltOnNaBits); + } + } + } + + if (alloc) { + // is there some "unuseful" entry to allocate + uint8_t min = 1; + for (int i = nHistoryTables; i > bi->hitBank; i--) { + if (gtable[i][bi->tableIndices[i]].u < min) { + min = gtable[i][bi->tableIndices[i]].u; + } + } + + // we allocate an entry with a longer history + // to avoid ping-pong, we do not choose systematically the next + // entry, but among the 3 next entries + int Y = nrand & + ((ULL(1) << (nHistoryTables - bi->hitBank - 1)) - 1); + int X = bi->hitBank + 1; + if (Y & 1) { + X++; + if (Y & 2) + X++; + } + // No entry available, forces one to be available + if (min > 0) { + gtable[X][bi->tableIndices[X]].u = 0; + } + + + //Allocate only one entry + for (int i = X; i <= nHistoryTables; i++) { + if ((gtable[i][bi->tableIndices[i]].u == 0)) { + gtable[i][bi->tableIndices[i]].tag = bi->tableTags[i]; + gtable[i][bi->tableIndices[i]].ctr = (taken) ? 0 : -1; + break; + } + } + } + //periodic reset of u: reset is not complete but bit by bit + tCounter++; + if ((tCounter & ((ULL(1) << logUResetPeriod) - 1)) == 0) { + // reset least significant bit + // most significant bit becomes least significant bit + for (int i = 1; i <= nHistoryTables; i++) { + for (int j = 0; j < (ULL(1) << logTagTableSizes[i]); j++) { + gtable[i][j].u = gtable[i][j].u >> 1; + } + } + } + + if (bi->hitBank > 0) { + DPRINTF(Tage, "Updating tag table entry (%d,%d) for branch %lx\n", + bi->hitBank, bi->hitBankIndex, branch_pc); + ctrUpdate(gtable[bi->hitBank][bi->hitBankIndex].ctr, taken, + tagTableCounterBits); + // if the provider entry is not certified to be useful also update + // the alternate prediction + if (gtable[bi->hitBank][bi->hitBankIndex].u == 0) { + if (bi->altBank > 0) { + ctrUpdate(gtable[bi->altBank][bi->altBankIndex].ctr, taken, + tagTableCounterBits); + DPRINTF(Tage, "Updating tag table entry (%d,%d) for" + " branch %lx\n", bi->hitBank, bi->hitBankIndex, + branch_pc); + } + if (bi->altBank == 0) { + baseUpdate(branch_pc, taken, bi); + } + } + + // update the u counter + if (bi->tagePred != bi->altTaken) { + unsignedCtrUpdate(gtable[bi->hitBank][bi->hitBankIndex].u, + bi->tagePred == taken, tagTableUBits); + } + } else { + baseUpdate(branch_pc, taken, bi); + } +} + +void +TAGE::updateHistories(ThreadID tid, Addr branch_pc, bool taken, void* b) +{ + TageBranchInfo* bi = (TageBranchInfo*)(b); + ThreadHistory& tHist = threadHistory[tid]; + // UPDATE HISTORIES + bool pathbit = ((branch_pc >> instShiftAmt) & 1); + //on a squash, return pointers to this and recompute indices. + //update user history + updateGHist(tHist.gHist, taken, tHist.globalHistory, tHist.ptGhist); + tHist.pathHist = (tHist.pathHist << 1) + pathbit; + tHist.pathHist = (tHist.pathHist & ((ULL(1) << pathHistBits) - 1)); + + bi->ptGhist = tHist.ptGhist; + bi->pathHist = tHist.pathHist; + //prepare next index and tag computations for user branchs + for (int i = 1; i <= nHistoryTables; i++) + { + bi->ci[i] = tHist.computeIndices[i].comp; + bi->ct0[i] = tHist.computeTags[0][i].comp; + bi->ct1[i] = tHist.computeTags[1][i].comp; + tHist.computeIndices[i].update(tHist.gHist); + tHist.computeTags[0][i].update(tHist.gHist); + tHist.computeTags[1][i].update(tHist.gHist); + } + DPRINTF(Tage, "Updating global histories with branch:%lx; taken?:%d, " + "path Hist: %x; pointer:%d\n", branch_pc, taken, tHist.pathHist, + tHist.ptGhist); +} + +void +TAGE::squash(ThreadID tid, bool taken, void *bp_history) +{ + TageBranchInfo* bi = (TageBranchInfo*)(bp_history); + ThreadHistory& tHist = threadHistory[tid]; + DPRINTF(Tage, "Restoring branch info: %lx; taken? %d; PathHistory:%x, " + "pointer:%d\n", bi->branchPC,taken, bi->pathHist, bi->ptGhist); + tHist.pathHist = bi->pathHist; + tHist.ptGhist = bi->ptGhist; + tHist.gHist = &(tHist.globalHistory[tHist.ptGhist]); + tHist.gHist[0] = (taken ? 1 : 0); + for (int i = 1; i <= nHistoryTables; i++) { + tHist.computeIndices[i].comp = bi->ci[i]; + tHist.computeTags[0][i].comp = bi->ct0[i]; + tHist.computeTags[1][i].comp = bi->ct1[i]; + tHist.computeIndices[i].update(tHist.gHist); + tHist.computeTags[0][i].update(tHist.gHist); + tHist.computeTags[1][i].update(tHist.gHist); + } +} + +void +TAGE::squash(ThreadID tid, void *bp_history) +{ + TageBranchInfo* bi = (TageBranchInfo*)(bp_history); + DPRINTF(Tage, "Deleting branch info: %lx\n", bi->branchPC); + delete bi; +} + +bool +TAGE::lookup(ThreadID tid, Addr branch_pc, void* &bp_history) +{ + bool retval = predict(tid, branch_pc, true, bp_history); + + DPRINTF(Tage, "Lookup branch: %lx; predict:%d\n", branch_pc, retval); + updateHistories(tid, branch_pc, retval, bp_history); + assert(threadHistory[tid].gHist == + &threadHistory[tid].globalHistory[threadHistory[tid].ptGhist]); + + return retval; +} + +void +TAGE::btbUpdate(ThreadID tid, Addr branch_pc, void* &bp_history) +{ + TageBranchInfo* bi = (TageBranchInfo*) bp_history; + ThreadHistory& tHist = threadHistory[tid]; + DPRINTF(Tage, "BTB miss resets prediction: %lx\n", branch_pc); + assert(tHist.gHist == &tHist.globalHistory[tHist.ptGhist]); + tHist.gHist[0] = 0; + for (int i = 1; i <= nHistoryTables; i++) { + tHist.computeIndices[i].comp = bi->ci[i]; + tHist.computeTags[0][i].comp = bi->ct0[i]; + tHist.computeTags[1][i].comp = bi->ct1[i]; + tHist.computeIndices[i].update(tHist.gHist); + tHist.computeTags[0][i].update(tHist.gHist); + tHist.computeTags[1][i].update(tHist.gHist); + } +} + +void +TAGE::uncondBranch(ThreadID tid, Addr br_pc, void* &bp_history) +{ + DPRINTF(Tage, "UnConditionalBranch: %lx\n", br_pc); + predict(tid, br_pc, false, bp_history); + updateHistories(tid, br_pc, true, bp_history); + assert(threadHistory[tid].gHist == + &threadHistory[tid].globalHistory[threadHistory[tid].ptGhist]); +} + +TAGE* +TAGEParams::create() +{ + return new TAGE(this); +} diff --git a/src/cpu/pred/tage.hh b/src/cpu/pred/tage.hh new file mode 100644 index 000000000..9ba02414c --- /dev/null +++ b/src/cpu/pred/tage.hh @@ -0,0 +1,374 @@ +/* + * Copyright (c) 2014 The University of Wisconsin + * + * Copyright (c) 2006 INRIA (Institut National de Recherche en + * Informatique et en Automatique / French National Research Institute + * for Computer Science and Applied Mathematics) + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Vignyan Reddy, Dibakar Gope and Arthur Perais, + * from André Seznec's code. + */ + +/* @file + * Implementation of a TAGE branch predictor. TAGE is a global-history based + * branch predictor. It features a PC-indexed bimodal predictor and N + * partially tagged tables, indexed with a hash of the PC and the global + * branch history. The different lengths of global branch history used to + * index the partially tagged tables grow geometrically. A small path history + * is also used in the hash. + * + * All TAGE tables are accessed in parallel, and the one using the longest + * history that matches provides the prediction (some exceptions apply). + * Entries are allocated in components using a longer history than the + * one that predicted when the prediction is incorrect. + */ + +#ifndef __CPU_PRED_TAGE +#define __CPU_PRED_TAGE + +#include <vector> + +#include "base/types.hh" +#include "cpu/pred/bpred_unit.hh" +#include "params/TAGE.hh" + +class TAGE: public BPredUnit +{ + public: + TAGE(const TAGEParams *params); + + // Base class methods. + void uncondBranch(ThreadID tid, Addr br_pc, void* &bp_history) override; + bool lookup(ThreadID tid, Addr branch_addr, void* &bp_history) override; + void btbUpdate(ThreadID tid, Addr branch_addr, void* &bp_history) override; + void update(ThreadID tid, Addr branch_addr, bool taken, void *bp_history, + bool squashed) override; + virtual void squash(ThreadID tid, void *bp_history) override; + unsigned getGHR(ThreadID tid, void *bp_history) const override; + + protected: + // Prediction Structures + + // Tage Entry + struct TageEntry + { + int8_t ctr; + uint16_t tag; + uint8_t u; + TageEntry() : ctr(0), tag(0), u(0) { } + }; + + // Folded History Table - compressed history + // to mix with instruction PC to index partially + // tagged tables. + struct FoldedHistory + { + unsigned comp; + int compLength; + int origLength; + int outpoint; + + void init(int original_length, int compressed_length) + { + comp = 0; + origLength = original_length; + compLength = compressed_length; + outpoint = original_length % compressed_length; + } + + void update(uint8_t * h) + { + comp = (comp << 1) | h[0]; + comp ^= h[origLength] << outpoint; + comp ^= (comp >> compLength); + comp &= (ULL(1) << compLength) - 1; + } + }; + + // Primary branch history entry + struct TageBranchInfo + { + int pathHist; + int ptGhist; + int hitBank; + int hitBankIndex; + int altBank; + int altBankIndex; + int bimodalIndex; + + bool tagePred; + bool altTaken; + bool condBranch; + bool longestMatchPred; + bool pseudoNewAlloc; + Addr branchPC; + + // Pointer to dynamically allocated storage + // to save table indices and folded histories. + // To do one call to new instead of five. + int *storage; + + // Pointers to actual saved array within the dynamically + // allocated storage. + int *tableIndices; + int *tableTags; + int *ci; + int *ct0; + int *ct1; + + TageBranchInfo(int sz) + : pathHist(0), ptGhist(0), + hitBank(0), hitBankIndex(0), + altBank(0), altBankIndex(0), + bimodalIndex(0), + tagePred(false), altTaken(false), + condBranch(false), longestMatchPred(false), + pseudoNewAlloc(false), branchPC(0) + { + storage = new int [sz * 5]; + tableIndices = storage; + tableTags = storage + sz; + ci = tableTags + sz; + ct0 = ci + sz; + ct1 = ct0 + sz; + } + + virtual ~TageBranchInfo() + { + delete[] storage; + } + }; + + /** + * Computes the index used to access the + * bimodal table. + * @param pc_in The unshifted branch PC. + */ + int bindex(Addr pc_in) const; + + /** + * Computes the index used to access a + * partially tagged table. + * @param tid The thread ID used to select the + * global histories to use. + * @param pc The unshifted branch PC. + * @param bank The partially tagged table to access. + */ + inline int gindex(ThreadID tid, Addr pc, int bank) const; + + /** + * Utility function to shuffle the path history + * depending on which tagged table we are accessing. + * @param phist The path history. + * @param size Number of path history bits to use. + * @param bank The partially tagged table to access. + */ + int F(int phist, int size, int bank) const; + + /** + * Computes the partial tag of a tagged table. + * @param tid the thread ID used to select the + * global histories to use. + * @param pc The unshifted branch PC. + * @param bank The partially tagged table to access. + */ + inline uint16_t gtag(ThreadID tid, Addr pc, int bank) const; + + /** + * Updates a direction counter based on the actual + * branch outcome. + * @param ctr Reference to counter to update. + * @param taken Actual branch outcome. + * @param nbits Counter width. + */ + void ctrUpdate(int8_t & ctr, bool taken, int nbits); + + /** + * Updates an unsigned counter based on up/down parameter + * @param ctr Reference to counter to update. + * @param up Boolean indicating if the counter is incremented/decremented + * If true it is incremented, if false it is decremented + * @param nbits Counter width. + */ + void unsignedCtrUpdate(uint8_t & ctr, bool up, unsigned nbits); + + /** + * Get a branch prediction from the bimodal + * predictor. + * @param pc The unshifted branch PC. + * @param bi Pointer to information on the + * prediction. + */ + bool getBimodePred(Addr pc, TageBranchInfo* bi) const; + + /** + * Updates the bimodal predictor. + * @param pc The unshifted branch PC. + * @param taken The actual branch outcome. + * @param bi Pointer to information on the prediction + * recorded at prediction time. + */ + void baseUpdate(Addr pc, bool taken, TageBranchInfo* bi); + + /** + * (Speculatively) updates the global branch history. + * @param h Reference to pointer to global branch history. + * @param dir (Predicted) outcome to update the histories + * with. + * @param tab + * @param PT Reference to path history. + */ + void updateGHist(uint8_t * &h, bool dir, uint8_t * tab, int &PT); + + /** + * Get a branch prediction from TAGE. *NOT* an override of + * BpredUnit::predict(). + * @param tid The thread ID to select the global + * histories to use. + * @param branch_pc The unshifted branch PC. + * @param cond_branch True if the branch is conditional. + * @param b Reference to wrapping pointer to allow storing + * derived class prediction information in the base class. + */ + virtual bool predict( + ThreadID tid, Addr branch_pc, bool cond_branch, void* &b); + + /** + * Update TAGE. Called at execute to repair histories on a misprediction + * and at commit to update the tables. + * @param tid The thread ID to select the global + * histories to use. + * @param branch_pc The unshifted branch PC. + * @param taken Actual branch outcome. + * @param bi Pointer to information on the prediction + * recorded at prediction time. + */ + void update(ThreadID tid, Addr branch_pc, bool taken, TageBranchInfo* bi); + + /** + * (Speculatively) updates global histories (path and direction). + * Also recomputes compressed (folded) histories based on the + * branch direction. + * @param tid The thread ID to select the histories + * to update. + * @param branch_pc The unshifted branch PC. + * @param taken (Predicted) branch direction. + * @param b Wrapping pointer to TageBranchInfo (to allow + * storing derived class prediction information in the + * base class). + */ + void updateHistories(ThreadID tid, Addr branch_pc, bool taken, void* b); + + /** + * Restores speculatively updated path and direction histories. + * Also recomputes compressed (folded) histories based on the + * correct branch outcome. + * This version of squash() is called once on a branch misprediction. + * @param tid The Thread ID to select the histories to rollback. + * @param taken The correct branch outcome. + * @param bp_history Wrapping pointer to TageBranchInfo (to allow + * storing derived class prediction information in the + * base class). + * @post bp_history points to valid memory. + */ + virtual void squash(ThreadID tid, bool taken, void *bp_history); + + /** + * Update TAGE for conditional branches. + * @param branch_pc The unshifted branch PC. + * @param taken Actual branch outcome. + * @param bi Pointer to information on the prediction + * recorded at prediction time. + * @nrand Random int number from 0 to 3 + */ + virtual void condBranchUpdate( + Addr branch_pc, bool taken, TageBranchInfo* bi, int nrand); + + /** + * TAGE prediction called from TAGE::predict + * @param tid The thread ID to select the global + * histories to use. + * @param branch_pc The unshifted branch PC. + * @param cond_branch True if the branch is conditional. + * @param bi Pointer to the TageBranchInfo + */ + bool tagePredict( + ThreadID tid, Addr branch_pc, bool cond_branch, TageBranchInfo* bi); + + const unsigned logRatioBiModalHystEntries; + const unsigned nHistoryTables; + const unsigned tagTableCounterBits; + const unsigned tagTableUBits; + const unsigned histBufferSize; + const unsigned minHist; + const unsigned maxHist; + const unsigned pathHistBits; + + const std::vector<unsigned> tagTableTagWidths; + const std::vector<int> logTagTableSizes; + + std::vector<bool> btablePrediction; + std::vector<bool> btableHysteresis; + TageEntry **gtable; + + // Keep per-thread histories to + // support SMT. + struct ThreadHistory { + // Speculative path history + // (LSB of branch address) + int pathHist; + + // Speculative branch direction + // history (circular buffer) + // @TODO Convert to std::vector<bool> + uint8_t *globalHistory; + + // Pointer to most recent branch outcome + uint8_t* gHist; + + // Index to most recent branch outcome + int ptGhist; + + // Speculative folded histories. + FoldedHistory *computeIndices; + FoldedHistory *computeTags[2]; + }; + + std::vector<ThreadHistory> threadHistory; + + int *histLengths; + int *tableIndices; + int *tableTags; + + int8_t useAltPredForNewlyAllocated; + uint64_t tCounter; + uint64_t logUResetPeriod; + unsigned useAltOnNaBits; +}; + +#endif // __CPU_PRED_TAGE |