From 02aa549c9b1fd81be4bb1408cb97b92dc126e360 Mon Sep 17 00:00:00 2001 From: Kevin Lim Date: Thu, 25 May 2006 14:41:36 -0400 Subject: Fix minor memory leak. --HG-- extra : convert_revision : aa222dd95d833b16b0f474ec156bd6955c2c54c6 --- cpu/o3/lsq_unit_impl.hh | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/cpu/o3/lsq_unit_impl.hh b/cpu/o3/lsq_unit_impl.hh index 7974ddaad..10f2b5572 100644 --- a/cpu/o3/lsq_unit_impl.hh +++ b/cpu/o3/lsq_unit_impl.hh @@ -51,12 +51,18 @@ LSQUnit::StoreCompletionEvent::process() //lsqPtr->removeMSHR(lsqPtr->storeQueue[storeIdx].inst->seqNum); - if (lsqPtr->isSwitchedOut()) + if (lsqPtr->isSwitchedOut()) { + if (wbEvent) + delete wbEvent; + return; + } lsqPtr->cpu->wakeCPU(); - if (wbEvent) + if (wbEvent) { wbEvent->process(); + delete wbEvent; + } lsqPtr->completeStore(storeIdx); } -- cgit v1.2.3 From 248bd2bb62861c8d77de4c8cfab6c6392bd85049 Mon Sep 17 00:00:00 2001 From: Kevin Lim Date: Thu, 25 May 2006 17:01:48 -0400 Subject: Various branch predictor fixes/cleanup. It works more correctly now and supports both local and tournament predictors. cpu/o3/2bit_local_pred.cc: Branch predictor cleanup/fixup. Rename this to LocalBP. cpu/o3/2bit_local_pred.hh: Rename to LocalBP, update to support changes to BPredUnit, include comments. cpu/o3/alpha_cpu_builder.cc: Support extra parameters to the branch predictor. Now it takes in a parameter to tell it which branch predictor it is using, the local or the tournament predictor. cpu/o3/alpha_params.hh: Add in extra parameter for the branch predictor type. cpu/o3/bpred_unit.cc: Branch predictor fixup/cleanup. Rename it to BPredUnit. cpu/o3/bpred_unit.hh: Branch predictor fixup/cleanup. Now supports both the local and tournament predictors, and stores the branch predictor update state. cpu/o3/bpred_unit_impl.hh: Branch predictor overhaul. Now supports both the local and tournament predictors. 
cpu/o3/cpu_policy.hh: cpu/ozone/ozone_impl.hh: cpu/ozone/simple_impl.hh: Reflect the class name change. cpu/o3/decode_impl.hh: Be sure to set the predicted target as well so we don't squash twice. cpu/o3/tournament_pred.cc: cpu/o3/tournament_pred.hh: Fixes to the tournament predictor. cpu/ozone/simple_params.hh: Include parameter for the branch predictor type. python/m5/objects/AlphaFullCPU.py: python/m5/objects/OzoneCPU.py: Include the parameter for the branch predictor type. --HG-- extra : convert_revision : 34afebb3b40b47accb12558e439ee4cb03df5e64 --- cpu/o3/2bit_local_pred.cc | 25 ++-- cpu/o3/2bit_local_pred.hh | 22 +++- cpu/o3/alpha_cpu_builder.cc | 3 + cpu/o3/alpha_params.hh | 1 + cpu/o3/bpred_unit.cc | 6 +- cpu/o3/bpred_unit.hh | 64 ++++++++--- cpu/o3/bpred_unit_impl.hh | 182 ++++++++++++++++++++++-------- cpu/o3/cpu_policy.hh | 2 +- cpu/o3/decode_impl.hh | 1 + cpu/o3/tournament_pred.cc | 232 ++++++++++++++++++++++---------------- cpu/o3/tournament_pred.hh | 90 +++++++++++++-- cpu/ozone/ozone_impl.hh | 2 +- cpu/ozone/simple_impl.hh | 2 +- cpu/ozone/simple_params.hh | 30 ++++- python/m5/objects/AlphaFullCPU.py | 1 + python/m5/objects/OzoneCPU.py | 1 + 16 files changed, 465 insertions(+), 199 deletions(-) diff --git a/cpu/o3/2bit_local_pred.cc b/cpu/o3/2bit_local_pred.cc index c3fb2fdb8..33c417c88 100644 --- a/cpu/o3/2bit_local_pred.cc +++ b/cpu/o3/2bit_local_pred.cc @@ -30,9 +30,9 @@ #include "base/trace.hh" #include "cpu/o3/2bit_local_pred.hh" -DefaultBP::DefaultBP(unsigned _localPredictorSize, - unsigned _localCtrBits, - unsigned _instShiftAmt) +LocalBP::LocalBP(unsigned _localPredictorSize, + unsigned _localCtrBits, + unsigned _instShiftAmt) : localPredictorSize(_localPredictorSize), localCtrBits(_localCtrBits), instShiftAmt(_instShiftAmt) @@ -68,7 +68,7 @@ DefaultBP::DefaultBP(unsigned _localPredictorSize, } void -DefaultBP::reset() +LocalBP::reset() { for (int i = 0; i < localPredictorSets; ++i) { localCtrs[i].reset(); @@ -76,21 +76,21 @@ 
DefaultBP::reset() } bool -DefaultBP::lookup(Addr &branch_addr) +LocalBP::lookup(Addr &branch_addr, void * &bp_history) { bool taken; - uint8_t local_prediction; + uint8_t counter_val; unsigned local_predictor_idx = getLocalIndex(branch_addr); DPRINTF(Fetch, "Branch predictor: Looking up index %#x\n", local_predictor_idx); - local_prediction = localCtrs[local_predictor_idx].read(); + counter_val = localCtrs[local_predictor_idx].read(); DPRINTF(Fetch, "Branch predictor: prediction is %i.\n", - (int)local_prediction); + (int)counter_val); - taken = getPrediction(local_prediction); + taken = getPrediction(counter_val); #if 0 // Speculative update. @@ -107,8 +107,9 @@ DefaultBP::lookup(Addr &branch_addr) } void -DefaultBP::update(Addr &branch_addr, bool taken) +LocalBP::update(Addr &branch_addr, bool taken, void *bp_history) { + assert(bp_history == NULL); unsigned local_predictor_idx; // Update the local predictor. @@ -128,7 +129,7 @@ DefaultBP::update(Addr &branch_addr, bool taken) inline bool -DefaultBP::getPrediction(uint8_t &count) +LocalBP::getPrediction(uint8_t &count) { // Get the MSB of the count return (count >> (localCtrBits - 1)); @@ -136,7 +137,7 @@ DefaultBP::getPrediction(uint8_t &count) inline unsigned -DefaultBP::getLocalIndex(Addr &branch_addr) +LocalBP::getLocalIndex(Addr &branch_addr) { return (branch_addr >> instShiftAmt) & indexMask; } diff --git a/cpu/o3/2bit_local_pred.hh b/cpu/o3/2bit_local_pred.hh index cd65978ca..02595702b 100644 --- a/cpu/o3/2bit_local_pred.hh +++ b/cpu/o3/2bit_local_pred.hh @@ -35,7 +35,14 @@ #include -class DefaultBP +/** + * Implements a local predictor that uses the PC to index into a table of + * counters. Note that any time a pointer to the bp_history is given, it + * should be NULL using this predictor because it does not have any branch + * predictor state that needs to be recorded or updated; the update can be + * determined solely by the branch being taken or not taken. 
+ */ +class LocalBP { public: /** @@ -44,28 +51,31 @@ class DefaultBP * @param localCtrBits Number of bits per counter. * @param instShiftAmt Offset amount for instructions to ignore alignment. */ - DefaultBP(unsigned localPredictorSize, unsigned localCtrBits, - unsigned instShiftAmt); + LocalBP(unsigned localPredictorSize, unsigned localCtrBits, + unsigned instShiftAmt); /** * Looks up the given address in the branch predictor and returns * a true/false value as to whether it is taken. * @param branch_addr The address of the branch to look up. + * @param bp_history Pointer to any bp history state. * @return Whether or not the branch is taken. */ - bool lookup(Addr &branch_addr); + bool lookup(Addr &branch_addr, void * &bp_history); /** * Updates the branch predictor with the actual result of a branch. * @param branch_addr The address of the branch to update. * @param taken Whether or not the branch was taken. */ - void update(Addr &branch_addr, bool taken); + void update(Addr &branch_addr, bool taken, void *bp_history); + + void squash(void *bp_history) + { assert(bp_history == NULL); } void reset(); private: - /** * Returns the taken/not taken prediction given the value of the * counter. 
diff --git a/cpu/o3/alpha_cpu_builder.cc b/cpu/o3/alpha_cpu_builder.cc index b0d812edc..08d42cd46 100644 --- a/cpu/o3/alpha_cpu_builder.cc +++ b/cpu/o3/alpha_cpu_builder.cc @@ -109,6 +109,7 @@ Param squashWidth; Param trapLatency; Param fetchTrapLatency; +Param predType; Param localPredictorSize; Param localCtrBits; Param localHistoryTableSize; @@ -234,6 +235,7 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(DerivAlphaFullCPU) INIT_PARAM_DFLT(trapLatency, "Number of cycles before the trap is handled", 6), INIT_PARAM_DFLT(fetchTrapLatency, "Number of cycles before the fetch trap is handled", 12), + INIT_PARAM(predType, "Type of branch predictor ('local', 'tournament')"), INIT_PARAM(localPredictorSize, "Size of local predictor"), INIT_PARAM(localCtrBits, "Bits per counter"), INIT_PARAM(localHistoryTableSize, "Size of local history table"), @@ -366,6 +368,7 @@ CREATE_SIM_OBJECT(DerivAlphaFullCPU) params->trapLatency = trapLatency; params->fetchTrapLatency = fetchTrapLatency; + params->predType = predType; params->localPredictorSize = localPredictorSize; params->localCtrBits = localCtrBits; params->localHistoryTableSize = localHistoryTableSize; diff --git a/cpu/o3/alpha_params.hh b/cpu/o3/alpha_params.hh index e3acf2c05..5eb00426d 100644 --- a/cpu/o3/alpha_params.hh +++ b/cpu/o3/alpha_params.hh @@ -127,6 +127,7 @@ class AlphaSimpleParams : public BaseFullCPU::Params // // Branch predictor (BP & BTB) // + std::string predType; unsigned localPredictorSize; unsigned localCtrBits; unsigned localHistoryTableSize; diff --git a/cpu/o3/bpred_unit.cc b/cpu/o3/bpred_unit.cc index 92344111f..e149b8073 100644 --- a/cpu/o3/bpred_unit.cc +++ b/cpu/o3/bpred_unit.cc @@ -32,6 +32,6 @@ #include "cpu/ozone/ozone_impl.hh" #include "cpu/ozone/simple_impl.hh" -template class TwobitBPredUnit; -template class TwobitBPredUnit; -template class TwobitBPredUnit; +template class BPredUnit; +template class BPredUnit; +template class BPredUnit; diff --git a/cpu/o3/bpred_unit.hh b/cpu/o3/bpred_unit.hh index 
b7814b2e9..93aae8f15 100644 --- a/cpu/o3/bpred_unit.hh +++ b/cpu/o3/bpred_unit.hh @@ -46,16 +46,25 @@ * and the BTB. */ template -class TwobitBPredUnit +class BPredUnit { - public: + private: typedef typename Impl::Params Params; typedef typename Impl::DynInstPtr DynInstPtr; + enum PredType { + Local, + Tournament + }; + + PredType predictor; + + public: + /** * @param params The params object, that has the size of the BP and BTB. */ - TwobitBPredUnit(Params *params); + BPredUnit(Params *params); /** * Registers statistics. @@ -76,6 +85,9 @@ class TwobitBPredUnit */ bool predict(DynInstPtr &inst, Addr &PC, unsigned tid); + // @todo: Rename this function. + void BPUncond(void * &bp_history); + /** * Tells the branch predictor to commit any updates until the given * sequence number. @@ -104,13 +116,20 @@ class TwobitBPredUnit void squash(const InstSeqNum &squashed_sn, const Addr &corr_target, bool actually_taken, unsigned tid); + /** + * @param bp_history Pointer to the history object. The predictor + * will need to update any state and delete the object. + */ + void BPSquash(void *bp_history); + /** * Looks up a given PC in the BP to see if it is taken or not taken. * @param inst_PC The PC to look up. + * @param bp_history Pointer that will be set to an object that + * has the branch predictor state associated with the lookup. * @return Whether the branch is taken or not taken. */ - bool BPLookup(Addr &inst_PC) - { return BP.lookup(inst_PC); } + bool BPLookup(Addr &inst_PC, void * &bp_history); /** * Looks up a given PC in the BTB to see if a matching entry exists. @@ -132,10 +151,11 @@ class TwobitBPredUnit * Updates the BP with taken/not taken information. * @param inst_PC The branch's PC that will be updated. * @param taken Whether the branch was taken or not taken. + * @param bp_history Pointer to the branch predictor state that is + * associated with the branch lookup that is being updated. * @todo Make this update flexible enough to handle a global predictor. 
*/ - void BPUpdate(Addr &inst_PC, bool taken) - { BP.update(inst_PC, taken); } + void BPUpdate(Addr &inst_PC, bool taken, void *bp_history); /** * Updates the BTB with the target of a branch. @@ -145,18 +165,20 @@ class TwobitBPredUnit void BTBUpdate(Addr &inst_PC, Addr &target_PC) { BTB.update(inst_PC, target_PC,0); } + void dump(); + private: struct PredictorHistory { /** - * Makes a predictor history struct that contains a sequence number, - * the PC of its instruction, and whether or not it was predicted - * taken. + * Makes a predictor history struct that contains any + * information needed to update the predictor, BTB, and RAS. */ PredictorHistory(const InstSeqNum &seq_num, const Addr &inst_PC, - const bool pred_taken, const unsigned _tid) - : seqNum(seq_num), PC(inst_PC), RASTarget(0), globalHistory(0), + const bool pred_taken, void *bp_history, + const unsigned _tid) + : seqNum(seq_num), PC(inst_PC), RASTarget(0), RASIndex(0), tid(_tid), predTaken(pred_taken), usedRAS(0), - wasCall(0) + wasCall(0), bpHistory(bp_history) { } /** The sequence number for the predictor history entry. */ @@ -168,9 +190,6 @@ class TwobitBPredUnit /** The RAS target (only valid if a return). */ Addr RASTarget; - /** The global history at the time this entry was created. */ - unsigned globalHistory; - /** The RAS index of the instruction (only valid if a call). */ unsigned RASIndex; @@ -185,6 +204,12 @@ class TwobitBPredUnit /** Whether or not the instruction was a call. */ bool wasCall; + + /** Pointer to the history object passed back from the branch + * predictor. It is used to update or restore state of the + * branch predictor. + */ + void *bpHistory; }; typedef std::list History; @@ -196,8 +221,11 @@ class TwobitBPredUnit */ History predHist[Impl::MaxThreads]; - /** The branch predictor. */ - DefaultBP BP; + /** The local branch predictor. */ + LocalBP *localBP; + + /** The tournament branch predictor. */ + TournamentBP *tournamentBP; /** The BTB. 
*/ DefaultBTB BTB; diff --git a/cpu/o3/bpred_unit_impl.hh b/cpu/o3/bpred_unit_impl.hh index c37df606b..1844c155e 100644 --- a/cpu/o3/bpred_unit_impl.hh +++ b/cpu/o3/bpred_unit_impl.hh @@ -36,21 +36,40 @@ using namespace std; template -TwobitBPredUnit::TwobitBPredUnit(Params *params) - : BP(params->localPredictorSize, - params->localCtrBits, - params->instShiftAmt), - BTB(params->BTBEntries, +BPredUnit::BPredUnit(Params *params) + : BTB(params->BTBEntries, params->BTBTagSize, params->instShiftAmt) { + // Setup the selected predictor. + if (params->predType == "local") { + localBP = new LocalBP(params->localPredictorSize, + params->localCtrBits, + params->instShiftAmt); + predictor = Local; + } else if (params->predType == "tournament") { + tournamentBP = new TournamentBP(params->localPredictorSize, + params->localCtrBits, + params->localHistoryTableSize, + params->localHistoryBits, + params->globalPredictorSize, + params->globalHistoryBits, + params->globalCtrBits, + params->choicePredictorSize, + params->choiceCtrBits, + params->instShiftAmt); + predictor = Tournament; + } else { + fatal("Invalid BP selected!"); + } + for (int i=0; i < Impl::MaxThreads; i++) RAS[i].init(params->RASSize); } template void -TwobitBPredUnit::regStats() +BPredUnit::regStats() { lookups .name(name() + ".BPredUnit.lookups") @@ -96,17 +115,20 @@ TwobitBPredUnit::regStats() template void -TwobitBPredUnit::switchOut() +BPredUnit::switchOut() { + // Clear any state upon switch out. for (int i = 0; i < Impl::MaxThreads; ++i) { - predHist[i].clear(); + squash(0, i); } } template void -TwobitBPredUnit::takeOverFrom() +BPredUnit::takeOverFrom() { + // Can reset all predictor state, but it's not necessarily better + // than leaving it be. 
/* for (int i = 0; i < Impl::MaxThreads; ++i) RAS[i].reset(); @@ -118,11 +140,10 @@ TwobitBPredUnit::takeOverFrom() template bool -TwobitBPredUnit::predict(DynInstPtr &inst, Addr &PC, unsigned tid) +BPredUnit::predict(DynInstPtr &inst, Addr &PC, unsigned tid) { // See if branch predictor predicts taken. // If so, get its target addr either from the BTB or the RAS. - // Once that's done, speculatively update the predictor? // Save off record of branch stuff so the RAS can be fixed // up once it's done. @@ -133,20 +154,25 @@ TwobitBPredUnit::predict(DynInstPtr &inst, Addr &PC, unsigned tid) ++lookups; + void *bp_history = NULL; + if (inst->isUncondCtrl()) { DPRINTF(Fetch, "BranchPred: [tid:%i] Unconditional control.\n", tid); pred_taken = true; + // Tell the BP there was an unconditional branch. + BPUncond(bp_history); } else { ++condPredicted; - pred_taken = BPLookup(PC); + pred_taken = BPLookup(PC, bp_history); DPRINTF(Fetch, "BranchPred: [tid:%i]: Branch predictor predicted %i " "for PC %#x\n", tid, pred_taken, inst->readPC()); } - PredictorHistory predict_record(inst->seqNum, PC, pred_taken, tid); + PredictorHistory predict_record(inst->seqNum, PC, pred_taken, + bp_history, tid); // Now lookup in the BTB or RAS. if (pred_taken) { @@ -187,7 +213,7 @@ TwobitBPredUnit::predict(DynInstPtr &inst, Addr &PC, unsigned tid) if (BTB.valid(PC, tid)) { ++BTBHits; - //If it's anything else, use the BTB to get the target addr. + // If it's not a return, use the BTB to get the target addr. 
target = BTB.lookup(PC, tid); DPRINTF(Fetch, "BranchPred: [tid:%i]: Instruction %#x predicted" @@ -221,7 +247,7 @@ TwobitBPredUnit::predict(DynInstPtr &inst, Addr &PC, unsigned tid) template void -TwobitBPredUnit::update(const InstSeqNum &done_sn, unsigned tid) +BPredUnit::update(const InstSeqNum &done_sn, unsigned tid) { DPRINTF(Fetch, "BranchPred: [tid:%i]: Commiting branches until sequence" "number %lli.\n", tid, done_sn); @@ -229,8 +255,9 @@ TwobitBPredUnit::update(const InstSeqNum &done_sn, unsigned tid) while (!predHist[tid].empty() && predHist[tid].back().seqNum <= done_sn) { // Update the branch predictor with the correct results. - BP.update(predHist[tid].back().PC, - predHist[tid].back().predTaken); + BPUpdate(predHist[tid].back().PC, + predHist[tid].back().predTaken, + predHist[tid].back().bpHistory); predHist[tid].pop_back(); } @@ -238,13 +265,13 @@ TwobitBPredUnit::update(const InstSeqNum &done_sn, unsigned tid) template void -TwobitBPredUnit::squash(const InstSeqNum &squashed_sn, unsigned tid) +BPredUnit::squash(const InstSeqNum &squashed_sn, unsigned tid) { History &pred_hist = predHist[tid]; while (!pred_hist.empty() && pred_hist.front().seqNum > squashed_sn) { - if (pred_hist.front().usedRAS) { + if (pred_hist.front().usedRAS) { DPRINTF(Fetch, "BranchPred: [tid:%i]: Restoring top of RAS to: %i," " target: %#x.\n", tid, @@ -255,12 +282,15 @@ TwobitBPredUnit::squash(const InstSeqNum &squashed_sn, unsigned tid) pred_hist.front().RASTarget); } else if (pred_hist.front().wasCall) { - DPRINTF(Fetch, "BranchPred: [tid:%i]: Removing speculative entry added " - "to the RAS.\n",tid); + DPRINTF(Fetch, "BranchPred: [tid:%i]: Removing speculative entry " + "added to the RAS.\n",tid); RAS[tid].pop(); } + // This call should delete the bpHistory. 
+ BPSquash(pred_hist.front().bpHistory); + pred_hist.pop_front(); } @@ -268,10 +298,10 @@ TwobitBPredUnit::squash(const InstSeqNum &squashed_sn, unsigned tid) template void -TwobitBPredUnit::squash(const InstSeqNum &squashed_sn, - const Addr &corr_target, - const bool actually_taken, - unsigned tid) +BPredUnit::squash(const InstSeqNum &squashed_sn, + const Addr &corr_target, + const bool actually_taken, + unsigned tid) { // Now that we know that a branch was mispredicted, we need to undo // all the branches that have been seen up until this branch and @@ -285,40 +315,96 @@ TwobitBPredUnit::squash(const InstSeqNum &squashed_sn, "setting target to %#x.\n", tid, squashed_sn, corr_target); - while (!pred_hist.empty() && - pred_hist.front().seqNum > squashed_sn) { - if (pred_hist.front().usedRAS) { - DPRINTF(Fetch, "BranchPred: [tid:%i]: Restoring top of RAS to: %i, " - "target: %#x.\n", - tid, - pred_hist.front().RASIndex, - pred_hist.front().RASTarget); - - RAS[tid].restore(pred_hist.front().RASIndex, - pred_hist.front().RASTarget); - } else if (pred_hist.front().wasCall) { - DPRINTF(Fetch, "BranchPred: [tid:%i]: Removing speculative entry" - " added to the RAS.\n", tid); - - RAS[tid].pop(); - } - - pred_hist.pop_front(); - } + squash(squashed_sn, tid); // If there's a squash due to a syscall, there may not be an entry // corresponding to the squash. In that case, don't bother trying to // fix up the entry. if (!pred_hist.empty()) { - pred_hist.front().predTaken = actually_taken; - + assert(pred_hist.front().seqNum == squashed_sn); if (pred_hist.front().usedRAS) { ++RASIncorrect; } - BP.update(pred_hist.front().PC, actually_taken); + BPUpdate(pred_hist.front().PC, actually_taken, + pred_hist.front().bpHistory); BTB.update(pred_hist.front().PC, corr_target, tid); pred_hist.pop_front(); } } + +template +void +BPredUnit::BPUncond(void * &bp_history) +{ + // Only the tournament predictor cares about unconditional branches. 
+ if (predictor == Tournament) { + tournamentBP->uncondBr(bp_history); + } +} + +template +void +BPredUnit::BPSquash(void *bp_history) +{ + if (predictor == Local) { + localBP->squash(bp_history); + } else if (predictor == Tournament) { + tournamentBP->squash(bp_history); + } else { + panic("Predictor type is unexpected value!"); + } +} + +template +bool +BPredUnit::BPLookup(Addr &inst_PC, void * &bp_history) +{ + if (predictor == Local) { + return localBP->lookup(inst_PC, bp_history); + } else if (predictor == Tournament) { + return tournamentBP->lookup(inst_PC, bp_history); + } else { + panic("Predictor type is unexpected value!"); + } +} + +template +void +BPredUnit::BPUpdate(Addr &inst_PC, bool taken, void *bp_history) +{ + if (predictor == Local) { + localBP->update(inst_PC, taken, bp_history); + } else if (predictor == Tournament) { + tournamentBP->update(inst_PC, taken, bp_history); + } else { + panic("Predictor type is unexpected value!"); + } +} + +template +void +BPredUnit::dump() +{ + typename History::iterator pred_hist_it; + + for (int i = 0; i < Impl::MaxThreads; ++i) { + if (!predHist[i].empty()) { + pred_hist_it = predHist[i].begin(); + + cprintf("predHist[%i].size(): %i\n", i, predHist[i].size()); + + while (pred_hist_it != predHist[i].end()) { + cprintf("[sn:%lli], PC:%#x, tid:%i, predTaken:%i, " + "bpHistory:%#x\n", + (*pred_hist_it).seqNum, (*pred_hist_it).PC, + (*pred_hist_it).tid, (*pred_hist_it).predTaken, + (*pred_hist_it).bpHistory); + pred_hist_it++; + } + + cprintf("\n"); + } + } +} diff --git a/cpu/o3/cpu_policy.hh b/cpu/o3/cpu_policy.hh index 52227013e..b4249b12d 100644 --- a/cpu/o3/cpu_policy.hh +++ b/cpu/o3/cpu_policy.hh @@ -51,7 +51,7 @@ template struct SimpleCPUPolicy { - typedef TwobitBPredUnit BPredUnit; + typedef BPredUnit BPredUnit; typedef PhysRegFile RegFile; typedef SimpleFreeList FreeList; typedef SimpleRenameMap RenameMap; diff --git a/cpu/o3/decode_impl.hh b/cpu/o3/decode_impl.hh index 2ed7ec6fc..8d84d46c8 100644 --- 
a/cpu/o3/decode_impl.hh +++ b/cpu/o3/decode_impl.hh @@ -721,6 +721,7 @@ DefaultDecode::decodeInsts(unsigned tid) // Might want to set some sort of boolean and just do // a check at the end squash(inst, inst->threadNumber); + inst->setPredTarg(inst->branchTarget()); break; } diff --git a/cpu/o3/tournament_pred.cc b/cpu/o3/tournament_pred.cc index 89da7b9f5..f8c95abd8 100644 --- a/cpu/o3/tournament_pred.cc +++ b/cpu/o3/tournament_pred.cc @@ -1,5 +1,5 @@ /* - * Copyright (c) 2004-2005 The Regents of The University of Michigan + * Copyright (c) 2004-2006 The Regents of The University of Michigan * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -26,6 +26,7 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ +#include "base/intmath.hh" #include "cpu/o3/tournament_pred.hh" TournamentBP::TournamentBP(unsigned _localPredictorSize, @@ -49,7 +50,9 @@ TournamentBP::TournamentBP(unsigned _localPredictorSize, choiceCtrBits(_choiceCtrBits), instShiftAmt(_instShiftAmt) { - //Should do checks here to make sure sizes are correct (powers of 2) + if (!isPowerOf2(localPredictorSize)) { + fatal("Invalid local predictor size!\n"); + } //Setup the array of counters for the local predictor localCtrs.resize(localPredictorSize); @@ -57,6 +60,10 @@ TournamentBP::TournamentBP(unsigned _localPredictorSize, for (int i = 0; i < localPredictorSize; ++i) localCtrs[i].setBits(localCtrBits); + if (!isPowerOf2(localHistoryTableSize)) { + fatal("Invalid local history table size!\n"); + } + //Setup the history table for the local table localHistoryTable.resize(localHistoryTableSize); @@ -66,6 +73,10 @@ TournamentBP::TournamentBP(unsigned _localPredictorSize, // Setup the local history mask localHistoryMask = (1 << localHistoryBits) - 1; + if (!isPowerOf2(globalPredictorSize)) { + fatal("Invalid global predictor size!\n"); + } + //Setup the array of counters for the global predictor globalCtrs.resize(globalPredictorSize); @@ -77,12 
+88,17 @@ TournamentBP::TournamentBP(unsigned _localPredictorSize, // Setup the global history mask globalHistoryMask = (1 << globalHistoryBits) - 1; + if (!isPowerOf2(choicePredictorSize)) { + fatal("Invalid choice predictor size!\n"); + } + //Setup the array of counters for the choice predictor choiceCtrs.resize(choicePredictorSize); for (int i = 0; i < choicePredictorSize; ++i) choiceCtrs[i].setBits(choiceCtrBits); + // @todo: Allow for different thresholds between the predictors. threshold = (1 << (localCtrBits - 1)) - 1; threshold = threshold / 2; } @@ -91,165 +107,185 @@ inline unsigned TournamentBP::calcLocHistIdx(Addr &branch_addr) { + // Get low order bits after removing instruction offset. return (branch_addr >> instShiftAmt) & (localHistoryTableSize - 1); } inline void -TournamentBP::updateHistoriesTaken(unsigned local_history_idx) +TournamentBP::updateGlobalHistTaken() { globalHistory = (globalHistory << 1) | 1; globalHistory = globalHistory & globalHistoryMask; - - localHistoryTable[local_history_idx] = - (localHistoryTable[local_history_idx] << 1) | 1; } inline void -TournamentBP::updateHistoriesNotTaken(unsigned local_history_idx) +TournamentBP::updateGlobalHistNotTaken() { globalHistory = (globalHistory << 1); globalHistory = globalHistory & globalHistoryMask; +} +inline +void +TournamentBP::updateLocalHistTaken(unsigned local_history_idx) +{ + localHistoryTable[local_history_idx] = + (localHistoryTable[local_history_idx] << 1) | 1; +} + +inline +void +TournamentBP::updateLocalHistNotTaken(unsigned local_history_idx) +{ localHistoryTable[local_history_idx] = (localHistoryTable[local_history_idx] << 1); } bool -TournamentBP::lookup(Addr &branch_addr) +TournamentBP::lookup(Addr &branch_addr, void * &bp_history) { - uint8_t local_prediction; + bool local_prediction; unsigned local_history_idx; unsigned local_predictor_idx; - uint8_t global_prediction; - uint8_t choice_prediction; + bool global_prediction; + bool choice_prediction; //Lookup in the local 
predictor to get its branch prediction local_history_idx = calcLocHistIdx(branch_addr); local_predictor_idx = localHistoryTable[local_history_idx] & localHistoryMask; - local_prediction = localCtrs[local_predictor_idx].read(); + local_prediction = localCtrs[local_predictor_idx].read() > threshold; //Lookup in the global predictor to get its branch prediction - global_prediction = globalCtrs[globalHistory].read(); + global_prediction = globalCtrs[globalHistory].read() > threshold; //Lookup in the choice predictor to see which one to use - choice_prediction = choiceCtrs[globalHistory].read(); - - //@todo Put a threshold value in for the three predictors that can - // be set through the constructor (so this isn't hard coded). - //Also should put some of this code into functions. - if (choice_prediction > threshold) { - if (global_prediction > threshold) { - updateHistoriesTaken(local_history_idx); - - assert(globalHistory < globalPredictorSize && - local_history_idx < localPredictorSize); - - globalCtrs[globalHistory].increment(); - localCtrs[local_history_idx].increment(); - + choice_prediction = choiceCtrs[globalHistory].read() > threshold; + + // Create BPHistory and pass it back to be recorded. + BPHistory *history = new BPHistory; + history->globalHistory = globalHistory; + history->localPredTaken = local_prediction; + history->globalPredTaken = global_prediction; + history->globalUsed = choice_prediction; + bp_history = (void *)history; + + assert(globalHistory < globalPredictorSize && + local_history_idx < localPredictorSize); + + // Commented code is for doing speculative update of counters and + // all histories. 
+ if (choice_prediction) { + if (global_prediction) { +// updateHistoriesTaken(local_history_idx); +// globalCtrs[globalHistory].increment(); +// localCtrs[local_history_idx].increment(); + updateGlobalHistTaken(); return true; } else { - updateHistoriesNotTaken(local_history_idx); - - assert(globalHistory < globalPredictorSize && - local_history_idx < localPredictorSize); - - globalCtrs[globalHistory].decrement(); - localCtrs[local_history_idx].decrement(); - +// updateHistoriesNotTaken(local_history_idx); +// globalCtrs[globalHistory].decrement(); +// localCtrs[local_history_idx].decrement(); + updateGlobalHistNotTaken(); return false; } } else { - if (local_prediction > threshold) { - updateHistoriesTaken(local_history_idx); - - assert(globalHistory < globalPredictorSize && - local_history_idx < localPredictorSize); - - globalCtrs[globalHistory].increment(); - localCtrs[local_history_idx].increment(); - + if (local_prediction) { +// updateHistoriesTaken(local_history_idx); +// globalCtrs[globalHistory].increment(); +// localCtrs[local_history_idx].increment(); + updateGlobalHistTaken(); return true; } else { - updateHistoriesNotTaken(local_history_idx); - - assert(globalHistory < globalPredictorSize && - local_history_idx < localPredictorSize); - - globalCtrs[globalHistory].decrement(); - localCtrs[local_history_idx].decrement(); - +// updateHistoriesNotTaken(local_history_idx); +// globalCtrs[globalHistory].decrement(); +// localCtrs[local_history_idx].decrement(); + updateGlobalHistNotTaken(); return false; } } } -// Update the branch predictor if it predicted a branch wrong. void -TournamentBP::update(Addr &branch_addr, unsigned correct_gh, bool taken) +TournamentBP::uncondBr(void * &bp_history) { + // Create BPHistory and pass it back to be recorded. 
+ BPHistory *history = new BPHistory; + history->globalHistory = globalHistory; + history->localPredTaken = true; + history->globalPredTaken = true; + bp_history = static_cast(history); + + updateGlobalHistTaken(); +} - uint8_t local_prediction; +void +TournamentBP::update(Addr &branch_addr, bool taken, void *bp_history) +{ unsigned local_history_idx; unsigned local_predictor_idx; - bool local_pred_taken; + unsigned local_predictor_hist; - uint8_t global_prediction; - bool global_pred_taken; - - // Load the correct global history into the register. - globalHistory = correct_gh; - - // Get the local predictor's current prediction, remove the incorrect - // update, and update the local predictor + // Get the local predictor's current prediction local_history_idx = calcLocHistIdx(branch_addr); - local_predictor_idx = localHistoryTable[local_history_idx]; - local_predictor_idx = (local_predictor_idx >> 1) & localHistoryMask; - - local_prediction = localCtrs[local_predictor_idx].read(); - local_pred_taken = local_prediction > threshold; - - //Get the global predictor's current prediction, and update the - //global predictor - global_prediction = globalCtrs[globalHistory].read(); - global_pred_taken = global_prediction > threshold; - - //Update the choice predictor to tell it which one was correct - if (local_pred_taken != global_pred_taken) { - //If the local prediction matches the actual outcome, decerement - //the counter. Otherwise increment the counter. - if (local_pred_taken == taken) { - choiceCtrs[globalHistory].decrement(); - } else { - choiceCtrs[globalHistory].increment(); + local_predictor_hist = localHistoryTable[local_history_idx]; + local_predictor_idx = local_predictor_hist & localHistoryMask; + + // Update the choice predictor to tell it which one was correct if + // there was a prediction. 
+ if (bp_history) { + BPHistory *history = static_cast(bp_history); + if (history->localPredTaken != history->globalPredTaken) { + // If the local prediction matches the actual outcome, + // decerement the counter. Otherwise increment the + // counter. + if (history->localPredTaken == taken) { + choiceCtrs[globalHistory].decrement(); + } else if (history->globalPredTaken == taken){ + choiceCtrs[globalHistory].increment(); + } } + + // We're done with this history, now delete it. + delete history; } - if (taken) { - assert(globalHistory < globalPredictorSize && - local_predictor_idx < localPredictorSize); + assert(globalHistory < globalPredictorSize && + local_predictor_idx < localPredictorSize); + // Update the counters and local history with the proper + // resolution of the branch. Global history is updated + // speculatively and restored upon squash() calls, so it does not + // need to be updated. + if (taken) { localCtrs[local_predictor_idx].increment(); globalCtrs[globalHistory].increment(); - globalHistory = (globalHistory << 1) | 1; - globalHistory = globalHistory & globalHistoryMask; - - localHistoryTable[local_history_idx] |= 1; + updateLocalHistTaken(local_history_idx); } else { - assert(globalHistory < globalPredictorSize && - local_predictor_idx < localPredictorSize); - localCtrs[local_predictor_idx].decrement(); globalCtrs[globalHistory].decrement(); - globalHistory = (globalHistory << 1); - globalHistory = globalHistory & globalHistoryMask; - - localHistoryTable[local_history_idx] &= ~1; + updateLocalHistNotTaken(local_history_idx); } } + +void +TournamentBP::squash(void *bp_history) +{ + BPHistory *history = static_cast(bp_history); + + // Restore global history to state prior to this branch. + globalHistory = history->globalHistory; + + // Delete this BPHistory now that we're done with it. 
+ delete history; +} + +#ifdef DEBUG +int +TournamentBP::BPHistory::newCount = 0; +#endif diff --git a/cpu/o3/tournament_pred.hh b/cpu/o3/tournament_pred.hh index 7b600aa53..6d77999cc 100644 --- a/cpu/o3/tournament_pred.hh +++ b/cpu/o3/tournament_pred.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2004-2005 The Regents of The University of Michigan + * Copyright (c) 2004-2006 The Regents of The University of Michigan * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -34,6 +34,15 @@ #include "cpu/o3/sat_counter.hh" #include +/** + * Implements a tournament branch predictor, hopefully identical to the one + * used in the 21264. It has a local predictor, which uses a local history + * table to index into a table of counters, and a global predictor, which + * uses a global history to index into a table of counters. A choice + * predictor chooses between the two. Only the global history register + * is speculatively updated, the rest are updated upon branches committing + * or misspeculating. + */ class TournamentBP { public: @@ -53,30 +62,95 @@ class TournamentBP /** * Looks up the given address in the branch predictor and returns - * a true/false value as to whether it is taken. + * a true/false value as to whether it is taken. Also creates a + * BPHistory object to store any state it will need on squash/update. * @param branch_addr The address of the branch to look up. + * @param bp_history Pointer that will be set to the BPHistory object. * @return Whether or not the branch is taken. */ - bool lookup(Addr &branch_addr); + bool lookup(Addr &branch_addr, void * &bp_history); + + /** + * Records that there was an unconditional branch, and modifies + * the bp history to point to an object that has the previous + * global history stored in it. + * @param bp_history Pointer that will be set to the BPHistory object. + */ + void uncondBr(void * &bp_history); /** * Updates the branch predictor with the actual result of a branch. 
* @param branch_addr The address of the branch to update. * @param taken Whether or not the branch was taken. + * @param bp_history Pointer to the BPHistory object that was created + * when the branch was predicted. + */ + void update(Addr &branch_addr, bool taken, void *bp_history); + + /** + * Restores the global branch history on a squash. + * @param bp_history Pointer to the BPHistory object that has the + * previous global branch history in it. */ - void update(Addr &branch_addr, unsigned global_history, bool taken); + void squash(void *bp_history); + /** Returns the global history. */ inline unsigned readGlobalHist() { return globalHistory; } private: - + /** + * Returns if the branch should be taken or not, given a counter + * value. + * @param count The counter value. + */ inline bool getPrediction(uint8_t &count); + /** + * Returns the local history index, given a branch address. + * @param branch_addr The branch's PC address. + */ inline unsigned calcLocHistIdx(Addr &branch_addr); - inline void updateHistoriesTaken(unsigned local_history_idx); + /** Updates global history as taken. */ + inline void updateGlobalHistTaken(); - inline void updateHistoriesNotTaken(unsigned local_history_idx); + /** Updates global history as not taken. */ + inline void updateGlobalHistNotTaken(); + + /** + * Updates local histories as taken. + * @param local_history_idx The local history table entry that + * will be updated. + */ + inline void updateLocalHistTaken(unsigned local_history_idx); + + /** + * Updates local histories as not taken. + * @param local_history_idx The local history table entry that + * will be updated. + */ + inline void updateLocalHistNotTaken(unsigned local_history_idx); + + /** + * The branch history information that is created upon predicting + * a branch. It will be passed back upon updating and squashing, + * when the BP can use this information to update/restore its + * state properly. 
+ */ + struct BPHistory { +#ifdef DEBUG + BPHistory() + { newCount++; } + ~BPHistory() + { newCount--; } + + static int newCount; +#endif + unsigned globalHistory; + bool localPredTaken; + bool globalPredTaken; + bool globalUsed; + }; /** Local counters. */ std::vector localCtrs; @@ -101,7 +175,6 @@ class TournamentBP /** Mask to get the proper local history. */ unsigned localHistoryMask; - /** Array of counters that make up the global predictor. */ std::vector globalCtrs; @@ -120,7 +193,6 @@ class TournamentBP /** Mask to get the proper global history. */ unsigned globalHistoryMask; - /** Array of counters that make up the choice predictor. */ std::vector choiceCtrs; diff --git a/cpu/ozone/ozone_impl.hh b/cpu/ozone/ozone_impl.hh index 1f543ec6e..9dc50c1fb 100644 --- a/cpu/ozone/ozone_impl.hh +++ b/cpu/ozone/ozone_impl.hh @@ -54,7 +54,7 @@ struct OzoneImpl { // Would like to put these into their own area. // typedef NullPredictor BranchPred; - typedef TwobitBPredUnit BranchPred; + typedef BPredUnit BranchPred; typedef FrontEnd FrontEnd; // Will need IQ, LSQ eventually typedef LWBackEnd BackEnd; diff --git a/cpu/ozone/simple_impl.hh b/cpu/ozone/simple_impl.hh index 961bf2ea9..26845271a 100644 --- a/cpu/ozone/simple_impl.hh +++ b/cpu/ozone/simple_impl.hh @@ -51,7 +51,7 @@ struct SimpleImpl { // Would like to put these into their own area. // typedef NullPredictor BranchPred; - typedef TwobitBPredUnit BranchPred; + typedef BPredUnit BranchPred; typedef FrontEnd FrontEnd; // Will need IQ, LSQ eventually typedef InorderBackEnd BackEnd; diff --git a/cpu/ozone/simple_params.hh b/cpu/ozone/simple_params.hh index 647da1781..7b5c6f67b 100644 --- a/cpu/ozone/simple_params.hh +++ b/cpu/ozone/simple_params.hh @@ -1,4 +1,30 @@ - +/* + * Copyright (c) 2006 The Regents of The University of Michigan + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ #ifndef __CPU_OZONE_SIMPLE_PARAMS_HH__ #define __CPU_OZONE_SIMPLE_PARAMS_HH__ @@ -29,7 +55,6 @@ class SimpleParams : public BaseCPU::Params AlphaITB *itb; AlphaDTB *dtb; #else std::vector workload; -// Process *process; #endif // FULL_SYSTEM //Page Table @@ -103,6 +128,7 @@ class SimpleParams : public BaseCPU::Params // // Branch predictor (BP & BTB) // + std::string predType; unsigned localPredictorSize; unsigned localCtrBits; unsigned localHistoryTableSize; diff --git a/python/m5/objects/AlphaFullCPU.py b/python/m5/objects/AlphaFullCPU.py index d719bf783..043c3c08f 100644 --- a/python/m5/objects/AlphaFullCPU.py +++ b/python/m5/objects/AlphaFullCPU.py @@ -55,6 +55,7 @@ class DerivAlphaFullCPU(BaseCPU): trapLatency = Param.Tick("Trap latency") fetchTrapLatency = Param.Tick("Fetch trap latency") + predType = Param.String("Branch predictor type ('local', 'tournament')") localPredictorSize = Param.Unsigned("Size of local predictor") localCtrBits = Param.Unsigned("Bits per counter") localHistoryTableSize = Param.Unsigned("Size of local history table") diff --git a/python/m5/objects/OzoneCPU.py b/python/m5/objects/OzoneCPU.py index 3fca61e28..ea8b6b537 100644 --- a/python/m5/objects/OzoneCPU.py +++ b/python/m5/objects/OzoneCPU.py @@ -57,6 +57,7 @@ class DerivOzoneCPU(BaseCPU): commitWidth = Param.Unsigned("Commit width") squashWidth = Param.Unsigned("Squash width") + predType = Param.String("Type of branch predictor ('local', 'tournament')") localPredictorSize = Param.Unsigned("Size of local predictor") localCtrBits = Param.Unsigned("Bits per counter") localHistoryTableSize = Param.Unsigned("Size of local history table") -- cgit v1.2.3 From f5b5391cfbcf627bc2813042cb75726328338dd4 Mon Sep 17 00:00:00 2001 From: Kevin Lim Date: Thu, 25 May 2006 17:13:00 -0400 Subject: Missed this file in last check in. 
--HG-- extra : convert_revision : 6c42350fc3cebb5cf4a6da8ea0c51cca15b3f99f --- cpu/ozone/cpu_builder.cc | 35 +++++++++++++++++++++++++++++++++-- 1 file changed, 33 insertions(+), 2 deletions(-) diff --git a/cpu/ozone/cpu_builder.cc b/cpu/ozone/cpu_builder.cc index 64aa49c71..1ab7a4c29 100644 --- a/cpu/ozone/cpu_builder.cc +++ b/cpu/ozone/cpu_builder.cc @@ -1,3 +1,30 @@ +/* + * Copyright (c) 2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ #include @@ -103,6 +130,7 @@ Param renameToROBDelay; Param commitWidth; Param squashWidth; +Param predType; Param localPredictorSize; Param localCtrBits; Param localHistoryTableSize; @@ -236,6 +264,7 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(DerivOzoneCPU) INIT_PARAM(commitWidth, "Commit width"), INIT_PARAM(squashWidth, "Squash width"), + INIT_PARAM(predType, "Type of branch predictor ('local', 'tournament')"), INIT_PARAM(localPredictorSize, "Size of local predictor"), INIT_PARAM(localCtrBits, "Bits per counter"), INIT_PARAM(localHistoryTableSize, "Size of local history table"), @@ -375,7 +404,7 @@ CREATE_SIM_OBJECT(DerivOzoneCPU) params->commitWidth = commitWidth; params->squashWidth = squashWidth; - + params->predType = predType; params->localPredictorSize = localPredictorSize; params->localCtrBits = localCtrBits; params->localHistoryTableSize = localHistoryTableSize; @@ -504,6 +533,7 @@ Param renameToROBDelay; Param commitWidth; Param squashWidth; +Param predType; Param localPredictorSize; Param localCtrBits; Param localHistoryTableSize; @@ -636,6 +666,7 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(SimpleOzoneCPU) INIT_PARAM(commitWidth, "Commit width"), INIT_PARAM(squashWidth, "Squash width"), + INIT_PARAM(predType, "Type of branch predictor ('local', 'tournament')"), INIT_PARAM(localPredictorSize, "Size of local predictor"), INIT_PARAM(localCtrBits, "Bits per counter"), INIT_PARAM(localHistoryTableSize, "Size of local history table"), @@ -774,7 +805,7 @@ CREATE_SIM_OBJECT(SimpleOzoneCPU) params->commitWidth = commitWidth; params->squashWidth = squashWidth; - + params->predType = predType; params->localPredictorSize = localPredictorSize; params->localCtrBits = localCtrBits; params->localHistoryTableSize = localHistoryTableSize; -- cgit v1.2.3 From a514bf21508f4398f5cf7322f5f2a1ed212bbcaa Mon Sep 17 00:00:00 2001 From: Kevin Lim Date: Wed, 31 May 2006 11:45:02 -0400 Subject: Comments and code cleanup. 
cpu/activity.cc: cpu/activity.hh: cpu/o3/alpha_cpu.hh: Updates to include comments. cpu/base_dyn_inst.cc: Remove call to thread->misspeculating(), as it's never actually misspeculating. --HG-- extra : convert_revision : 86574d684770fac9b480475acca048ea418cdac3 --- cpu/activity.cc | 33 +++++++++++++ cpu/activity.hh | 63 ++++++++++++++++++++++-- cpu/base_dyn_inst.cc | 4 +- cpu/o3/alpha_cpu.hh | 112 +++++++++++++++++++++++++++++++++++++----- cpu/o3/alpha_cpu_impl.hh | 48 ++++++++++-------- cpu/o3/alpha_dyn_inst.hh | 8 +++ cpu/o3/alpha_dyn_inst_impl.hh | 18 ++++--- cpu/o3/alpha_params.hh | 2 +- cpu/o3/comm.hh | 11 ++--- cpu/o3/commit.hh | 40 +++++++++++++-- cpu/o3/commit_impl.hh | 7 +-- cpu/o3/cpu.hh | 67 ++++++++++++++++--------- cpu/o3/cpu_policy.hh | 28 ++++++++++- cpu/o3/decode.hh | 3 ++ cpu/o3/decode_impl.hh | 32 +++++++----- cpu/o3/dep_graph.hh | 22 +++++++++ cpu/o3/fetch.hh | 23 +++++++-- cpu/o3/fetch_impl.hh | 42 ++++++++-------- cpu/o3/fu_pool.cc | 2 + cpu/o3/fu_pool.hh | 3 ++ cpu/o3/iew.hh | 23 ++++++++- cpu/o3/iew_impl.hh | 13 ++++- cpu/o3/inst_queue.hh | 34 ++++++++++--- cpu/o3/inst_queue_impl.hh | 61 ++++++++++++++++++----- cpu/o3/lsq.hh | 7 ++- cpu/o3/lsq_unit.hh | 17 +++++-- cpu/o3/lsq_unit_impl.hh | 1 - cpu/o3/mem_dep_unit.hh | 13 ++++- cpu/o3/mem_dep_unit_impl.hh | 6 ++- cpu/o3/rename.hh | 8 +++ cpu/o3/rename_impl.hh | 2 + cpu/o3/rename_map.hh | 7 +-- cpu/o3/rob.hh | 3 ++ cpu/o3/store_set.cc | 25 ++++++++++ cpu/o3/store_set.hh | 40 +++++++++++++++ cpu/o3/thread_state.hh | 31 ++++++++++-- 36 files changed, 702 insertions(+), 157 deletions(-) diff --git a/cpu/activity.cc b/cpu/activity.cc index 6dcb6e341..b0b16446c 100644 --- a/cpu/activity.cc +++ b/cpu/activity.cc @@ -1,3 +1,30 @@ +/* + * Copyright (c) 2006 The Regents of The University of Michigan + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ #include "base/timebuf.hh" #include "cpu/activity.hh" @@ -14,6 +41,8 @@ ActivityRecorder::ActivityRecorder(int num_stages, int longest_latency, void ActivityRecorder::activity() { + // If we've already recorded activity for this cycle, we don't + // want to increment the count any more. 
if (activityBuffer[0]) { return; } @@ -28,6 +57,8 @@ ActivityRecorder::activity() void ActivityRecorder::advance() { + // If there's a 1 in the slot that is about to be erased once the + // time buffer advances, then decrement the activityCount. if (activityBuffer[-longestLatency]) { --activityCount; @@ -46,6 +77,7 @@ ActivityRecorder::advance() void ActivityRecorder::activateStage(const int idx) { + // Increment the activity count if this stage wasn't already active. if (!stageActive[idx]) { ++activityCount; @@ -62,6 +94,7 @@ ActivityRecorder::activateStage(const int idx) void ActivityRecorder::deactivateStage(const int idx) { + // Decrement the activity count if this stage was active. if (stageActive[idx]) { --activityCount; diff --git a/cpu/activity.hh b/cpu/activity.hh index 2d53dc4bb..2c0df5efb 100644 --- a/cpu/activity.hh +++ b/cpu/activity.hh @@ -1,3 +1,30 @@ +/* + * Copyright (c) 2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ #ifndef __CPU_ACTIVITY_HH__ #define __CPU_ACTIVITY_HH__ @@ -5,33 +32,61 @@ #include "base/timebuf.hh" #include "base/trace.hh" +/** + * ActivityRecorder helper class that informs the CPU if it can switch + * over to being idle or not. It works by having a time buffer as + * long as any time buffer in the CPU, and the CPU and all of its + * stages inform the ActivityRecorder when they write to any time + * buffer. The ActivityRecorder marks a 1 in the "0" slot of the time + * buffer any time a stage writes to a time buffer, and it advances + * its time buffer at the same time as all other stages. The + * ActivityRecorder also records if a stage has activity to do next + * cycle. The recorder keeps a count of these two. Thus any time the + * count is non-zero, there is either communication still in flight, + * or activity that still must be done, meaning that the CPU can not + * idle. If count is zero, then the CPU can safely idle as it has no + * more outstanding work to do. + */ class ActivityRecorder { public: ActivityRecorder(int num_stages, int longest_latency, int count); /** Records that there is activity this cycle. */ void activity(); - /** Advances the activity buffer, decrementing the activityCount if active - * communication just left the time buffer, and descheduling the CPU if - * there is no activity. 
+ + /** Advances the activity buffer, decrementing the activityCount + * if active communication just left the time buffer, and + * determining if there is no activity. */ void advance(); + /** Marks a stage as active. */ void activateStage(const int idx); + /** Deactivates a stage. */ void deactivateStage(const int idx); + /** Returns how many things are active within the recorder. */ int getActivityCount() { return activityCount; } + /** Sets the count to a starting value. Can be used to disable + * the idling option. + */ void setActivityCount(int count) { activityCount = count; } + /** Returns if the CPU should be active. */ bool active() { return activityCount; } + /** Clears the time buffer and the activity count. */ void reset(); + /** Debug function to dump the contents of the time buffer. */ void dump(); + /** Debug function to ensure that the activity count matches the + * contents of the time buffer. + */ void validate(); private: @@ -45,6 +100,7 @@ class ActivityRecorder { */ TimeBuffer activityBuffer; + /** Longest latency time buffer in the CPU. */ int longestLatency; /** Tracks how many stages and cycles of time buffer have @@ -58,6 +114,7 @@ class ActivityRecorder { */ int activityCount; + /** Number of stages that can be marked as active or inactive. */ int numStages; /** Records which stages are active/inactive. */ diff --git a/cpu/base_dyn_inst.cc b/cpu/base_dyn_inst.cc index 7ab760ae3..64a995689 100644 --- a/cpu/base_dyn_inst.cc +++ b/cpu/base_dyn_inst.cc @@ -166,6 +166,8 @@ BaseDynInst::~BaseDynInst() delete traceData; } + fault = NoFault; + --instcount; DPRINTF(DynInst, "DynInst: [sn:%lli] Instruction destroyed. 
Instcount=%i\n", @@ -289,7 +291,7 @@ BaseDynInst::copy(Addr dest) { uint8_t data[64]; FunctionalMemory *mem = thread->mem; - assert(thread->copySrcPhysAddr || thread->misspeculating()); + assert(thread->copySrcPhysAddr); MemReqPtr req = new MemReq(dest, thread->getXCProxy(), 64); req->asid = asid; diff --git a/cpu/o3/alpha_cpu.hh b/cpu/o3/alpha_cpu.hh index 5c89e3462..4c452c4dd 100644 --- a/cpu/o3/alpha_cpu.hh +++ b/cpu/o3/alpha_cpu.hh @@ -39,6 +39,14 @@ namespace Kernel { class Statistics; }; +/** + * AlphaFullCPU class. Derives from the FullO3CPU class, and + * implements all ISA and implementation specific functions of the + * CPU. This is the CPU class that is used for the SimObjects, and is + * what is given to the DynInsts. Most of its state exists in the + * FullO3CPU; the state it has is mainly for ISA specific + * functionality. + */ template class AlphaFullCPU : public FullO3CPU { @@ -56,145 +64,211 @@ class AlphaFullCPU : public FullO3CPU /** Constructs an AlphaFullCPU with the given parameters. */ AlphaFullCPU(Params *params); + /** + * Derived ExecContext class for use with the AlphaFullCPU. It + * provides the interface for any external objects to access a + * single thread's state and some general CPU state. Any time + * external objects try to update state through this interface, + * the CPU will create an event to squash all in-flight + * instructions in order to ensure state is maintained correctly. + */ class AlphaXC : public ExecContext { public: + /** Pointer to the CPU. */ AlphaFullCPU *cpu; + /** Pointer to the thread state that this XC corresponds to. */ O3ThreadState *thread; + /** Returns a pointer to this CPU. */ virtual BaseCPU *getCpuPtr() { return cpu; } + /** Sets this CPU's ID. */ virtual void setCpuId(int id) { cpu->cpu_id = id; } + /** Reads this CPU's ID. */ virtual int readCpuId() { return cpu->cpu_id; } + /** Returns a pointer to functional memory. 
*/ virtual FunctionalMemory *getMemPtr() { return thread->mem; } #if FULL_SYSTEM + /** Returns a pointer to the system. */ virtual System *getSystemPtr() { return cpu->system; } + /** Returns a pointer to physical memory. */ virtual PhysicalMemory *getPhysMemPtr() { return cpu->physmem; } + /** Returns a pointer to the ITB. */ virtual AlphaITB *getITBPtr() { return cpu->itb; } - virtual AlphaDTB * getDTBPtr() { return cpu->dtb; } + /** Returns a pointer to the DTB. */ + virtual AlphaDTB *getDTBPtr() { return cpu->dtb; } + /** Returns a pointer to this thread's kernel statistics. */ virtual Kernel::Statistics *getKernelStats() { return thread->kernelStats; } #else + /** Returns a pointer to this thread's process. */ virtual Process *getProcessPtr() { return thread->process; } #endif - + /** Returns this thread's status. */ virtual Status status() const { return thread->status(); } + /** Sets this thread's status. */ virtual void setStatus(Status new_status) { thread->setStatus(new_status); } - /// Set the status to Active. Optional delay indicates number of - /// cycles to wait before beginning execution. + /** Set the status to Active. Optional delay indicates number of + * cycles to wait before beginning execution. */ virtual void activate(int delay = 1); - /// Set the status to Suspended. + /** Set the status to Suspended. */ virtual void suspend(); - /// Set the status to Unallocated. + /** Set the status to Unallocated. */ virtual void deallocate(); - /// Set the status to Halted. + /** Set the status to Halted. */ virtual void halt(); #if FULL_SYSTEM + /** Dumps the function profiling information. + * @todo: Implement. + */ virtual void dumpFuncProfile(); #endif - + /** Takes over execution of a thread from another CPU. */ virtual void takeOverFrom(ExecContext *old_context); + /** Registers statistics associated with this XC. */ virtual void regStats(const std::string &name); + /** Serializes state. 
*/ virtual void serialize(std::ostream &os); + /** Unserializes state. */ virtual void unserialize(Checkpoint *cp, const std::string &section); #if FULL_SYSTEM + /** Returns pointer to the quiesce event. */ virtual EndQuiesceEvent *getQuiesceEvent(); + /** Reads the last tick that this thread was activated on. */ virtual Tick readLastActivate(); + /** Reads the last tick that this thread was suspended on. */ virtual Tick readLastSuspend(); + /** Clears the function profiling information. */ virtual void profileClear(); + /** Samples the function profiling information. */ virtual void profileSample(); #endif - + /** Returns this thread's ID number. */ virtual int getThreadNum() { return thread->tid; } + /** Returns the instruction this thread is currently committing. + * Only used when an instruction faults. + */ virtual TheISA::MachInst getInst(); + /** Copies the architectural registers from another XC into this XC. */ virtual void copyArchRegs(ExecContext *xc); + /** Resets all architectural registers to 0. */ virtual void clearArchRegs(); + /** Reads an integer register. */ virtual uint64_t readIntReg(int reg_idx); + /** Reads a single precision floating point register. */ virtual float readFloatRegSingle(int reg_idx); + /** Reads a double precision floating point register. */ virtual double readFloatRegDouble(int reg_idx); + /** Reads a floating point register as an integer value. */ virtual uint64_t readFloatRegInt(int reg_idx); + /** Sets an integer register to a value. */ virtual void setIntReg(int reg_idx, uint64_t val); + /** Sets a single precision fp register to a value. */ virtual void setFloatRegSingle(int reg_idx, float val); + /** Sets a double precision fp register to a value. */ virtual void setFloatRegDouble(int reg_idx, double val); + /** Sets a fp register to an integer value. */ virtual void setFloatRegInt(int reg_idx, uint64_t val); + /** Reads this thread's PC. 
*/ virtual uint64_t readPC() { return cpu->readPC(thread->tid); } + /** Sets this thread's PC. */ virtual void setPC(uint64_t val); + /** Reads this thread's next PC. */ virtual uint64_t readNextPC() { return cpu->readNextPC(thread->tid); } + /** Sets this thread's next PC. */ virtual void setNextPC(uint64_t val); + /** Reads a miscellaneous register. */ virtual MiscReg readMiscReg(int misc_reg) { return cpu->readMiscReg(misc_reg, thread->tid); } + /** Reads a misc. register, including any side-effects the + * read might have as defined by the architecture. */ virtual MiscReg readMiscRegWithEffect(int misc_reg, Fault &fault) { return cpu->readMiscRegWithEffect(misc_reg, fault, thread->tid); } + /** Sets a misc. register. */ virtual Fault setMiscReg(int misc_reg, const MiscReg &val); + /** Sets a misc. register, including any side-effects the + * write might have as defined by the architecture. */ virtual Fault setMiscRegWithEffect(int misc_reg, const MiscReg &val); + /** Returns the number of consecutive store conditional failures. */ // @todo: Figure out where these store cond failures should go. virtual unsigned readStCondFailures() { return thread->storeCondFailures; } + /** Sets the number of consecutive store conditional failures. */ virtual void setStCondFailures(unsigned sc_failures) { thread->storeCondFailures = sc_failures; } #if FULL_SYSTEM + /** Returns if the thread is currently in PAL mode, based on + * the PC's value. */ virtual bool inPalMode() { return TheISA::PcPAL(cpu->readPC(thread->tid)); } #endif - // Only really makes sense for old CPU model. Lots of code // outside the CPU still checks this function, so it will // always return false to keep everything working. + /** Checks if the thread is misspeculating. Because it is + * very difficult to determine if the thread is + * misspeculating, this is set as false. */ virtual bool misspeculating() { return false; } #if !FULL_SYSTEM + /** Gets a syscall argument by index. 
*/ virtual IntReg getSyscallArg(int i); + /** Sets a syscall argument. */ virtual void setSyscallArg(int i, IntReg val); + /** Sets the syscall return value. */ virtual void setSyscallReturn(SyscallReturn return_value); + /** Executes a syscall in SE mode. */ virtual void syscall() { return cpu->syscall(thread->tid); } + /** Reads the funcExeInst counter. */ virtual Counter readFuncExeInst() { return thread->funcExeInst; } #endif }; @@ -260,19 +334,32 @@ class AlphaFullCPU : public FullO3CPU } #endif + /** Reads a miscellaneous register. */ MiscReg readMiscReg(int misc_reg, unsigned tid); + /** Reads a misc. register, including any side effects the read + * might have as defined by the architecture. + */ MiscReg readMiscRegWithEffect(int misc_reg, Fault &fault, unsigned tid); + /** Sets a miscellaneous register. */ Fault setMiscReg(int misc_reg, const MiscReg &val, unsigned tid); + /** Sets a misc. register, including any side effects the write + * might have as defined by the architecture. + */ Fault setMiscRegWithEffect(int misc_reg, const MiscReg &val, unsigned tid); + /** Initiates a squash of all in-flight instructions for a given + * thread. The source of the squash is an external update of + * state through the XC. + */ void squashFromXC(unsigned tid); #if FULL_SYSTEM + /** Posts an interrupt. */ void post_interrupt(int int_num, int index); - + /** Reads the interrupt flag. */ int readIntrFlag(); /** Sets the interrupt flags. */ void setIntrFlag(int val); @@ -298,7 +385,7 @@ class AlphaFullCPU : public FullO3CPU /** Executes a syscall. * @todo: Determine if this needs to be virtual. */ - void syscall(int thread_num); + void syscall(int tid); /** Gets a syscall argument. */ IntReg getSyscallArg(int i, int tid); @@ -424,6 +511,7 @@ class AlphaFullCPU : public FullO3CPU Addr lockAddr; + /** Temporary fix for the lock flag, works in the UP case. 
*/ bool lockFlag; }; diff --git a/cpu/o3/alpha_cpu_impl.hh b/cpu/o3/alpha_cpu_impl.hh index 91cd3d9e6..f39fdf6b6 100644 --- a/cpu/o3/alpha_cpu_impl.hh +++ b/cpu/o3/alpha_cpu_impl.hh @@ -59,10 +59,12 @@ AlphaFullCPU::AlphaFullCPU(Params *params) { DPRINTF(FullCPU, "AlphaFullCPU: Creating AlphaFullCPU object.\n"); + // Setup any thread state. this->thread.resize(this->numThreads); for (int i = 0; i < this->numThreads; ++i) { #if FULL_SYSTEM + // SMT is not supported in FS mode yet. assert(this->numThreads == 1); this->thread[i] = new Thread(this, 0, params->mem); this->thread[i]->setStatus(ExecContext::Suspended); @@ -87,29 +89,34 @@ AlphaFullCPU::AlphaFullCPU(Params *params) } #endif // !FULL_SYSTEM - this->thread[i]->numInst = 0; - ExecContext *xc_proxy; - AlphaXC *alpha_xc_proxy = new AlphaXC; + // Setup the XC that will serve as the interface to the threads/CPU. + AlphaXC *alpha_xc = new AlphaXC; + // If we're using a checker, then the XC should be the + // CheckerExecContext. if (params->checker) { - xc_proxy = new CheckerExecContext(alpha_xc_proxy, this->checker); + xc_proxy = new CheckerExecContext( + alpha_xc, this->checker); } else { - xc_proxy = alpha_xc_proxy; + xc_proxy = alpha_xc; } - alpha_xc_proxy->cpu = this; - alpha_xc_proxy->thread = this->thread[i]; + alpha_xc->cpu = this; + alpha_xc->thread = this->thread[i]; #if FULL_SYSTEM + // Setup quiesce event. this->thread[i]->quiesceEvent = new EndQuiesceEvent(xc_proxy); this->thread[i]->lastActivate = 0; this->thread[i]->lastSuspend = 0; #endif + // Give the thread the XC. this->thread[i]->xcProxy = xc_proxy; + // Add the XC to the CPU's list of XC's. 
this->execContexts.push_back(xc_proxy); } @@ -171,6 +178,7 @@ AlphaFullCPU::AlphaXC::takeOverFrom(ExecContext *old_context) setStatus(old_context->status()); copyArchRegs(old_context); setCpuId(old_context->readCpuId()); + #if !FULL_SYSTEM thread->funcExeInst = old_context->readFuncExeInst(); #else @@ -394,7 +402,6 @@ template uint64_t AlphaFullCPU::AlphaXC::readIntReg(int reg_idx) { - DPRINTF(Fault, "Reading int register through the XC!\n"); return cpu->readArchIntReg(reg_idx, thread->tid); } @@ -402,7 +409,6 @@ template float AlphaFullCPU::AlphaXC::readFloatRegSingle(int reg_idx) { - DPRINTF(Fault, "Reading float register through the XC!\n"); return cpu->readArchFloatRegSingle(reg_idx, thread->tid); } @@ -410,7 +416,6 @@ template double AlphaFullCPU::AlphaXC::readFloatRegDouble(int reg_idx) { - DPRINTF(Fault, "Reading float register through the XC!\n"); return cpu->readArchFloatRegDouble(reg_idx, thread->tid); } @@ -418,7 +423,6 @@ template uint64_t AlphaFullCPU::AlphaXC::readFloatRegInt(int reg_idx) { - DPRINTF(Fault, "Reading floatint register through the XC!\n"); return cpu->readArchFloatRegInt(reg_idx, thread->tid); } @@ -426,9 +430,9 @@ template void AlphaFullCPU::AlphaXC::setIntReg(int reg_idx, uint64_t val) { - DPRINTF(Fault, "Setting int register through the XC!\n"); cpu->setArchIntReg(reg_idx, val, thread->tid); + // Squash if we're not already in a state update mode. if (!thread->trapPending && !thread->inSyscall) { cpu->squashFromXC(thread->tid); } @@ -438,9 +442,9 @@ template void AlphaFullCPU::AlphaXC::setFloatRegSingle(int reg_idx, float val) { - DPRINTF(Fault, "Setting float register through the XC!\n"); cpu->setArchFloatRegSingle(reg_idx, val, thread->tid); + // Squash if we're not already in a state update mode. 
if (!thread->trapPending && !thread->inSyscall) { cpu->squashFromXC(thread->tid); } @@ -450,9 +454,9 @@ template void AlphaFullCPU::AlphaXC::setFloatRegDouble(int reg_idx, double val) { - DPRINTF(Fault, "Setting float register through the XC!\n"); cpu->setArchFloatRegDouble(reg_idx, val, thread->tid); + // Squash if we're not already in a state update mode. if (!thread->trapPending && !thread->inSyscall) { cpu->squashFromXC(thread->tid); } @@ -462,9 +466,9 @@ template void AlphaFullCPU::AlphaXC::setFloatRegInt(int reg_idx, uint64_t val) { - DPRINTF(Fault, "Setting floatint register through the XC!\n"); cpu->setArchFloatRegInt(reg_idx, val, thread->tid); + // Squash if we're not already in a state update mode. if (!thread->trapPending && !thread->inSyscall) { cpu->squashFromXC(thread->tid); } @@ -476,6 +480,7 @@ AlphaFullCPU::AlphaXC::setPC(uint64_t val) { cpu->setPC(val, thread->tid); + // Squash if we're not already in a state update mode. if (!thread->trapPending && !thread->inSyscall) { cpu->squashFromXC(thread->tid); } @@ -487,6 +492,7 @@ AlphaFullCPU::AlphaXC::setNextPC(uint64_t val) { cpu->setNextPC(val, thread->tid); + // Squash if we're not already in a state update mode. if (!thread->trapPending && !thread->inSyscall) { cpu->squashFromXC(thread->tid); } @@ -496,10 +502,9 @@ template Fault AlphaFullCPU::AlphaXC::setMiscReg(int misc_reg, const MiscReg &val) { - DPRINTF(Fault, "Setting misc register through the XC!\n"); - Fault ret_fault = cpu->setMiscReg(misc_reg, val, thread->tid); + // Squash if we're not already in a state update mode. 
if (!thread->trapPending && !thread->inSyscall) { cpu->squashFromXC(thread->tid); } @@ -509,12 +514,12 @@ AlphaFullCPU::AlphaXC::setMiscReg(int misc_reg, const MiscReg &val) template Fault -AlphaFullCPU::AlphaXC::setMiscRegWithEffect(int misc_reg, const MiscReg &val) +AlphaFullCPU::AlphaXC::setMiscRegWithEffect(int misc_reg, + const MiscReg &val) { - DPRINTF(Fault, "Setting misc register through the XC!\n"); - Fault ret_fault = cpu->setMiscRegWithEffect(misc_reg, val, thread->tid); + // Squash if we're not already in a state update mode. if (!thread->trapPending && !thread->inSyscall) { cpu->squashFromXC(thread->tid); } @@ -595,7 +600,6 @@ AlphaFullCPU::post_interrupt(int int_num, int index) if (this->thread[0]->status() == ExecContext::Suspended) { DPRINTF(IPI,"Suspended Processor awoke\n"); -// xcProxies[0]->activate(); this->execContexts[0]->activate(); } } @@ -658,6 +662,7 @@ template void AlphaFullCPU::trap(Fault fault, unsigned tid) { + // Pass the thread's XC into the invoke method. fault->invoke(this->execContexts[tid]); } @@ -708,6 +713,7 @@ AlphaFullCPU::processInterrupts() if (ipl && ipl > this->readMiscReg(IPR_IPLR, 0)) { this->setMiscReg(IPR_ISR, summary, 0); this->setMiscReg(IPR_INTID, ipl, 0); + // Checker needs to know these two registers were updated. if (this->checker) { this->checker->cpuXCBase()->setMiscReg(IPR_ISR, summary); this->checker->cpuXCBase()->setMiscReg(IPR_INTID, ipl); diff --git a/cpu/o3/alpha_dyn_inst.hh b/cpu/o3/alpha_dyn_inst.hh index 1c5b738aa..de4d40358 100644 --- a/cpu/o3/alpha_dyn_inst.hh +++ b/cpu/o3/alpha_dyn_inst.hh @@ -86,23 +86,31 @@ class AlphaDynInst : public BaseDynInst void initVars(); public: + /** Reads a miscellaneous register. */ MiscReg readMiscReg(int misc_reg) { return this->cpu->readMiscReg(misc_reg, this->threadNumber); } + /** Reads a misc. register, including any side-effects the read + * might have as defined by the architecture. 
+ */ MiscReg readMiscRegWithEffect(int misc_reg, Fault &fault) { return this->cpu->readMiscRegWithEffect(misc_reg, fault, this->threadNumber); } + /** Sets a misc. register. */ Fault setMiscReg(int misc_reg, const MiscReg &val) { this->instResult.integer = val; return this->cpu->setMiscReg(misc_reg, val, this->threadNumber); } + /** Sets a misc. register, including any side-effects the write + * might have as defined by the architecture. + */ Fault setMiscRegWithEffect(int misc_reg, const MiscReg &val) { return this->cpu->setMiscRegWithEffect(misc_reg, val, diff --git a/cpu/o3/alpha_dyn_inst_impl.hh b/cpu/o3/alpha_dyn_inst_impl.hh index 541d5ab82..d82d46830 100644 --- a/cpu/o3/alpha_dyn_inst_impl.hh +++ b/cpu/o3/alpha_dyn_inst_impl.hh @@ -64,9 +64,10 @@ template Fault AlphaDynInst::execute() { - // @todo: Pretty convoluted way to avoid squashing from happening when using - // the XC during an instruction's execution (specifically for instructions - // that have sideeffects that use the XC). Fix this. + // @todo: Pretty convoluted way to avoid squashing from happening + // when using the XC during an instruction's execution + // (specifically for instructions that have side-effects that use + // the XC). Fix this. bool in_syscall = this->thread->inSyscall; this->thread->inSyscall = true; @@ -81,9 +82,10 @@ template Fault AlphaDynInst::initiateAcc() { - // @todo: Pretty convoluted way to avoid squashing from happening when using - // the XC during an instruction's execution (specifically for instructions - // that have sideeffects that use the XC). Fix this. + // @todo: Pretty convoluted way to avoid squashing from happening + // when using the XC during an instruction's execution + // (specifically for instructions that have side-effects that use + // the XC). Fix this. 
bool in_syscall = this->thread->inSyscall; this->thread->inSyscall = true; @@ -99,10 +101,12 @@ Fault AlphaDynInst::completeAcc() { if (this->isLoad()) { + // Loads need the request's data to complete the access. this->fault = this->staticInst->completeAcc(this->req->data, this, this->traceData); } else if (this->isStore()) { + // Stores need the result of the request to complete their access. this->fault = this->staticInst->completeAcc((uint8_t*)&this->req->result, this, this->traceData); @@ -118,9 +122,11 @@ template Fault AlphaDynInst::hwrei() { + // Can only do a hwrei when in pal mode. if (!this->cpu->inPalMode(this->readPC())) return new AlphaISA::UnimplementedOpcodeFault; + // Set the next PC based on the value of the EXC_ADDR IPR. this->setNextPC(this->cpu->readMiscReg(AlphaISA::IPR_EXC_ADDR, this->threadNumber)); diff --git a/cpu/o3/alpha_params.hh b/cpu/o3/alpha_params.hh index 5eb00426d..f0836a9fd 100644 --- a/cpu/o3/alpha_params.hh +++ b/cpu/o3/alpha_params.hh @@ -125,7 +125,7 @@ class AlphaSimpleParams : public BaseFullCPU::Params Tick fetchTrapLatency; // - // Branch predictor (BP & BTB) + // Branch predictor (BP, BTB, RAS) // std::string predType; unsigned localPredictorSize; diff --git a/cpu/o3/comm.hh b/cpu/o3/comm.hh index c36c58d3d..d9a242a12 100644 --- a/cpu/o3/comm.hh +++ b/cpu/o3/comm.hh @@ -41,6 +41,7 @@ // typedef yet are not templated on the Impl. For now it will be defined here. typedef short int PhysRegIndex; +/** Struct that defines the information passed from fetch to decode. */ template struct DefaultFetchDefaultDecode { typedef typename Impl::DynInstPtr DynInstPtr; @@ -53,6 +54,7 @@ struct DefaultFetchDefaultDecode { bool clearFetchFault; }; +/** Struct that defines the information passed from decode to rename. 
*/ template struct DefaultDecodeDefaultRename { typedef typename Impl::DynInstPtr DynInstPtr; @@ -62,6 +64,7 @@ struct DefaultDecodeDefaultRename { DynInstPtr insts[Impl::MaxWidth]; }; +/** Struct that defines the information passed from rename to IEW. */ template struct DefaultRenameDefaultIEW { typedef typename Impl::DynInstPtr DynInstPtr; @@ -71,6 +74,7 @@ struct DefaultRenameDefaultIEW { DynInstPtr insts[Impl::MaxWidth]; }; +/** Struct that defines the information passed from IEW to commit. */ template struct DefaultIEWDefaultCommit { typedef typename Impl::DynInstPtr DynInstPtr; @@ -98,6 +102,7 @@ struct IssueStruct { DynInstPtr insts[Impl::MaxWidth]; }; +/** Struct that defines all backwards communication. */ template struct TimeBufStruct { struct decodeComm { @@ -119,13 +124,7 @@ struct TimeBufStruct { decodeComm decodeInfo[Impl::MaxThreads]; - // Rename can't actually tell anything to squash or send a new PC back - // because it doesn't do anything along those lines. But maybe leave - // these fields in here to keep the stages mostly orthagonal. struct renameComm { - bool squash; - - uint64_t nextPC; }; renameComm renameInfo[Impl::MaxThreads]; diff --git a/cpu/o3/commit.hh b/cpu/o3/commit.hh index 66abf8dc6..d93822394 100644 --- a/cpu/o3/commit.hh +++ b/cpu/o3/commit.hh @@ -84,6 +84,9 @@ class DefaultCommit typedef O3ThreadState Thread; + /** Event class used to schedule a squash due to a trap (fault or + * interrupt) to happen on a specific cycle. + */ class TrapEvent : public Event { private: DefaultCommit *commit; @@ -161,7 +164,7 @@ class DefaultCommit Fetch *fetchStage; - /** Sets the poitner to the IEW stage. */ + /** Sets the pointer to the IEW stage. */ void setIEWStage(IEW *iew_stage); /** The pointer to the IEW stage. Used solely to ensure that @@ -182,10 +185,13 @@ class DefaultCommit /** Initializes stage by sending back the number of free entries. */ void initStage(); + /** Initializes the switching out of commit. 
*/ void switchOut(); + /** Completes the switch out of commit. */ void doSwitchOut(); + /** Takes over from another CPU's thread. */ void takeOverFrom(); /** Ticks the commit stage, which tries to commit instructions. */ @@ -199,11 +205,18 @@ class DefaultCommit /** Returns the number of free ROB entries for a specific thread. */ unsigned numROBFreeEntries(unsigned tid); + /** Generates an event to schedule a squash due to a trap. */ + void generateTrapEvent(unsigned tid); + + /** Records that commit needs to initiate a squash due to an + * external state update through the XC. + */ void generateXCEvent(unsigned tid); private: /** Updates the overall status of commit with the nextStatus, and - * tell the CPU if commit is active/inactive. */ + * tell the CPU if commit is active/inactive. + */ void updateStatus(); /** Sets the next status based on threads' statuses, which becomes the @@ -222,10 +235,13 @@ class DefaultCommit */ bool changedROBEntries(); + /** Squashes all in flight instructions. */ void squashAll(unsigned tid); + /** Handles squashing due to a trap. */ void squashFromTrap(unsigned tid); + /** Handles squashing due to an XC write. */ void squashFromXC(unsigned tid); /** Commits as many instructions as possible. */ @@ -236,8 +252,6 @@ class DefaultCommit */ bool commitHead(DynInstPtr &head_inst, unsigned inst_num); - void generateTrapEvent(unsigned tid); - /** Gets instructions from rename and inserts them into the ROB. */ void getInsts(); @@ -259,12 +273,16 @@ class DefaultCommit */ uint64_t readPC() { return PC[0]; } + /** Returns the PC of a specific thread. */ uint64_t readPC(unsigned tid) { return PC[tid]; } + /** Sets the PC of a specific thread. */ void setPC(uint64_t val, unsigned tid) { PC[tid] = val; } + /** Reads the next PC of a specific thread. */ uint64_t readNextPC(unsigned tid) { return nextPC[tid]; } + /** Sets the next PC of a specific thread.
*/ void setNextPC(uint64_t val, unsigned tid) { nextPC[tid] = val; } private: @@ -304,6 +322,7 @@ class DefaultCommit /** Memory interface. Used for d-cache accesses. */ MemInterface *dcacheInterface; + /** Vector of all of the threads. */ std::vector thread; Fault fetchFault; @@ -362,17 +381,27 @@ class DefaultCommit /** Number of Active Threads */ unsigned numThreads; + /** Is a switch out pending. */ bool switchPending; + + /** Is commit switched out. */ bool switchedOut; + /** The latency to handle a trap. Used when scheduling trap + * squash event. + */ Tick trapLatency; Tick fetchTrapLatency; Tick fetchFaultTick; + /** The commit PC of each thread. Refers to the instruction that + * is currently being processed/committed. + */ Addr PC[Impl::MaxThreads]; + /** The next PC of each thread. */ Addr nextPC[Impl::MaxThreads]; /** The sequence number of the youngest valid instruction in the ROB. */ @@ -384,6 +413,7 @@ class DefaultCommit /** Rename map interface. */ RenameMap *renameMap[Impl::MaxThreads]; + /** Updates commit stats based on this instruction. */ void updateComInstStats(DynInstPtr &inst); /** Stat for the total number of committed instructions. */ @@ -417,7 +447,9 @@ class DefaultCommit /** Total number of committed branches. */ Stats::Vector<> statComBranches; + /** Number of cycles where the commit bandwidth limit is reached. */ Stats::Scalar<> commitEligibleSamples; + /** Number of instructions not committed due to bandwidth limits. */ Stats::Vector<> commitEligible; }; diff --git a/cpu/o3/commit_impl.hh b/cpu/o3/commit_impl.hh index 346a8bc1c..9409697eb 100644 --- a/cpu/o3/commit_impl.hh +++ b/cpu/o3/commit_impl.hh @@ -691,7 +691,7 @@ DefaultCommit::commit() while (threads != (*activeThreads).end()) { unsigned tid = *threads++; - +/* if (fromFetch->fetchFault && commitStatus[0] != TrapPending) { // Record the fault. Wait until it's empty in the ROB. // Then handle the trap. 
Ignore it if there's already a @@ -713,7 +713,7 @@ DefaultCommit::commit() commitStatus[0] = Running; } } - +*/ // Not sure which one takes priority. I think if we have // both, that's a bad sign. if (trapSquash[tid] == true) { @@ -925,7 +925,7 @@ DefaultCommit::commitInsts() numCommittedDist.sample(num_committed); if (num_committed == commitWidth) { - commitEligible[0]++; + commitEligibleSamples[0]++; } } @@ -947,6 +947,7 @@ DefaultCommit::commitHead(DynInstPtr &head_inst, unsigned inst_num) head_inst->reachedCommit = true; if (head_inst->isNonSpeculative() || + head_inst->isStoreConditional() || head_inst->isMemBarrier() || head_inst->isWriteBarrier()) { diff --git a/cpu/o3/cpu.hh b/cpu/o3/cpu.hh index 8db65d501..f4b19bfb3 100644 --- a/cpu/o3/cpu.hh +++ b/cpu/o3/cpu.hh @@ -67,6 +67,11 @@ class BaseFullCPU : public BaseCPU int cpu_id; }; +/** + * FullO3CPU class, has each of the stages (fetch through commit) + * within it, as well as all of the time buffers between stages. The + * tick() function for the CPU is defined here. + */ template class FullO3CPU : public BaseFullCPU { @@ -194,17 +199,13 @@ class FullO3CPU : public BaseFullCPU */ virtual void syscall(int tid) { panic("Unimplemented!"); } - /** Check if there are any system calls pending. */ - void checkSyscalls(); - - /** Switches out this CPU. - */ + /** Switches out this CPU. */ void switchOut(Sampler *sampler); + /** Signals to this CPU that a stage has completed switching out. */ void signalSwitched(); - /** Takes over from another CPU. - */ + /** Takes over from another CPU. */ void takeOverFrom(BaseCPU *oldCPU); /** Get the current instruction sequence number, and increment it. */ @@ -244,9 +245,7 @@ class FullO3CPU : public BaseFullCPU #endif - // - // New accessors for new decoder. - // + /** Register accessors. Index refers to the physical register index. 
*/ uint64_t readIntReg(int reg_idx); float readFloatRegSingle(int reg_idx); @@ -271,6 +270,11 @@ class FullO3CPU : public BaseFullCPU uint64_t readArchFloatRegInt(int reg_idx, unsigned tid); + /** Architectural register accessors. Looks up in the commit + * rename table to obtain the true physical index of the + * architected register first, then accesses that physical + * register. + */ void setArchIntReg(int reg_idx, uint64_t val, unsigned tid); void setArchFloatRegSingle(int reg_idx, float val, unsigned tid); @@ -279,13 +283,17 @@ class FullO3CPU : public BaseFullCPU void setArchFloatRegInt(int reg_idx, uint64_t val, unsigned tid); + /** Reads the commit PC of a specific thread. */ uint64_t readPC(unsigned tid); - void setPC(Addr new_PC,unsigned tid); + /** Sets the commit PC of a specific thread. */ + void setPC(Addr new_PC, unsigned tid); + /** Reads the next PC of a specific thread. */ uint64_t readNextPC(unsigned tid); - void setNextPC(uint64_t val,unsigned tid); + /** Sets the next PC of a specific thread. */ + void setNextPC(uint64_t val, unsigned tid); /** Function to add instruction onto the head of the list of the * instructions. Used when new instructions are fetched. @@ -309,21 +317,15 @@ class FullO3CPU : public BaseFullCPU /** Remove all instructions younger than the given sequence number. */ void removeInstsUntil(const InstSeqNum &seq_num,unsigned tid); + /** Removes the instruction pointed to by the iterator. */ inline void squashInstIt(const ListIt &instIt, const unsigned &tid); + /** Cleans up all instructions on the remove list. */ void cleanUpRemovedInsts(); - /** Remove all instructions from the list. */ -// void removeAllInsts(); - + /** Debug function to print all instructions on the list. */ void dumpInsts(); - /** Basically a wrapper function so that instructions executed at - * commit can tell the instruction queue that they have - * completed. Eventually this hack should be removed. 
- */ -// void wakeDependents(DynInstPtr &inst); - public: /** List of all the instructions in flight. */ std::list instList; @@ -334,6 +336,9 @@ class FullO3CPU : public BaseFullCPU std::queue removeList; #ifdef DEBUG + /** Debug structure to keep track of the sequence numbers still in + * flight. + */ std::set snList; #endif @@ -420,14 +425,22 @@ class FullO3CPU : public BaseFullCPU /** The IEW stage's instruction queue. */ TimeBuffer iewQueue; - public: + private: + /** The activity recorder; used to tell if the CPU has any + * activity remaining or if it can go to idle and deschedule + * itself. + */ ActivityRecorder activityRec; + public: + /** Records that there was time buffer activity this cycle. */ void activityThisCycle() { activityRec.activity(); } + /** Changes a stage's status to active within the activity recorder. */ void activateStage(const StageIdx idx) { activityRec.activateStage(idx); } + /** Changes a stage's status to inactive within the activity recorder. */ void deactivateStage(const StageIdx idx) { activityRec.deactivateStage(idx); } @@ -438,7 +451,7 @@ class FullO3CPU : public BaseFullCPU int getFreeTid(); public: - /** Temporary function to get pointer to exec context. */ + /** Returns a pointer to a thread's exec context. */ ExecContext *xcBase(unsigned tid) { return thread[tid]->getXCProxy(); @@ -447,6 +460,10 @@ class FullO3CPU : public BaseFullCPU /** The global sequence number counter. */ InstSeqNum globalSeqNum; + /** Pointer to the checker, which can dynamically verify + * instruction results at run time. This can be set to NULL if it + * is not being used. + */ Checker *checker; #if FULL_SYSTEM @@ -462,11 +479,13 @@ class FullO3CPU : public BaseFullCPU /** Pointer to memory. */ FunctionalMemory *mem; + /** Pointer to the sampler */ Sampler *sampler; + /** Counter of how many stages have completed switching out. */ int switchCount; - // List of all ExecContexts. + /** Pointers to all of the threads in the CPU. 
*/ std::vector thread; #if 0 diff --git a/cpu/o3/cpu_policy.hh b/cpu/o3/cpu_policy.hh index b4249b12d..c30e58389 100644 --- a/cpu/o3/cpu_policy.hh +++ b/cpu/o3/cpu_policy.hh @@ -48,24 +48,50 @@ #include "cpu/o3/comm.hh" +/** + * Struct that defines the key classes to be used by the CPU. All + * classes use the typedefs defined here to determine what are the + * classes of the other stages and communication buffers. In order to + * change a structure such as the IQ, simply change the typedef here + * to use the desired class instead, and recompile. In order to + * create a different CPU to be used simultaneously with this one, see + * the alpha_impl.hh file for instructions. + */ template struct SimpleCPUPolicy { + /** Typedef for the branch prediction unit (which includes the BP, + * RAS, and BTB). + */ typedef BPredUnit BPredUnit; + /** Typedef for the register file. Most classes assume a unified + * physical register file. + */ typedef PhysRegFile RegFile; + /** Typedef for the freelist of registers. */ typedef SimpleFreeList FreeList; + /** Typedef for the rename map. */ typedef SimpleRenameMap RenameMap; + /** Typedef for the ROB. */ typedef ROB ROB; + /** Typedef for the instruction queue/scheduler. */ typedef InstructionQueue IQ; + /** Typedef for the memory dependence unit. */ typedef MemDepUnit MemDepUnit; + /** Typedef for the LSQ. */ typedef LSQ LSQ; + /** Typedef for the thread-specific LSQ units. */ typedef LSQUnit LSQUnit; - + /** Typedef for fetch. */ typedef DefaultFetch Fetch; + /** Typedef for decode. */ typedef DefaultDecode Decode; + /** Typedef for rename. */ typedef DefaultRename Rename; + /** Typedef for Issue/Execute/Writeback. */ typedef DefaultIEW IEW; + /** Typedef for commit. */ typedef DefaultCommit Commit; /** The struct for communication between fetch and decode. 
*/ diff --git a/cpu/o3/decode.hh b/cpu/o3/decode.hh index 3035b3387..b336575a8 100644 --- a/cpu/o3/decode.hh +++ b/cpu/o3/decode.hh @@ -107,9 +107,12 @@ class DefaultDecode /** Sets pointer to list of active threads. */ void setActiveThreads(std::list *at_ptr); + /** Switches out the decode stage. */ void switchOut(); + /** Takes over from another CPU's thread. */ void takeOverFrom(); + /** Ticks decode, processing all input signals and decoding as many * instructions as possible. */ diff --git a/cpu/o3/decode_impl.hh b/cpu/o3/decode_impl.hh index 8d84d46c8..0b686375e 100644 --- a/cpu/o3/decode_impl.hh +++ b/cpu/o3/decode_impl.hh @@ -41,6 +41,7 @@ DefaultDecode::DefaultDecode(Params *params) { _status = Inactive; + // Setup status, make sure stall signals are clear. for (int i = 0; i < numThreads; ++i) { decodeStatus[i] = Idle; @@ -165,6 +166,7 @@ template void DefaultDecode::switchOut() { + // Decode can immediately switch out. cpu->signalSwitched(); } @@ -174,6 +176,7 @@ DefaultDecode::takeOverFrom() { _status = Inactive; + // Be sure to reset state and clear out any old instructions. for (int i = 0; i < numThreads; ++i) { decodeStatus[i] = Idle; @@ -222,22 +225,22 @@ DefaultDecode::block(unsigned tid) { DPRINTF(Decode, "[tid:%u]: Blocking.\n", tid); - // If the decode status is blocked or unblocking then decode has not yet - // signalled fetch to unblock. In that case, there is no need to tell - // fetch to block. - if (decodeStatus[tid] != Blocked && - decodeStatus[tid] != Unblocking) { - toFetch->decodeBlock[tid] = true; - wroteToTimeBuffer = true; - } - // Add the current inputs to the skid buffer so they can be // reprocessed when this stage unblocks. skidInsert(tid); + // If the decode status is blocked or unblocking then decode has not yet + // signalled fetch to unblock. In that case, there is no need to tell + // fetch to block. if (decodeStatus[tid] != Blocked) { // Set the status to Blocked. 
decodeStatus[tid] = Blocked; + + if (decodeStatus[tid] != Unblocking) { + toFetch->decodeBlock[tid] = true; + wroteToTimeBuffer = true; + } + return true; } @@ -270,13 +273,16 @@ DefaultDecode::squash(DynInstPtr &inst, unsigned tid) DPRINTF(Decode, "[tid:%i]: Squashing due to incorrect branch prediction " "detected at decode.\n", tid); + // Send back mispredict information. toFetch->decodeInfo[tid].branchMispredict = true; toFetch->decodeInfo[tid].doneSeqNum = inst->seqNum; toFetch->decodeInfo[tid].predIncorrect = true; toFetch->decodeInfo[tid].squash = true; toFetch->decodeInfo[tid].nextPC = inst->readNextPC(); - toFetch->decodeInfo[tid].branchTaken = true; + toFetch->decodeInfo[tid].branchTaken = + inst->readNextPC() != (inst->readPC() + sizeof(TheISA::MachInst)); + // Might have to tell fetch to unblock. if (decodeStatus[tid] == Blocked || decodeStatus[tid] == Unblocking) { toFetch->decodeUnblock[tid] = 1; @@ -292,11 +298,12 @@ DefaultDecode::squash(DynInstPtr &inst, unsigned tid) } } + // Clear the instruction list and skid buffer in case they have any + // insts in them. while (!insts[tid].empty()) { insts[tid].pop(); } - // Clear the skid buffer in case it has any data in it. while (!skidBuffer[tid].empty()) { skidBuffer[tid].pop(); } @@ -341,11 +348,12 @@ DefaultDecode::squash(unsigned tid) } } + // Clear the instruction list and skid buffer in case they have any + // insts in them. while (!insts[tid].empty()) { insts[tid].pop(); } - // Clear the skid buffer in case it has any data in it. while (!skidBuffer[tid].empty()) { skidBuffer[tid].pop(); } diff --git a/cpu/o3/dep_graph.hh b/cpu/o3/dep_graph.hh index f8ae38da4..b6c5f1ab1 100644 --- a/cpu/o3/dep_graph.hh +++ b/cpu/o3/dep_graph.hh @@ -4,6 +4,7 @@ #include "cpu/o3/comm.hh" +/** Node in a linked list. 
*/ template class DependencyEntry { @@ -18,32 +19,50 @@ class DependencyEntry DependencyEntry *next; }; +/** Array of linked lists that maintains the dependencies between + * producing instructions and consuming instructions. Each linked + * list represents a single physical register, having the future + * producer of the register's value, and all consumers waiting on that + * value on the list. The head node of each linked list represents + * the producing instruction of that register. Instructions are put + * on the list upon reaching the IQ, and are removed from the list + * either when the producer completes, or the instruction is squashed. +*/ template class DependencyGraph { public: typedef DependencyEntry DepEntry; + /** Default construction. Must call resize() prior to use. */ DependencyGraph() : numEntries(0), memAllocCounter(0), nodesTraversed(0), nodesRemoved(0) { } + /** Resize the dependency graph to have num_entries registers. */ void resize(int num_entries); + /** Clears all of the linked lists. */ void reset(); + /** Inserts an instruction to be dependent on the given index. */ void insert(PhysRegIndex idx, DynInstPtr &new_inst); + /** Sets the producing instruction of a given register. */ void setInst(PhysRegIndex idx, DynInstPtr &new_inst) { dependGraph[idx].inst = new_inst; } + /** Clears the producing instruction. */ void clearInst(PhysRegIndex idx) { dependGraph[idx].inst = NULL; } + /** Removes an instruction from a single linked list. */ void remove(PhysRegIndex idx, DynInstPtr &inst_to_remove); + /** Removes and returns the newest dependent of a specific register. */ DynInstPtr pop(PhysRegIndex idx); + /** Checks if there are any dependents on a specific register. */ bool empty(PhysRegIndex idx) { return !dependGraph[idx].next; } /** Debugging function to dump out the dependency graph. @@ -59,13 +78,16 @@ class DependencyGraph */ DepEntry *dependGraph; + /** Number of linked lists; identical to the number of registers.
*/ int numEntries; // Debug variable, remove when done testing. unsigned memAllocCounter; public: + // Debug variable, remove when done testing. uint64_t nodesTraversed; + // Debug variable, remove when done testing. uint64_t nodesRemoved; }; diff --git a/cpu/o3/fetch.hh b/cpu/o3/fetch.hh index 3fcfdc3a1..92a87ab54 100644 --- a/cpu/o3/fetch.hh +++ b/cpu/o3/fetch.hh @@ -42,7 +42,7 @@ class Sampler; * width is specified by the parameters; each cycle it tries to fetch * that many instructions. It supports using a branch predictor to * predict direction and targets. - * It supports the idling functionalitiy of the CPU by indicating to + * It supports the idling functionality of the CPU by indicating to * the CPU when it is active and inactive. */ template @@ -163,14 +163,19 @@ class DefaultFetch /** Processes cache completion event. */ void processCacheCompletion(MemReqPtr &req); + /** Begins the switch out of the fetch stage. */ void switchOut(); + /** Completes the switch out of the fetch stage. */ void doSwitchOut(); + /** Takes over from another CPU's thread. */ void takeOverFrom(); + /** Checks if the fetch stage is switched out. */ bool isSwitchedOut() { return switchedOut; } + /** Tells fetch to wake up from a quiesce instruction. */ void wakeFromQuiesce(); private: @@ -301,8 +306,10 @@ class DefaultFetch /** BPredUnit. */ BPredUnit branchPred; + /** Per-thread fetch PC. */ Addr PC[Impl::MaxThreads]; + /** Per-thread next PC. */ Addr nextPC[Impl::MaxThreads]; /** Memory request used to access cache. */ @@ -369,8 +376,12 @@ class DefaultFetch /** Thread ID being fetched. */ int threadFetched; + /** Checks if there is an interrupt pending. If there is, fetch + * must stop once it is not fetching PAL instructions. + */ bool interruptPending; + /** Records if fetch is switched out. */ bool switchedOut; #if !FULL_SYSTEM @@ -394,17 +405,23 @@ class DefaultFetch * the pipeline. */ Stats::Scalar<> fetchIdleCycles; + /** Total number of cycles spent blocked. 
*/ Stats::Scalar<> fetchBlockedCycles; - + /** Total number of cycles spent in any other state. */ Stats::Scalar<> fetchMiscStallCycles; /** Stat for total number of fetched cache lines. */ Stats::Scalar<> fetchedCacheLines; - + /** Total number of outstanding icache accesses that were dropped + * due to a squash. + */ Stats::Scalar<> fetchIcacheSquashes; /** Distribution of number of instructions fetched each cycle. */ Stats::Distribution<> fetchNisnDist; + /** Rate of how often fetch was idle. */ Stats::Formula idleRate; + /** Number of branch fetches per cycle. */ Stats::Formula branchRate; + /** Number of instruction fetched per cycle. */ Stats::Formula fetchRate; }; diff --git a/cpu/o3/fetch_impl.hh b/cpu/o3/fetch_impl.hh index 1c5e508f6..a309bd49a 100644 --- a/cpu/o3/fetch_impl.hh +++ b/cpu/o3/fetch_impl.hh @@ -161,59 +161,59 @@ void DefaultFetch::regStats() { icacheStallCycles - .name(name() + ".FETCH:icacheStallCycles") + .name(name() + ".icacheStallCycles") .desc("Number of cycles fetch is stalled on an Icache miss") .prereq(icacheStallCycles); fetchedInsts - .name(name() + ".FETCH:Insts") + .name(name() + ".Insts") .desc("Number of instructions fetch has processed") .prereq(fetchedInsts); fetchedBranches - .name(name() + ".FETCH:Branches") + .name(name() + ".Branches") .desc("Number of branches that fetch encountered") .prereq(fetchedBranches); predictedBranches - .name(name() + ".FETCH:predictedBranches") + .name(name() + ".predictedBranches") .desc("Number of branches that fetch has predicted taken") .prereq(predictedBranches); fetchCycles - .name(name() + ".FETCH:Cycles") + .name(name() + ".Cycles") .desc("Number of cycles fetch has run and was not squashing or" " blocked") .prereq(fetchCycles); fetchSquashCycles - .name(name() + ".FETCH:SquashCycles") + .name(name() + ".SquashCycles") .desc("Number of cycles fetch has spent squashing") .prereq(fetchSquashCycles); fetchIdleCycles - .name(name() + ".FETCH:IdleCycles") + .name(name() + ".IdleCycles") 
.desc("Number of cycles fetch was idle") .prereq(fetchIdleCycles); fetchBlockedCycles - .name(name() + ".FETCH:BlockedCycles") + .name(name() + ".BlockedCycles") .desc("Number of cycles fetch has spent blocked") .prereq(fetchBlockedCycles); fetchedCacheLines - .name(name() + ".FETCH:CacheLines") + .name(name() + ".CacheLines") .desc("Number of cache lines fetched") .prereq(fetchedCacheLines); fetchMiscStallCycles - .name(name() + ".FETCH:MiscStallCycles") + .name(name() + ".MiscStallCycles") .desc("Number of cycles fetch has spent waiting on interrupts, or " "bad addresses, or out of MSHRs") .prereq(fetchMiscStallCycles); fetchIcacheSquashes - .name(name() + ".FETCH:IcacheSquashes") + .name(name() + ".IcacheSquashes") .desc("Number of outstanding Icache misses that were squashed") .prereq(fetchIcacheSquashes); @@ -221,24 +221,24 @@ DefaultFetch::regStats() .init(/* base value */ 0, /* last value */ fetchWidth, /* bucket size */ 1) - .name(name() + ".FETCH:rateDist") + .name(name() + ".rateDist") .desc("Number of instructions fetched each cycle (Total)") .flags(Stats::pdf); idleRate - .name(name() + ".FETCH:idleRate") + .name(name() + ".idleRate") .desc("Percent of cycles fetch was idle") .prereq(idleRate); idleRate = fetchIdleCycles * 100 / cpu->numCycles; branchRate - .name(name() + ".FETCH:branchRate") + .name(name() + ".branchRate") .desc("Number of branch fetches per cycle") .flags(Stats::total); - branchRate = predictedBranches / cpu->numCycles; + branchRate = fetchedBranches / cpu->numCycles; fetchRate - .name(name() + ".FETCH:rate") + .name(name() + ".rate") .desc("Number of inst fetches per cycle") .flags(Stats::total); fetchRate = fetchedInsts / cpu->numCycles; @@ -307,6 +307,7 @@ template void DefaultFetch::initStage() { + // Setup PC and nextPC with initial state. 
for (int tid = 0; tid < numThreads; tid++) { PC[tid] = cpu->readPC(tid); nextPC[tid] = cpu->readNextPC(tid); @@ -323,8 +324,6 @@ DefaultFetch::processCacheCompletion(MemReqPtr &req) // Only change the status if it's still waiting on the icache access // to return. - // Can keep track of how many cache accesses go unused due to - // misspeculation here. if (fetchStatus[tid] != IcacheMissStall || req != memReq[tid] || isSwitchedOut()) { @@ -358,6 +357,7 @@ template void DefaultFetch::switchOut() { + // Fetch is ready to switch out at any time. switchedOut = true; cpu->signalSwitched(); } @@ -366,6 +366,7 @@ template void DefaultFetch::doSwitchOut() { + // Branch predictor needs to have its state cleared. branchPred.switchOut(); } @@ -396,6 +397,7 @@ DefaultFetch::wakeFromQuiesce() { DPRINTF(Fetch, "Waking up from quiesce\n"); // Hopefully this is safe + // @todo: Allow other threads to wake from quiesce. fetchStatus[0] = Running; } @@ -831,7 +833,7 @@ DefaultFetch::checkSignalsAndUpdate(unsigned tid) } } - if (checkStall(tid) && fetchStatus[tid] != IcacheMissStall) { + if (fetchStatus[tid] != IcacheMissStall && checkStall(tid)) { DPRINTF(Fetch, "[tid:%i]: Setting to blocked\n",tid); fetchStatus[tid] = Blocked; @@ -1199,7 +1201,7 @@ DefaultFetch::lsqCount() if (fetchStatus[high_pri] == Running || fetchStatus[high_pri] == IcacheMissComplete || - fetchStatus[high_pri] == Idle) + fetchStatus[high_pri] == Idle) return high_pri; else PQ.pop(); diff --git a/cpu/o3/fu_pool.cc b/cpu/o3/fu_pool.cc index fb2b5c00d..b28b5d37f 100644 --- a/cpu/o3/fu_pool.cc +++ b/cpu/o3/fu_pool.cc @@ -183,6 +183,8 @@ FUPool::getUnit(OpClass capability) } } + assert(fu_idx < numFU); + unitBusy[fu_idx] = true; return fu_idx; diff --git a/cpu/o3/fu_pool.hh b/cpu/o3/fu_pool.hh index da6fdc802..052e4832d 100644 --- a/cpu/o3/fu_pool.hh +++ b/cpu/o3/fu_pool.hh @@ -155,7 +155,10 @@ class FUPool : public SimObject return maxIssueLatencies[capability]; } + /** Switches out functional unit pool. 
*/ void switchOut(); + + /** Takes over from another CPU's thread. */ void takeOverFrom(); }; diff --git a/cpu/o3/iew.hh b/cpu/o3/iew.hh index 935320628..eda6a6bc0 100644 --- a/cpu/o3/iew.hh +++ b/cpu/o3/iew.hh @@ -160,12 +160,16 @@ class DefaultIEW /** Sets pointer to the scoreboard. */ void setScoreboard(Scoreboard *sb_ptr); + /** Starts switch out of IEW stage. */ void switchOut(); + /** Completes switch out of IEW stage. */ void doSwitchOut(); + /** Takes over from another CPU's thread. */ void takeOverFrom(); + /** Returns if IEW is switched out. */ bool isSwitchedOut() { return switchedOut; } /** Sets page table pointer within LSQ. */ @@ -287,6 +291,7 @@ class DefaultIEW void tick(); private: + /** Updates execution stats based on the instruction. */ void updateExeInstStats(DynInstPtr &inst); /** Pointer to main time buffer used for backwards communication. */ @@ -429,6 +434,7 @@ class DefaultIEW /** Maximum size of the skid buffer. */ unsigned skidBufferMax; + /** Is this stage switched out. */ bool switchedOut; /** Stat for total number of idle cycles. */ @@ -470,9 +476,13 @@ class DefaultIEW /** Stat for total number of mispredicted branches detected at execute. */ Stats::Formula branchMispredicts; + /** Number of executed software prefetches. */ Stats::Vector<> exeSwp; + /** Number of executed nops. */ Stats::Vector<> exeNop; + /** Number of executed memory references. */ Stats::Vector<> exeRefs; + /** Number of executed branches. */ Stats::Vector<> exeBranches; // Stats::Vector<> issued_ops; @@ -482,19 +492,30 @@ class DefaultIEW Stats::Vector<> dist_unissued; Stats::Vector2d<> stat_issued_inst_type; */ + /** Number of instructions issued per cycle. */ Stats::Formula issueRate; + /** Number of executed store instructions. */ Stats::Formula iewExecStoreInsts; // Stats::Formula issue_op_rate; // Stats::Formula fu_busy_rate; - + /** Number of instructions sent to commit. */ Stats::Vector<> iewInstsToCommit; + /** Number of instructions that writeback.
*/ Stats::Vector<> writebackCount; + /** Number of instructions that wake consumers. */ Stats::Vector<> producerInst; + /** Number of instructions that wake up from producers. */ Stats::Vector<> consumerInst; + /** Number of instructions that were delayed in writing back due + * to resource contention. + */ Stats::Vector<> wbPenalized; + /** Number of instructions per cycle written back. */ Stats::Formula wbRate; + /** Average number of woken instructions per writeback. */ Stats::Formula wbFanout; + /** Number of instructions per cycle delayed in writing back. */ Stats::Formula wbPenalizedRate; }; diff --git a/cpu/o3/iew_impl.hh b/cpu/o3/iew_impl.hh index b0137d7fc..3ed20cb75 100644 --- a/cpu/o3/iew_impl.hh +++ b/cpu/o3/iew_impl.hh @@ -433,6 +433,7 @@ template void DefaultIEW::switchOut() { + // IEW is ready to switch out at any time. cpu->signalSwitched(); } @@ -440,6 +441,7 @@ template void DefaultIEW::doSwitchOut() { + // Clear any state. switchedOut = true; instQueue.switchOut(); @@ -458,6 +460,7 @@ template void DefaultIEW::takeOverFrom() { + // Reset all state. _status = Active; exeStatus = Running; wbStatus = Idle; @@ -571,6 +574,7 @@ DefaultIEW::squashDueToMemBlocked(DynInstPtr &inst, unsigned tid) toCommit->squashedSeqNum[tid] = inst->seqNum; toCommit->nextPC[tid] = inst->readPC(); + // Must include the broadcasted SN in the squash. toCommit->includeSquashInst[tid] = true; ldstQueue.setLoadBlockedHandled(tid); @@ -1104,6 +1108,7 @@ DefaultIEW::dispatchInsts(unsigned tid) // Store conditionals need to be set as "canCommit()" // so that commit can process them when they reach the // head of commit. + // @todo: This is somewhat specific to Alpha. inst->setCanCommit(); instQueue.insertNonSpec(inst); add_to_iq = false; @@ -1363,6 +1368,7 @@ DefaultIEW::executeInsts() } } + // Update and record activity if we processed any instructions.
if (inst_num) { if (exeStatus == Idle) { exeStatus = Running; @@ -1413,8 +1419,10 @@ DefaultIEW::writebackInsts() scoreboard->setReg(inst->renamedDestRegIdx(i)); } - producerInst[tid]++; - consumerInst[tid]+= dependents; + if (dependents) { + producerInst[tid]++; + consumerInst[tid]+= dependents; + } writebackCount[tid]++; } } @@ -1485,6 +1493,7 @@ DefaultIEW::tick() DPRINTF(IEW,"Processing [tid:%i]\n",tid); + // Update structures based on instructions committed. if (fromCommit->commitInfo[tid].doneSeqNum != 0 && !fromCommit->commitInfo[tid].squash && !fromCommit->commitInfo[tid].robSquashing) { diff --git a/cpu/o3/inst_queue.hh b/cpu/o3/inst_queue.hh index 518de73d9..4802cbaf4 100644 --- a/cpu/o3/inst_queue.hh +++ b/cpu/o3/inst_queue.hh @@ -92,6 +92,9 @@ class InstructionQueue /** Pointer back to the instruction queue. */ InstructionQueue *iqPtr; + /** Should the FU be added to the list to be freed upon + * completing this event. + */ bool freeFU; public: @@ -116,6 +119,7 @@ class InstructionQueue /** Registers statistics. */ void regStats(); + /** Resets all instruction queue state. */ void resetState(); /** Sets CPU pointer. */ @@ -133,10 +137,13 @@ class InstructionQueue /** Sets the global time buffer. */ void setTimeBuffer(TimeBuffer *tb_ptr); + /** Switches out the instruction queue. */ void switchOut(); + /** Takes over execution from another CPU's thread. */ void takeOverFrom(); + /** Returns if the IQ is switched out. */ bool isSwitchedOut() { return switchedOut; } /** Number of entries needed for given amount of threads. */ @@ -171,6 +178,9 @@ class InstructionQueue */ void insertBarrier(DynInstPtr &barr_inst); + /** Returns the oldest scheduled instruction, and removes it from + * the list of instructions waiting to execute. + */ DynInstPtr getInstToExecute(); /** @@ -274,13 +284,15 @@ class InstructionQueue /** List of all the instructions in the IQ (some of which may be issued). 
*/ std::list instList[Impl::MaxThreads]; + /** List of instructions that are ready to be executed. */ std::list instsToExecute; /** - * Struct for comparing entries to be added to the priority queue. This - * gives reverse ordering to the instructions in terms of sequence - * numbers: the instructions with smaller sequence numbers (and hence - * are older) will be at the top of the priority queue. + * Struct for comparing entries to be added to the priority queue. + * This gives reverse ordering to the instructions in terms of + * sequence numbers: the instructions with smaller sequence + * numbers (and hence are older) will be at the top of the + * priority queue. */ struct pqCompare { bool operator() (const DynInstPtr &lhs, const DynInstPtr &rhs) const @@ -393,6 +405,7 @@ class InstructionQueue */ unsigned commitToIEWDelay; + /** Is the IQ switched out. */ bool switchedOut; /** The sequence number of the squashed instruction. */ @@ -460,19 +473,28 @@ class InstructionQueue */ Stats::Scalar<> iqSquashedNonSpecRemoved; + /** Distribution of number of instructions in the queue. */ Stats::VectorDistribution<> queueResDist; + /** Distribution of the number of instructions issued. */ Stats::Distribution<> numIssuedDist; + /** Distribution of the cycles it takes to issue an instruction. */ Stats::VectorDistribution<> issueDelayDist; + /** Number of times an instruction could not be issued because a + * FU was busy. + */ Stats::Vector<> statFuBusy; // Stats::Vector<> dist_unissued; + /** Stat for total number issued for each instruction type. */ Stats::Vector2d<> statIssuedInstType; + /** Number of instructions issued per cycle. */ Stats::Formula issueRate; // Stats::Formula issue_stores; // Stats::Formula issue_op_rate; - Stats::Vector<> fuBusy; //cumulative fu busy - + /** Number of times the FU was busy. */ + Stats::Vector<> fuBusy; + /** Number of times the FU was busy per instruction issued. 
*/ Stats::Formula fuBusyRate; }; diff --git a/cpu/o3/inst_queue_impl.hh b/cpu/o3/inst_queue_impl.hh index f1dc4e01f..d677a259c 100644 --- a/cpu/o3/inst_queue_impl.hh +++ b/cpu/o3/inst_queue_impl.hh @@ -151,8 +151,10 @@ template InstructionQueue::~InstructionQueue() { dependGraph.reset(); +#ifdef DEBUG cprintf("Nodes traversed: %i, removed: %i\n", dependGraph.nodesTraversed, dependGraph.nodesRemoved); +#endif } template @@ -669,14 +671,8 @@ InstructionQueue::processFUCompletion(DynInstPtr &inst, int fu_idx) // @todo: Ensure that these FU Completions happen at the beginning // of a cycle, otherwise they could add too many instructions to // the queue. - // @todo: This could break if there's multiple multi-cycle ops - // finishing on this cycle. Maybe implement something like - // instToCommit in iew_impl.hh. issueToExecuteQueue->access(0)->size++; instsToExecute.push_back(inst); -// int &size = issueToExecuteQueue->access(0)->size; - -// issueToExecuteQueue->access(0)->insts[size++] = inst; } // @todo: Figure out a better way to remove the squashed items from the @@ -742,9 +738,10 @@ InstructionQueue::scheduleReadyInsts() } } + // If we have an instruction that doesn't require a FU, or a + // valid FU, then schedule for execution. if (idx == -2 || idx != -1) { if (op_latency == 1) { -// i2e_info->insts[exec_queue_slot++] = issuing_inst; i2e_info->size++; instsToExecute.push_back(issuing_inst); @@ -762,14 +759,10 @@ InstructionQueue::scheduleReadyInsts() // @todo: Enforce that issue_latency == 1 or op_latency if (issue_latency > 1) { + // If FU isn't pipelined, then it must be freed + // upon the execution completing. execution->setFreeFU(); } else { - // @todo: Not sure I'm accounting for the - // multi-cycle op in a pipelined FU properly, or - // the number of instructions issued in one cycle. -// i2e_info->insts[exec_queue_slot++] = issuing_inst; -// i2e_info->size++; - // Add the FU onto the list of FU's to be freed next cycle. 
fuPool->freeUnitNextCycle(idx); } @@ -814,6 +807,7 @@ InstructionQueue::scheduleReadyInsts() numIssuedDist.sample(total_issued); iqInstsIssued+= total_issued; + // If we issued any instructions, tell the CPU we had activity. if (total_issued) { cpu->activityThisCycle(); } else { @@ -1364,4 +1358,45 @@ InstructionQueue::dumpInsts() ++num; } } + + cprintf("Insts to Execute list:\n"); + + int num = 0; + int valid_num = 0; + ListIt inst_list_it = instsToExecute.begin(); + + while (inst_list_it != instsToExecute.end()) + { + cprintf("Instruction:%i\n", + num); + if (!(*inst_list_it)->isSquashed()) { + if (!(*inst_list_it)->isIssued()) { + ++valid_num; + cprintf("Count:%i\n", valid_num); + } else if ((*inst_list_it)->isMemRef() && + !(*inst_list_it)->memOpDone) { + // Loads that have not been marked as executed + // still count towards the total instructions. + ++valid_num; + cprintf("Count:%i\n", valid_num); + } + } + + cprintf("PC:%#x\n[sn:%lli]\n[tid:%i]\n" + "Issued:%i\nSquashed:%i\n", + (*inst_list_it)->readPC(), + (*inst_list_it)->seqNum, + (*inst_list_it)->threadNumber, + (*inst_list_it)->isIssued(), + (*inst_list_it)->isSquashed()); + + if ((*inst_list_it)->isMemRef()) { + cprintf("MemOpDone:%i\n", (*inst_list_it)->memOpDone); + } + + cprintf("\n"); + + inst_list_it++; + ++num; + } } diff --git a/cpu/o3/lsq.hh b/cpu/o3/lsq.hh index a1eeccbe7..b321d4590 100644 --- a/cpu/o3/lsq.hh +++ b/cpu/o3/lsq.hh @@ -49,6 +49,7 @@ class LSQ { typedef typename Impl::CPUPol::IEW IEW; typedef typename Impl::CPUPol::LSQUnit LSQUnit; + /** SMT policy. */ enum LSQPolicy { Dynamic, Partitioned, @@ -69,8 +70,9 @@ class LSQ { void setIEW(IEW *iew_ptr); /** Sets the page table pointer. */ // void setPageTable(PageTable *pt_ptr); - + /** Switches out the LSQ. */ void switchOut(); + /** Takes over execution from another CPU's thread. */ void takeOverFrom(); /** Number of entries needed for the given amount of threads.*/ @@ -95,9 +97,6 @@ class LSQ { /** Executes a load. 
*/ Fault executeLoad(DynInstPtr &inst); - Fault executeLoad(int lq_idx, unsigned tid) - { return thread[tid].executeLoad(lq_idx); } - /** Executes a store. */ Fault executeStore(DynInstPtr &inst); diff --git a/cpu/o3/lsq_unit.hh b/cpu/o3/lsq_unit.hh index 942b4583d..a6afff743 100644 --- a/cpu/o3/lsq_unit.hh +++ b/cpu/o3/lsq_unit.hh @@ -112,10 +112,13 @@ class LSQUnit { /** Sets the page table pointer. */ // void setPageTable(PageTable *pt_ptr); + /** Switches out LSQ unit. */ void switchOut(); + /** Takes over from another CPU's thread. */ void takeOverFrom(); + /** Returns if the LSQ is switched out. */ bool isSwitchedOut() { return switchedOut; } /** Ticks the LSQ unit, which in this case only resets the number of @@ -180,12 +183,15 @@ class LSQUnit { bool loadBlocked() { return isLoadBlocked; } + /** Clears the signal that a load became blocked. */ void clearLoadBlocked() { isLoadBlocked = false; } + /** Returns if the blocked load was handled. */ bool isLoadBlockedHandled() { return loadBlockedHandled; } + /** Records the blocked load as being handled. */ void setLoadBlockedHandled() { loadBlockedHandled = true; } @@ -331,6 +337,7 @@ class LSQUnit { /** The number of used cache ports in this cycle. */ int usedPorts; + /** Is the LSQ switched out. */ bool switchedOut; //list mshrSeqNums; @@ -350,8 +357,10 @@ class LSQUnit { /** Whether or not a load is blocked due to the memory system. */ bool isLoadBlocked; + /** Has the blocked load been handled. */ bool loadBlockedHandled; + /** The sequence number of the blocked load. */ InstSeqNum blockedLoadSeqNum; /** The oldest load that caused a memory ordering violation. 
*/ @@ -452,10 +461,10 @@ LSQUnit::read(MemReqPtr &req, T &data, int load_idx) cpu->lockFlag = true; } #endif - req->cmd = Read; - assert(!req->completionEvent); - req->completionEvent = NULL; - req->time = curTick; + req->cmd = Read; + assert(!req->completionEvent); + req->completionEvent = NULL; + req->time = curTick; while (store_idx != -1) { // End once we've reached the top of the LSQ diff --git a/cpu/o3/lsq_unit_impl.hh b/cpu/o3/lsq_unit_impl.hh index 10f2b5572..4ee8bb234 100644 --- a/cpu/o3/lsq_unit_impl.hh +++ b/cpu/o3/lsq_unit_impl.hh @@ -477,7 +477,6 @@ LSQUnit::commitLoad() DPRINTF(LSQUnit, "Committing head load instruction, PC %#x\n", loadQueue[loadHead]->readPC()); - loadQueue[loadHead] = NULL; incrLdIdx(loadHead); diff --git a/cpu/o3/mem_dep_unit.hh b/cpu/o3/mem_dep_unit.hh index acbe08ec2..bb0406de1 100644 --- a/cpu/o3/mem_dep_unit.hh +++ b/cpu/o3/mem_dep_unit.hh @@ -84,8 +84,10 @@ class MemDepUnit { /** Registers statistics. */ void regStats(); + /** Switches out the memory dependence predictor. */ void switchOut(); + /** Takes over from another CPU's thread. */ void takeOverFrom(); /** Sets the pointer to the IQ. */ @@ -155,10 +157,12 @@ class MemDepUnit { : inst(new_inst), regsReady(false), memDepReady(false), completed(false), squashed(false) { +#ifdef DEBUG ++memdep_count; DPRINTF(MemDepUnit, "Memory dependency entry created. " "memdep_count=%i\n", memdep_count); +#endif } /** Frees any pointers. */ @@ -167,11 +171,12 @@ class MemDepUnit { for (int i = 0; i < dependInsts.size(); ++i) { dependInsts[i] = NULL; } - +#ifdef DEBUG --memdep_count; DPRINTF(MemDepUnit, "Memory dependency entry deleted. " "memdep_count=%i\n", memdep_count); +#endif } /** Returns the name of the memory dependence entry. */ @@ -196,9 +201,11 @@ class MemDepUnit { bool squashed; /** For debugging. */ +#ifdef DEBUG static int memdep_count; static int memdep_insert; static int memdep_erase; +#endif }; /** Finds the memory dependence entry in the hash map. 
*/ @@ -227,9 +234,13 @@ class MemDepUnit { */ MemDepPred depPred; + /** Is there an outstanding load barrier that loads must wait on. */ bool loadBarrier; + /** The sequence number of the load barrier. */ InstSeqNum loadBarrierSN; + /** Is there an outstanding store barrier that loads must wait on. */ bool storeBarrier; + /** The sequence number of the store barrier. */ InstSeqNum storeBarrierSN; /** Pointer to the IQ. */ diff --git a/cpu/o3/mem_dep_unit_impl.hh b/cpu/o3/mem_dep_unit_impl.hh index 8b195baab..595e9293f 100644 --- a/cpu/o3/mem_dep_unit_impl.hh +++ b/cpu/o3/mem_dep_unit_impl.hh @@ -105,6 +105,7 @@ template void MemDepUnit::switchOut() { + // Clear any state. for (int i = 0; i < Impl::MaxThreads; ++i) { instList[i].clear(); } @@ -116,6 +117,7 @@ template void MemDepUnit::takeOverFrom() { + // Be sure to reset all state. loadBarrier = storeBarrier = false; loadBarrierSN = storeBarrierSN = 0; depPred.clear(); @@ -146,7 +148,7 @@ MemDepUnit::insert(DynInstPtr &inst) inst_entry->listIt = --(instList[tid].end()); // Check any barriers and the dependence predictor for any - // producing stores. + // producing memrefs/stores. InstSeqNum producing_store; if (inst->isLoad() && loadBarrier) { producing_store = loadBarrierSN; @@ -253,6 +255,7 @@ void MemDepUnit::insertBarrier(DynInstPtr &barr_inst) { InstSeqNum barr_sn = barr_inst->seqNum; + // Memory barriers block loads and stores, write barriers only stores. if (barr_inst->isMemBarrier()) { loadBarrier = true; loadBarrierSN = barr_sn; @@ -330,6 +333,7 @@ MemDepUnit::replay(DynInstPtr &inst) DynInstPtr temp_inst; bool found_inst = false; + // For now this replay function replays all waiting memory ops. while (!instsToReplay.empty()) { temp_inst = instsToReplay.front(); diff --git a/cpu/o3/rename.hh b/cpu/o3/rename.hh index 3f1a27bb5..4912431ad 100644 --- a/cpu/o3/rename.hh +++ b/cpu/o3/rename.hh @@ -155,10 +155,13 @@ class DefaultRename /** Sets pointer to the scoreboard. 
*/ void setScoreboard(Scoreboard *_scoreboard); + /** Switches out the rename stage. */ void switchOut(); + /** Completes the switch out. */ void doSwitchOut(); + /** Takes over from another CPU's thread. */ void takeOverFrom(); /** Squashes all instructions in a thread. */ @@ -243,8 +246,10 @@ class DefaultRename /** Checks if any stages are telling rename to block. */ bool checkStall(unsigned tid); + /** Gets the number of free entries for a specific thread. */ void readFreeEntries(unsigned tid); + /** Checks the signals and updates the status. */ bool checkSignalsAndUpdate(unsigned tid); /** Either serializes on the next instruction available in the InstQueue, @@ -454,8 +459,11 @@ class DefaultRename Stats::Scalar<> renameCommittedMaps; /** Stat for total number of mappings that were undone due to a squash. */ Stats::Scalar<> renameUndoneMaps; + /** Number of serialize instructions handled. */ Stats::Scalar<> renamedSerializing; + /** Number of instructions marked as temporarily serializing. */ Stats::Scalar<> renamedTempSerializing; + /** Number of instructions inserted into skid buffers. */ Stats::Scalar<> renameSkidInsts; }; diff --git a/cpu/o3/rename_impl.hh b/cpu/o3/rename_impl.hh index b4f1077d1..829c99584 100644 --- a/cpu/o3/rename_impl.hh +++ b/cpu/o3/rename_impl.hh @@ -258,6 +258,7 @@ template void DefaultRename::switchOut() { + // Rename is ready to switch out at any time. cpu->signalSwitched(); } @@ -265,6 +266,7 @@ template void DefaultRename::doSwitchOut() { + // Clear any state, fix up the rename map. for (int i = 0; i < numThreads; i++) { typename list::iterator hb_it = historyBuffer[i].begin(); diff --git a/cpu/o3/rename_map.hh b/cpu/o3/rename_map.hh index d7e49ae83..1ac627264 100644 --- a/cpu/o3/rename_map.hh +++ b/cpu/o3/rename_map.hh @@ -62,12 +62,13 @@ class SimpleRenameMap typedef std::pair RenameInfo; public: - //Constructor - SimpleRenameMap() {}; + /** Default constructor. init() must be called prior to use. 
*/ + SimpleRenameMap() {}; /** Destructor. */ ~SimpleRenameMap(); + /** Initializes rename map with given parameters. */ void init(unsigned _numLogicalIntRegs, unsigned _numPhysicalIntRegs, PhysRegIndex &_int_reg_start, @@ -84,6 +85,7 @@ class SimpleRenameMap int id, bool bindRegs); + /** Sets the free list used with this rename map. */ void setFreeList(SimpleFreeList *fl_ptr); //Tell rename map to get a free physical register for a given @@ -149,7 +151,6 @@ class SimpleRenameMap { } }; - //Change this to private private: /** Integer rename map. */ std::vector intRenameMap; diff --git a/cpu/o3/rob.hh b/cpu/o3/rob.hh index e05eebe5a..bdbdde32f 100644 --- a/cpu/o3/rob.hh +++ b/cpu/o3/rob.hh @@ -95,8 +95,10 @@ class ROB */ void setActiveThreads(std::list* at_ptr); + /** Switches out the ROB. */ void switchOut(); + /** Takes over another CPU's thread. */ void takeOverFrom(); /** Function to insert an instruction into the ROB. Note that whatever @@ -298,6 +300,7 @@ class ROB /** Number of instructions in the ROB. */ int numInstsInROB; + /** Dummy instruction returned if there are no insts left. */ DynInstPtr dummyInst; private: diff --git a/cpu/o3/store_set.cc b/cpu/o3/store_set.cc index 0c957c8c7..67ccf1b55 100644 --- a/cpu/o3/store_set.cc +++ b/cpu/o3/store_set.cc @@ -26,6 +26,7 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ +#include "base/intmath.hh" #include "base/trace.hh" #include "cpu/o3/store_set.hh" @@ -36,6 +37,10 @@ StoreSet::StoreSet(int _SSIT_size, int _LFST_size) DPRINTF(StoreSet, "StoreSet: SSIT size: %i, LFST size: %i.\n", SSITSize, LFSTSize); + if (!isPowerOf2(SSITSize)) { + fatal("Invalid SSIT size!\n"); + } + SSIT.resize(SSITSize); validSSIT.resize(SSITSize); @@ -43,6 +48,10 @@ StoreSet::StoreSet(int _SSIT_size, int _LFST_size) for (int i = 0; i < SSITSize; ++i) validSSIT[i] = false; + if (!isPowerOf2(LFSTSize)) { + fatal("Invalid LFST size!\n"); + } + LFST.resize(LFSTSize); validLFST.resize(LFSTSize); @@ -318,3 +327,19 @@ StoreSet::clear() storeList.clear(); } + +void +StoreSet::dump() +{ + cprintf("storeList.size(): %i\n", storeList.size()); + SeqNumMapIt store_list_it = storeList.begin(); + + int num = 0; + + while (store_list_it != storeList.end()) { + cprintf("%i: [sn:%lli] SSID:%i\n", + num, (*store_list_it).first, (*store_list_it).second); + num++; + store_list_it++; + } +} diff --git a/cpu/o3/store_set.hh b/cpu/o3/store_set.hh index 7189db3ab..5f875131c 100644 --- a/cpu/o3/store_set.hh +++ b/cpu/o3/store_set.hh @@ -44,58 +44,98 @@ struct ltseqnum { } }; +/** + * Implements a store set predictor for determining if memory + * instructions are dependent upon each other. See paper "Memory + * Dependence Prediction using Store Sets" by Chrysos and Emer. SSID + * stands for Store Set ID, SSIT stands for Store Set ID Table, and + * LFST is Last Fetched Store Table. + */ class StoreSet { public: typedef unsigned SSID; public: + /** Default constructor. init() must be called prior to use. */ StoreSet() { }; + /** Creates store set predictor with given table sizes. */ StoreSet(int SSIT_size, int LFST_size); + /** Default destructor. */ ~StoreSet(); + /** Initializes the store set predictor with the given table sizes. */ void init(int SSIT_size, int LFST_size); + /** Records a memory ordering violation between the younger load + * and the older store. 
*/ void violation(Addr store_PC, Addr load_PC); + /** Inserts a load into the store set predictor. This does nothing but + * is included in case other predictors require a similar function. + */ void insertLoad(Addr load_PC, InstSeqNum load_seq_num); + /** Inserts a store into the store set predictor. Updates the + * LFST if the store has a valid SSID. */ void insertStore(Addr store_PC, InstSeqNum store_seq_num, unsigned tid); + /** Checks if the instruction with the given PC is dependent upon + * any store. @return Returns the sequence number of the store + * instruction this PC is dependent upon. Returns 0 if none. + */ InstSeqNum checkInst(Addr PC); + /** Records this PC/sequence number as issued. */ void issued(Addr issued_PC, InstSeqNum issued_seq_num, bool is_store); + /** Squashes for a specific thread until the given sequence number. */ void squash(InstSeqNum squashed_num, unsigned tid); + /** Resets all tables. */ void clear(); + /** Debug function to dump the contents of the store list. */ + void dump(); + private: + /** Calculates the index into the SSIT based on the PC. */ inline int calcIndex(Addr PC) { return (PC >> offsetBits) & indexMask; } + /** Calculates a Store Set ID based on the PC. */ inline SSID calcSSID(Addr PC) { return ((PC ^ (PC >> 10)) % LFSTSize); } + /** The Store Set ID Table. */ std::vector SSIT; + /** Bit vector to tell if the SSIT has a valid entry. */ std::vector validSSIT; + /** Last Fetched Store Table. */ std::vector LFST; + /** Bit vector to tell if the LFST has a valid entry. */ std::vector validLFST; + /** Map of stores that have been inserted into the store set, but + * not yet issued or squashed. + */ std::map storeList; typedef std::map::iterator SeqNumMapIt; + /** Store Set ID Table size, in entries. */ int SSITSize; + /** Last Fetched Store Table size, in entries. */ int LFSTSize; + /** Mask to obtain the index. */ int indexMask; // HACK: Hardcoded for now. 
diff --git a/cpu/o3/thread_state.hh b/cpu/o3/thread_state.hh index 2c9788e4b..3f1208ea0 100644 --- a/cpu/o3/thread_state.hh +++ b/cpu/o3/thread_state.hh @@ -58,16 +58,26 @@ struct O3ThreadState : public ThreadState { typedef ExecContext::Status Status; typedef typename Impl::FullCPU FullCPU; + /** Current status of the thread. */ Status _status; - // Current instruction + /** Current instruction the thread is committing. Only set and + * used for DTB faults currently. + */ TheISA::MachInst inst; + private: + /** Pointer to the CPU. */ FullCPU *cpu; public: - + /** Whether or not the thread is currently in syscall mode, and + * thus able to be externally updated without squashing. + */ bool inSyscall; + /** Whether or not the thread is currently waiting on a trap, and + * thus able to be externally updated without squashing. + */ bool trapPending; #if FULL_SYSTEM @@ -88,31 +98,44 @@ struct O3ThreadState : public ThreadState { { } #endif + /** Pointer to the ExecContext of this thread. @todo: Don't call + this a proxy.*/ ExecContext *xcProxy; + /** Returns a pointer to the XC of this thread. */ ExecContext *getXCProxy() { return xcProxy; } + /** Returns the status of this thread. */ Status status() const { return _status; } + /** Sets the status of this thread. */ void setStatus(Status new_status) { _status = new_status; } #if !FULL_SYSTEM + /** Returns if this address is a valid instruction address. */ bool validInstAddr(Addr addr) { return process->validInstAddr(addr); } + /** Returns if this address is a valid data address. */ bool validDataAddr(Addr addr) { return process->validDataAddr(addr); } #endif - bool misspeculating() { return false; } - + /** Sets the current instruction being committed. */ void setInst(TheISA::MachInst _inst) { inst = _inst; } + /** Reads the number of instructions functionally executed and + * committed. 
+ */ Counter readFuncExeInst() { return funcExeInst; } + /** Sets the total number of instructions functionally executed + * and committed. + */ void setFuncExeInst(Counter new_val) { funcExeInst = new_val; } #if !FULL_SYSTEM + /** Handles the syscall. */ void syscall() { process->syscall(xcProxy); } #endif }; -- cgit v1.2.3 From 51ed3c3fd9f91a686bf87256c966991e6c57c1ff Mon Sep 17 00:00:00 2001 From: Kevin Lim Date: Thu, 1 Jun 2006 15:39:45 -0400 Subject: Fix stat bug. --HG-- extra : convert_revision : 3e4df934478de1ef6a84f193d9ef722157ac6baf --- cpu/o3/commit_impl.hh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpu/o3/commit_impl.hh b/cpu/o3/commit_impl.hh index 9409697eb..798f30294 100644 --- a/cpu/o3/commit_impl.hh +++ b/cpu/o3/commit_impl.hh @@ -925,7 +925,7 @@ DefaultCommit::commitInsts() numCommittedDist.sample(num_committed); if (num_committed == commitWidth) { - commitEligibleSamples[0]++; + commitEligibleSamples++; } } -- cgit v1.2.3 From 8671d927d862cdbdf851e74cd07d131679faa7ed Mon Sep 17 00:00:00 2001 From: Kevin Lim Date: Thu, 1 Jun 2006 15:40:06 -0400 Subject: Add in comments for checker. --HG-- extra : convert_revision : 8921907af0f18313bc66ad2a584fc182526fe1a2 --- cpu/checker/cpu.hh | 23 ++++++++++++++++++++++- cpu/checker/cpu_builder.cc | 30 ++++++++++++++++++++++++++++++ cpu/checker/exec_context.hh | 7 +++++++ cpu/checker/o3_cpu_builder.cc | 30 ++++++++++++++++++++++++++++++ 4 files changed, 89 insertions(+), 1 deletion(-) diff --git a/cpu/checker/cpu.hh b/cpu/checker/cpu.hh index 37fe59d95..9fcd1037f 100644 --- a/cpu/checker/cpu.hh +++ b/cpu/checker/cpu.hh @@ -64,13 +64,28 @@ class MemInterface; class Checkpoint; class Sampler; +/** + * CheckerCPU class. Dynamically verifies instructions as they are + * completed by making sure that the instruction and its results match + * the independent execution of the benchmark inside the checker. 
The + * checker verifies instructions in order, regardless of the order in + * which instructions complete. There are certain results that can + * not be verified, specifically the result of a store conditional or + * the values of uncached accesses. In these cases, and with + * instructions marked as "IsUnverifiable", the checker assumes that + * the value from the main CPU's execution is correct and simply + * copies that value. It provides a CheckerExecContext (see + * checker/exec_context.hh) that provides hooks for updating the + * Checker's state through any ExecContext accesses. This allows the + * checker to be able to correctly verify instructions, even with + * external accesses to the ExecContext that change state. + */ class CheckerCPU : public BaseCPU { protected: typedef TheISA::MachInst MachInst; typedef TheISA::MiscReg MiscReg; public: - // main simulation loop (one cycle) virtual void init(); struct Params : public BaseCPU::Params @@ -301,6 +316,12 @@ class CheckerCPU : public BaseCPU InstSeqNum youngestSN; }; +/** + * Templated Checker class. This Checker class is templated on the + * DynInstPtr of the instruction type that will be verified. Proper + * template instantiations of the Checker must be placed at the bottom + * of checker/cpu.cc. + */ template class Checker : public CheckerCPU { diff --git a/cpu/checker/cpu_builder.cc b/cpu/checker/cpu_builder.cc index 397ccab14..d80daef97 100644 --- a/cpu/checker/cpu_builder.cc +++ b/cpu/checker/cpu_builder.cc @@ -1,3 +1,30 @@ +/* + * Copyright (c) 2006 The Regents of The University of Michigan + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ #include @@ -10,6 +37,9 @@ #include "sim/process.hh" #include "sim/sim_object.hh" +/** + * Specific non-templated derived class used for SimObject configuration. 
+ */ class OzoneChecker : public Checker > > { public: diff --git a/cpu/checker/exec_context.hh b/cpu/checker/exec_context.hh index 38784867d..9f9fb0fd6 100644 --- a/cpu/checker/exec_context.hh +++ b/cpu/checker/exec_context.hh @@ -38,6 +38,13 @@ namespace Kernel { class Statistics; }; +/** + * Derived ExecContext class for use with the Checker. The template + * parameter is the ExecContext class used by the specific CPU being + * verified. This CheckerExecContext is then used by the main CPU in + * place of its usual ExecContext class. It handles updating the + * checker's state any time state is updated through the ExecContext. + */ template class CheckerExecContext : public ExecContext { diff --git a/cpu/checker/o3_cpu_builder.cc b/cpu/checker/o3_cpu_builder.cc index 125bfa398..410f91352 100644 --- a/cpu/checker/o3_cpu_builder.cc +++ b/cpu/checker/o3_cpu_builder.cc @@ -1,3 +1,30 @@ +/* + * Copyright (c) 2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ #include @@ -10,6 +37,9 @@ #include "sim/process.hh" #include "sim/sim_object.hh" +/** + * Specific non-templated derived class used for SimObject configuration. + */ class O3Checker : public Checker > > { public: -- cgit v1.2.3