From c2fcac7c0dd8dff182cb262bdf35d5c67117aa42 Mon Sep 17 00:00:00 2001 From: Kevin Lim Date: Thu, 19 May 2005 01:28:25 -0400 Subject: Fix up code for initial release. The main bug that remains is properly forwarding data from stores to loads, specifically when they are of differing sizes. cpu/base_dyn_inst.cc: Remove unused commented out code. cpu/base_dyn_inst.hh: Fix up comments. cpu/beta_cpu/2bit_local_pred.cc: Reorder code to match header file. cpu/beta_cpu/2bit_local_pred.hh: Update comments. cpu/beta_cpu/alpha_dyn_inst.hh: Remove useless comments. cpu/beta_cpu/alpha_dyn_inst_impl.hh: cpu/beta_cpu/alpha_full_cpu_impl.hh: cpu/beta_cpu/comm.hh: cpu/beta_cpu/iew_impl.hh: Remove unused commented code. cpu/beta_cpu/alpha_full_cpu.hh: Remove obsolete comment. cpu/beta_cpu/alpha_impl.hh: cpu/beta_cpu/full_cpu.hh: Alphabetize includes. cpu/beta_cpu/bpred_unit.hh: Remove unused global history code. cpu/beta_cpu/btb.hh: cpu/beta_cpu/free_list.hh: Use full path in #defines. cpu/beta_cpu/commit.hh: cpu/beta_cpu/decode.hh: Reorder functions. cpu/beta_cpu/commit_impl.hh: Remove obsolete commented code. cpu/beta_cpu/fetch.hh: Remove obsolete comments. cpu/beta_cpu/fetch_impl.hh: cpu/beta_cpu/rename_impl.hh: Remove commented code. cpu/beta_cpu/full_cpu.cc: Remove useless defines. cpu/beta_cpu/inst_queue.hh: Use full path for #defines. cpu/beta_cpu/inst_queue_impl.hh: Reorder functions to match header file. cpu/beta_cpu/mem_dep_unit.hh: Use full path name for #defines. cpu/beta_cpu/ras.hh: Use full path names for #defines. Remove mod operation. cpu/beta_cpu/regfile.hh: Remove unused commented code, fix up current comments. cpu/beta_cpu/tournament_pred.cc: cpu/beta_cpu/tournament_pred.hh: Update programming style. 
--HG-- extra : convert_revision : fb9d18a853f58a1108ff827e3c123d5b52a0608a --- cpu/base_dyn_inst.cc | 29 +---- cpu/base_dyn_inst.hh | 49 ++++++-- cpu/beta_cpu/2bit_local_pred.cc | 34 +++-- cpu/beta_cpu/2bit_local_pred.hh | 4 + cpu/beta_cpu/alpha_dyn_inst.hh | 3 +- cpu/beta_cpu/alpha_dyn_inst_impl.hh | 1 - cpu/beta_cpu/alpha_full_cpu.hh | 3 + cpu/beta_cpu/alpha_full_cpu_impl.hh | 11 +- cpu/beta_cpu/alpha_impl.hh | 2 +- cpu/beta_cpu/bpred_unit.hh | 11 +- cpu/beta_cpu/btb.hh | 6 +- cpu/beta_cpu/comm.hh | 9 +- cpu/beta_cpu/commit.hh | 13 +- cpu/beta_cpu/commit_impl.hh | 31 ----- cpu/beta_cpu/decode.hh | 9 +- cpu/beta_cpu/fetch.hh | 61 ++++----- cpu/beta_cpu/fetch_impl.hh | 4 +- cpu/beta_cpu/free_list.hh | 22 ++-- cpu/beta_cpu/full_cpu.cc | 5 - cpu/beta_cpu/full_cpu.hh | 13 +- cpu/beta_cpu/iew_impl.hh | 22 ---- cpu/beta_cpu/inst_queue.hh | 40 +++--- cpu/beta_cpu/inst_queue_impl.hh | 240 +++++++++++++++++------------------- cpu/beta_cpu/mem_dep_unit.hh | 25 ++-- cpu/beta_cpu/ras.hh | 8 +- cpu/beta_cpu/regfile.hh | 17 +-- cpu/beta_cpu/rename_impl.hh | 20 +-- cpu/beta_cpu/tournament_pred.cc | 148 +++++++++++----------- cpu/beta_cpu/tournament_pred.hh | 30 ++--- 29 files changed, 376 insertions(+), 494 deletions(-) (limited to 'cpu') diff --git a/cpu/base_dyn_inst.cc b/cpu/base_dyn_inst.cc index ecfe5a4b0..af172f5b0 100644 --- a/cpu/base_dyn_inst.cc +++ b/cpu/base_dyn_inst.cc @@ -63,11 +63,6 @@ typedef m5::hash_map my_ha my_hash_t thishash; #endif -/** This may need to be specific to an implementation. */ -//int BaseDynInst::instcount = 0; - -//int break_inst = -1; - template BaseDynInst::BaseDynInst(MachInst machInst, Addr inst_PC, Addr pred_PC, InstSeqNum seq_num, @@ -129,31 +124,11 @@ BaseDynInst::initVars() template BaseDynInst::~BaseDynInst() { -/* - if (specMemWrite) { - // Remove effects of this instruction from speculative memory - xc->spec_mem->erase(effAddr); - } -*/ --instcount; DPRINTF(FullCPU, "DynInst: Instruction destroyed. 
Instcount=%i\n", instcount); } -/* -template -FunctionalMemory * -BaseDynInst::getMemory(void) -{ - return xc->mem; -} -template -IntReg * -BaseDynInst::getIntegerRegs(void) -{ - return (spec_mode ? xc->specIntRegFile : xc->regs.intRegFile); -} -*/ template void BaseDynInst::prefetch(Addr addr, unsigned flags) @@ -369,8 +344,6 @@ BaseDynInst::eaSrcsReady() // EA calc depends on. (i.e. src reg 0 is the source of the data to be // stored) -// StaticInstPtr eaInst = staticInst->eaCompInst(); - for (int i = 1; i < numSrcRegs(); ++i) { if (!_readySrcRegIdx[i]) @@ -380,7 +353,7 @@ BaseDynInst::eaSrcsReady() return true; } -// Forward declaration... +// Forward declaration template class BaseDynInst; template <> diff --git a/cpu/base_dyn_inst.hh b/cpu/base_dyn_inst.hh index 509874fad..0c9b43adc 100644 --- a/cpu/base_dyn_inst.hh +++ b/cpu/base_dyn_inst.hh @@ -78,6 +78,7 @@ class BaseDynInst : public FastAlloc, public RefCounted MaxInstDestRegs = ISA::MaxInstDestRegs, //< Max dest regs }; + /** The static inst used by this dyn inst. */ StaticInstPtr staticInst; //////////////////////////////////////////// @@ -99,7 +100,7 @@ class BaseDynInst : public FastAlloc, public RefCounted Fault copySrcTranslate(Addr src); Fault copy(Addr dest); - // Probably should be private... + /** @todo: Consider making this private. */ public: /** Is this instruction valid. */ bool valid; @@ -219,6 +220,7 @@ class BaseDynInst : public FastAlloc, public RefCounted ~BaseDynInst(); private: + /** Function to initialize variables in the constructors. */ void initVars(); public: @@ -244,9 +246,9 @@ class BaseDynInst : public FastAlloc, public RefCounted */ bool doneTargCalc() { return false; } - /** Returns the calculated target of the branch. */ -// Addr readCalcTarg() { return nextPC; } - + /** Returns the next PC. This could be the speculative next PC if it is + * called prior to the actual branch target being calculated. 
+ */ Addr readNextPC() { return nextPC; } /** Set the predicted target of this current instruction. */ @@ -294,7 +296,10 @@ class BaseDynInst : public FastAlloc, public RefCounted /** Returns the branch target address. */ Addr branchTarget() const { return staticInst->branchTarget(PC); } + /** Number of source registers. */ int8_t numSrcRegs() const { return staticInst->numSrcRegs(); } + + /** Number of destination registers. */ int8_t numDestRegs() const { return staticInst->numDestRegs(); } // the following are used to track physical register usage @@ -314,8 +319,13 @@ class BaseDynInst : public FastAlloc, public RefCounted return staticInst->srcRegIdx(i); } + /** Returns the result of an integer instruction. */ uint64_t readIntResult() { return instResult.integer; } + + /** Returns the result of a floating point instruction. */ float readFloatResult() { return instResult.fp; } + + /** Returns the result of a floating point (double) instruction. */ double readDoubleResult() { return instResult.dbl; } //Push to .cc file. @@ -328,6 +338,9 @@ class BaseDynInst : public FastAlloc, public RefCounted } } + /** Marks a specific register as ready. + * @todo: Move this to .cc file. + */ void markSrcRegReady(RegIndex src_idx) { ++readyRegs; @@ -339,13 +352,16 @@ class BaseDynInst : public FastAlloc, public RefCounted } } + /** Returns if a source register is ready. */ bool isReadySrcRegIdx(int idx) const { return this->_readySrcRegIdx[idx]; } + /** Sets this instruction as completed. */ void setCompleted() { completed = true; } + /** Returns whethe or not this instruction is completed. */ bool isCompleted() const { return completed; } /** Sets this instruction as ready to issue. */ @@ -393,20 +409,39 @@ class BaseDynInst : public FastAlloc, public RefCounted /** Set the next PC of this instruction (its actual target). */ void setNextPC(uint64_t val) { nextPC = val; } + /** Returns the exec context. + * @todo: Remove this once the ExecContext is no longer used. 
+ */ ExecContext *xcBase() { return xc; } private: + /** Instruction effective address. + * @todo: Consider if this is necessary or not. + */ Addr instEffAddr; + /** Whether or not the effective address calculation is completed. + * @todo: Consider if this is necessary or not. + */ bool eaCalcDone; public: + /** Sets the effective address. */ void setEA(Addr &ea) { instEffAddr = ea; eaCalcDone = true; } + + /** Returns the effective address. */ const Addr &getEA() const { return instEffAddr; } + + /** Returns whether or not the eff. addr. calculation has been completed. */ bool doneEACalc() { return eaCalcDone; } + + /** Returns whether or not the eff. addr. source registers are ready. */ bool eaSrcsReady(); public: + /** Load queue index. */ int16_t lqIdx; + + /** Store queue index. */ int16_t sqIdx; }; @@ -439,8 +474,7 @@ BaseDynInst::read(Addr addr, T &data, unsigned flags) if (fault == No_Fault) { fault = cpu->read(req, data, lqIdx); - } - else { + } else { // Return a fixed value to keep simulation deterministic even // along misspeculated paths. 
data = (T)-1; @@ -464,9 +498,6 @@ BaseDynInst::write(T data, Addr addr, unsigned flags, uint64_t *res) traceData->setData(data); } -// storeSize = sizeof(T); -// storeData = data; - MemReqPtr req = new MemReq(addr, xc, sizeof(T), flags); req->asid = asid; diff --git a/cpu/beta_cpu/2bit_local_pred.cc b/cpu/beta_cpu/2bit_local_pred.cc index e5bf9647f..bcd3ba00d 100644 --- a/cpu/beta_cpu/2bit_local_pred.cc +++ b/cpu/beta_cpu/2bit_local_pred.cc @@ -30,21 +30,6 @@ DefaultBP::DefaultBP(unsigned _localPredictorSize, instShiftAmt); } -inline -bool -DefaultBP::getPrediction(uint8_t &count) -{ - // Get the MSB of the count - return (count >> (localCtrBits - 1)); -} - -inline -unsigned -DefaultBP::getLocalIndex(Addr &branch_addr) -{ - return (branch_addr >> instShiftAmt) & indexMask; -} - bool DefaultBP::lookup(Addr &branch_addr) { @@ -91,15 +76,26 @@ DefaultBP::update(Addr &branch_addr, bool taken) assert(local_predictor_idx < localPredictorSize); - // Increment or decrement twice to undo speculative update, then - // properly update if (taken) { DPRINTF(Fetch, "Branch predictor: Branch updated as taken.\n"); localCtrs[local_predictor_idx].increment(); -// localCtrs[local_predictor_idx].increment(); } else { DPRINTF(Fetch, "Branch predictor: Branch updated as not taken.\n"); localCtrs[local_predictor_idx].decrement(); -// localCtrs[local_predictor_idx].decrement(); } } + +inline +bool +DefaultBP::getPrediction(uint8_t &count) +{ + // Get the MSB of the count + return (count >> (localCtrBits - 1)); +} + +inline +unsigned +DefaultBP::getLocalIndex(Addr &branch_addr) +{ + return (branch_addr >> instShiftAmt) & indexMask; +} diff --git a/cpu/beta_cpu/2bit_local_pred.hh b/cpu/beta_cpu/2bit_local_pred.hh index cda7d3e65..6f9b9eedc 100644 --- a/cpu/beta_cpu/2bit_local_pred.hh +++ b/cpu/beta_cpu/2bit_local_pred.hh @@ -31,8 +31,12 @@ class DefaultBP private: + /** Returns the taken/not taken prediction given the value of the + * counter. 
+ */ inline bool getPrediction(uint8_t &count); + /** Calculates the local index based on the PC. */ inline unsigned getLocalIndex(Addr &PC); /** Array of counters that make up the local predictor. */ diff --git a/cpu/beta_cpu/alpha_dyn_inst.hh b/cpu/beta_cpu/alpha_dyn_inst.hh index d34fa071c..61770d59d 100644 --- a/cpu/beta_cpu/alpha_dyn_inst.hh +++ b/cpu/beta_cpu/alpha_dyn_inst.hh @@ -1,5 +1,3 @@ -//Todo: - #ifndef __CPU_BETA_CPU_ALPHA_DYN_INST_HH__ #define __CPU_BETA_CPU_ALPHA_DYN_INST_HH__ @@ -123,6 +121,7 @@ class AlphaDynInst : public BaseDynInst { return this->cpu->readFloatRegInt(_srcRegIdx[idx]); } + /** @todo: Make results into arrays so they can handle multiple dest * registers. */ diff --git a/cpu/beta_cpu/alpha_dyn_inst_impl.hh b/cpu/beta_cpu/alpha_dyn_inst_impl.hh index 3f530e182..25f98fa90 100644 --- a/cpu/beta_cpu/alpha_dyn_inst_impl.hh +++ b/cpu/beta_cpu/alpha_dyn_inst_impl.hh @@ -130,7 +130,6 @@ void AlphaDynInst::syscall() { this->cpu->syscall(this->threadNumber); -// this->cpu->syscall(); } #endif diff --git a/cpu/beta_cpu/alpha_full_cpu.hh b/cpu/beta_cpu/alpha_full_cpu.hh index 065b2fc4e..01413b414 100644 --- a/cpu/beta_cpu/alpha_full_cpu.hh +++ b/cpu/beta_cpu/alpha_full_cpu.hh @@ -103,6 +103,9 @@ class AlphaFullCPU : public FullBetaCPU this->regFile.setFpcr(val); } + // Most of the full system code and syscall emulation is not yet + // implemented. These functions do show what the final interface will + // look like. #ifdef FULL_SYSTEM uint64_t *getIpr(); uint64_t readIpr(int idx, Fault &fault); diff --git a/cpu/beta_cpu/alpha_full_cpu_impl.hh b/cpu/beta_cpu/alpha_full_cpu_impl.hh index c42e9e362..8132ec859 100644 --- a/cpu/beta_cpu/alpha_full_cpu_impl.hh +++ b/cpu/beta_cpu/alpha_full_cpu_impl.hh @@ -71,8 +71,8 @@ AlphaFullCPU::syscall(short thread_num) // Copy over all important state to xc once all the unrolling is done. copyToXC(); + // This is hardcoded to thread 0 while the CPU is only single threaded. 
this->thread[0]->syscall(); -// this->thread[thread_num]->syscall(); // Copy over all important state back to CPU. copyFromXC(); @@ -355,15 +355,6 @@ AlphaFullCPU::swapPALShadow(bool use_shadow) // Will have to lookup in rename map to get physical registers, then // swap. -/* - for (int i = 0; i < AlphaISA::NumIntRegs; i++) { - if (reg_redir[i]) { - AlphaISA::IntReg temp = regs->intRegFile[i]; - regs->intRegFile[i] = regs->palregs[i]; - regs->palregs[i] = temp; - } - } -*/ } #endif // FULL_SYSTEM diff --git a/cpu/beta_cpu/alpha_impl.hh b/cpu/beta_cpu/alpha_impl.hh index 81a1aba9b..375cb22db 100644 --- a/cpu/beta_cpu/alpha_impl.hh +++ b/cpu/beta_cpu/alpha_impl.hh @@ -3,8 +3,8 @@ #include "arch/alpha/isa_traits.hh" -#include "cpu/beta_cpu/cpu_policy.hh" #include "cpu/beta_cpu/alpha_params.hh" +#include "cpu/beta_cpu/cpu_policy.hh" // Forward declarations. template diff --git a/cpu/beta_cpu/bpred_unit.hh b/cpu/beta_cpu/bpred_unit.hh index 53c7146c5..55fba8dd7 100644 --- a/cpu/beta_cpu/bpred_unit.hh +++ b/cpu/beta_cpu/bpred_unit.hh @@ -36,19 +36,16 @@ class TwobitBPredUnit bool predict(DynInstPtr &inst, Addr &PC); - void squash(const InstSeqNum &squashed_sn, const Addr &corr_target, - bool actually_taken); + void update(const InstSeqNum &done_sn); void squash(const InstSeqNum &squashed_sn); - void update(const InstSeqNum &done_sn); + void squash(const InstSeqNum &squashed_sn, const Addr &corr_target, + bool actually_taken); bool BPLookup(Addr &inst_PC) { return BP.lookup(inst_PC); } - unsigned BPReadGlobalHist() - { return 0; } - bool BTBValid(Addr &inst_PC) { return BTB.valid(inst_PC); } @@ -56,7 +53,7 @@ class TwobitBPredUnit { return BTB.lookup(inst_PC); } // Will want to include global history. 
- void BPUpdate(Addr &inst_PC, unsigned global_history, bool taken) + void BPUpdate(Addr &inst_PC, bool taken) { BP.update(inst_PC, taken); } void BTBUpdate(Addr &inst_PC, Addr &target_PC) diff --git a/cpu/beta_cpu/btb.hh b/cpu/beta_cpu/btb.hh index 81069eabe..0ed128137 100644 --- a/cpu/beta_cpu/btb.hh +++ b/cpu/beta_cpu/btb.hh @@ -1,5 +1,5 @@ -#ifndef __BTB_HH__ -#define __BTB_HH__ +#ifndef __CPU_BETA_CPU_BTB_HH__ +#define __CPU_BETA_CPU_BTB_HH__ // For Addr type. #include "arch/alpha/isa_traits.hh" @@ -49,4 +49,4 @@ class DefaultBTB unsigned tagShiftAmt; }; -#endif // __BTB_HH__ +#endif // __CPU_BETA_CPU_BTB_HH__ diff --git a/cpu/beta_cpu/comm.hh b/cpu/beta_cpu/comm.hh index 18f76d921..fc972491e 100644 --- a/cpu/beta_cpu/comm.hh +++ b/cpu/beta_cpu/comm.hh @@ -3,6 +3,7 @@ #include #include + #include "arch/alpha/isa_traits.hh" #include "cpu/inst_seq.hh" @@ -112,11 +113,6 @@ struct TimeBufStruct { uint64_t mispredPC; uint64_t nextPC; - // Think of better names here. - // Will need to be a variety of sizes... - // Maybe make it a vector, that way only need one object. -// std::vector freeRegs; - bool robSquashing; // Represents the instruction that has either been retired or @@ -124,9 +120,8 @@ struct TimeBufStruct { // retired or squashed sequence number. InstSeqNum doneSeqNum; - // Extra bits of information so that the LDSTQ only updates when it + // Extra bit of information so that the LDSTQ only updates when it // needs to. 
-// bool commitIsStore; bool commitIsLoad; // Communication specifically to the IQ to tell the IQ that it can diff --git a/cpu/beta_cpu/commit.hh b/cpu/beta_cpu/commit.hh index c04dc8085..df1269b82 100644 --- a/cpu/beta_cpu/commit.hh +++ b/cpu/beta_cpu/commit.hh @@ -72,10 +72,6 @@ class SimpleCommit void commit(); - uint64_t readCommitPC(); - - void setSquashing() { _status = ROBSquashing; } - private: void commitInsts(); @@ -86,6 +82,12 @@ class SimpleCommit void markCompletedInsts(); + public: + uint64_t readCommitPC(); + + void setSquashing() { _status = ROBSquashing; } + + private: /** Time buffer interface. */ TimeBuffer *timeBuffer; @@ -113,9 +115,6 @@ class SimpleCommit /** Pointer to FullCPU. */ FullCPU *cpu; - //Store buffer interface? Will need to move committed stores to the - //store buffer - /** Memory interface. Used for d-cache accesses. */ MemInterface *dcacheInterface; diff --git a/cpu/beta_cpu/commit_impl.hh b/cpu/beta_cpu/commit_impl.hh index 17ede9694..de7ecf57e 100644 --- a/cpu/beta_cpu/commit_impl.hh +++ b/cpu/beta_cpu/commit_impl.hh @@ -1,10 +1,3 @@ -// @todo: Bug when something reaches execute, and mispredicts, but is never -// put into the ROB because the ROB is full. Need rename stage to predict -// the free ROB entries better. - -#ifndef __COMMIT_IMPL_HH__ -#define __COMMIT_IMPL_HH__ - #include "base/timebuf.hh" #include "cpu/beta_cpu/commit.hh" #include "cpu/exetrace.hh" @@ -274,13 +267,6 @@ SimpleCommit::commitInsts() // time. However, we need to avoid updating any other state // incorrectly if it's already been squashed. if (head_inst->isSquashed()) { - // Hack to avoid the instruction being retired (and deleted) if - // it hasn't been through the IEW stage yet. 
-/* - if (!head_inst->isExecuted()) { - break; - } -*/ DPRINTF(Commit, "Commit: Retiring squashed instruction from " "ROB.\n"); @@ -418,21 +404,6 @@ SimpleCommit::commitHead(DynInstPtr &head_inst, unsigned inst_num) ++commitCommittedBranches; } -#if 0 - // Explicit communication back to the LDSTQ that a load has been committed - // and can be removed from the LDSTQ. Stores don't need this because - // the LDSTQ will already have been told that a store has reached the head - // of the ROB. Consider including communication if it's a store as well - // to keep things orthagonal. - if (head_inst->isMemRef()) { - ++commitCommittedMemRefs; - if (head_inst->isLoad()) { - toIEW->commitInfo.commitIsLoad = true; - ++commitCommittedLoads; - } - } -#endif - // Now that the instruction is going to be committed, finalize its // trace data. if (head_inst->traceData) { @@ -501,5 +472,3 @@ SimpleCommit::readCommitPC() { return rob->readHeadPC(); } - -#endif // __COMMIT_IMPL_HH__ diff --git a/cpu/beta_cpu/decode.hh b/cpu/beta_cpu/decode.hh index af2a5ee54..21f6799b7 100644 --- a/cpu/beta_cpu/decode.hh +++ b/cpu/beta_cpu/decode.hh @@ -64,9 +64,6 @@ class SimpleDecode void decode(); - // Might want to make squash a friend function. - void squash(); - private: inline bool fetchInstsValid(); @@ -76,8 +73,11 @@ class SimpleDecode void squash(DynInstPtr &inst); - void dumpFetchQueue(); + public: + // Might want to make squash a friend function. + void squash(); + private: // Interfaces to objects outside of decode. /** CPU interface. */ FullCPU *cpu; @@ -113,7 +113,6 @@ class SimpleDecode /** Skid buffer between fetch and decode. */ std::queue skidBuffer; - private: //Consider making these unsigned to avoid any confusion. /** Rename to decode delay, in ticks. 
*/ unsigned renameToDecodeDelay; diff --git a/cpu/beta_cpu/fetch.hh b/cpu/beta_cpu/fetch.hh index da22baa9b..c7e72be47 100644 --- a/cpu/beta_cpu/fetch.hh +++ b/cpu/beta_cpu/fetch.hh @@ -1,15 +1,9 @@ -// Todo: add in statistics, only get the MachInst and let decode actually -// decode, think about SMT fetch, -// fix up branch prediction stuff into one thing, -// Figure out where to advance time buffer. Add a way to get a -// stage's current status. +// Todo: SMT fetch, +// Add a way to get a stage's current status. #ifndef __CPU_BETA_CPU_SIMPLE_FETCH_HH__ #define __CPU_BETA_CPU_SIMPLE_FETCH_HH__ -//Will want to include: time buffer, structs, MemInterface, Event, -//whatever class bzero uses, MemReqPtr - #include "base/statistics.hh" #include "base/timebuf.hh" #include "cpu/pc_event.hh" @@ -56,6 +50,19 @@ class SimpleFetch bool stalled; + public: + class CacheCompletionEvent : public Event + { + private: + SimpleFetch *fetch; + + public: + CacheCompletionEvent(SimpleFetch *_fetch); + + virtual void process(); + virtual const char *description(); + }; + public: /** SimpleFetch constructor. */ SimpleFetch(Params &params); @@ -68,20 +75,9 @@ class SimpleFetch void setFetchQueue(TimeBuffer *fq_ptr); - void tick(); - - void fetch(); - void processCacheCompletion(); - // Figure out PC vs next PC and how it should be updated - void squash(const Addr &new_PC); - private: - inline void doSquash(const Addr &new_PC); - - void squashFromDecode(const Addr &new_PC, const InstSeqNum &seq_num); - /** * Looks up in the branch predictor to see if the next PC should be * either next PC+=MachInst or a branch target. 
@@ -101,6 +97,18 @@ class SimpleFetch */ Fault fetchCacheLine(Addr fetch_PC); + inline void doSquash(const Addr &new_PC); + + void squashFromDecode(const Addr &new_PC, const InstSeqNum &seq_num); + + public: + // Figure out PC vs next PC and how it should be updated + void squash(const Addr &new_PC); + + void tick(); + + void fetch(); + // Align an address (typically a PC) to the start of an I-cache block. // We fold in the PISA 64- to 32-bit conversion here as well. Addr icacheBlockAlignPC(Addr addr) @@ -109,21 +117,6 @@ class SimpleFetch return (addr & ~(cacheBlkMask)); } - public: - class CacheCompletionEvent : public Event - { - private: - SimpleFetch *fetch; - - public: - CacheCompletionEvent(SimpleFetch *_fetch); - - virtual void process(); - virtual const char *description(); - }; - -// CacheCompletionEvent cacheCompletionEvent; - private: /** Pointer to the FullCPU. */ FullCPU *cpu; diff --git a/cpu/beta_cpu/fetch_impl.hh b/cpu/beta_cpu/fetch_impl.hh index 0ec4c63a3..7adfecc52 100644 --- a/cpu/beta_cpu/fetch_impl.hh +++ b/cpu/beta_cpu/fetch_impl.hh @@ -35,8 +35,7 @@ SimpleFetch::CacheCompletionEvent::description() template SimpleFetch::SimpleFetch(Params &params) - : //cacheCompletionEvent(this), - icacheInterface(params.icacheInterface), + : icacheInterface(params.icacheInterface), branchPred(params), decodeToFetchDelay(params.decodeToFetchDelay), renameToFetchDelay(params.renameToFetchDelay), @@ -254,7 +253,6 @@ SimpleFetch::fetchCacheLine(Addr fetch_PC) // up this stage once the cache miss completes. if (result != MA_HIT && icacheInterface->doEvents()) { memReq->completionEvent = new CacheCompletionEvent(this); -// lastIcacheStall = curTick; // How does current model work as far as individual // stages scheduling/unscheduling? 
diff --git a/cpu/beta_cpu/free_list.hh b/cpu/beta_cpu/free_list.hh index e8e75f7ec..d25bc1b78 100644 --- a/cpu/beta_cpu/free_list.hh +++ b/cpu/beta_cpu/free_list.hh @@ -1,13 +1,13 @@ -#ifndef __FREE_LIST_HH__ -#define __FREE_LIST_HH__ +#ifndef __CPU_BETA_CPU_FREE_LIST_HH__ +#define __CPU_BETA_CPU_FREE_LIST_HH__ #include #include #include "arch/alpha/isa_traits.hh" -#include "cpu/beta_cpu/comm.hh" -#include "base/traceflags.hh" #include "base/trace.hh" +#include "base/traceflags.hh" +#include "cpu/beta_cpu/comm.hh" /** * FreeList class that simply holds the list of free integer and floating @@ -25,8 +25,6 @@ */ class SimpleFreeList { - public: - private: /** The list of free integer registers. */ std::queue freeIntRegs; @@ -60,15 +58,15 @@ class SimpleFreeList unsigned _numLogicalFloatRegs, unsigned _numPhysicalFloatRegs); - PhysRegIndex getIntReg(); + inline PhysRegIndex getIntReg(); - PhysRegIndex getFloatReg(); + inline PhysRegIndex getFloatReg(); - void addReg(PhysRegIndex freed_reg); + inline void addReg(PhysRegIndex freed_reg); - void addIntReg(PhysRegIndex freed_reg); + inline void addIntReg(PhysRegIndex freed_reg); - void addFloatReg(PhysRegIndex freed_reg); + inline void addFloatReg(PhysRegIndex freed_reg); bool hasFreeIntRegs() { return !freeIntRegs.empty(); } @@ -166,4 +164,4 @@ SimpleFreeList::addFloatReg(PhysRegIndex freed_reg) freeFloatRegs.push(freed_reg); } -#endif // __FREE_LIST_HH__ +#endif // __CPU_BETA_CPU_FREE_LIST_HH__ diff --git a/cpu/beta_cpu/full_cpu.cc b/cpu/beta_cpu/full_cpu.cc index 3cf5d4aaa..8c2483630 100644 --- a/cpu/beta_cpu/full_cpu.cc +++ b/cpu/beta_cpu/full_cpu.cc @@ -1,6 +1,3 @@ -#ifndef __SIMPLE_FULL_CPU_CC__ -#define __SIMPLE_FULL_CPU_CC__ - #ifdef FULL_SYSTEM #include "sim/system.hh" #else @@ -528,5 +525,3 @@ FullBetaCPU::wakeDependents(DynInstPtr &inst) // Forward declaration of FullBetaCPU. 
template class FullBetaCPU; - -#endif // __SIMPLE_FULL_CPU_HH__ diff --git a/cpu/beta_cpu/full_cpu.hh b/cpu/beta_cpu/full_cpu.hh index 85fc49371..a7916f9ae 100644 --- a/cpu/beta_cpu/full_cpu.hh +++ b/cpu/beta_cpu/full_cpu.hh @@ -12,13 +12,12 @@ #include #include -#include "cpu/beta_cpu/comm.hh" - #include "base/statistics.hh" #include "base/timebuf.hh" #include "cpu/base_cpu.hh" -#include "cpu/exec_context.hh" +#include "cpu/beta_cpu/comm.hh" #include "cpu/beta_cpu/cpu_policy.hh" +#include "cpu/exec_context.hh" #include "sim/process.hh" #ifdef FULL_SYSTEM @@ -96,15 +95,15 @@ class FullBetaCPU : public BaseFullCPU } public: - void tick(); - FullBetaCPU(Params &params); ~FullBetaCPU(); - void init(); - void fullCPURegStats(); + void tick(); + + void init(); + void activateContext(int thread_num, int delay); void suspendContext(int thread_num); void deallocateContext(int thread_num); diff --git a/cpu/beta_cpu/iew_impl.hh b/cpu/beta_cpu/iew_impl.hh index 086d39320..79dd809ad 100644 --- a/cpu/beta_cpu/iew_impl.hh +++ b/cpu/beta_cpu/iew_impl.hh @@ -361,20 +361,7 @@ SimpleIEW::dispatchInsts() } else if (inst->isStore()) { ldstQueue.insertStore(inst); - // A bit of a hack. Set that it can commit so that - // the commit stage will try committing it, and then - // once commit realizes it's a store it will send back - // a signal to this stage to issue and execute that - // store. Change to be a bit that says the instruction - // has extra work to do at commit. 
-// inst->setCanCommit(); - -// instQueue.insertNonSpec(inst); - ++iewDispStoreInsts; -// ++iewDispNonSpecInsts; - -// continue; } else if (inst->isNonSpeculative()) { DPRINTF(IEW, "IEW: Issue: Nonspeculative instruction " "encountered, skipping.\n"); @@ -404,8 +391,6 @@ SimpleIEW::dispatchInsts() DPRINTF(IEW, "IEW: Issue: Executed branch encountered, " "skipping.\n"); -// assert(inst->isDirectCtrl()); - inst->setIssued(); inst->setCanCommit(); @@ -614,10 +599,6 @@ SimpleIEW::tick() } ++iewSquashCycles; - - // Also should advance its own time buffers if the stage ran. - // Not sure about this... -// issueToExecQueue.advance(); } else if (_status == Blocked) { // Continue to tell previous stage to stall. toRename->iewInfo.stall = true; @@ -654,14 +635,11 @@ SimpleIEW::tick() // or store to commit. Also check if it's being told to execute a // nonspeculative instruction. // This is pretty inefficient... -// if (0/*fromCommit->commitInfo.commitIsStore*/) { if (!fromCommit->commitInfo.squash && !fromCommit->commitInfo.robSquashing) { ldstQueue.commitStores(fromCommit->commitInfo.doneSeqNum); -// } else if (fromCommit->commitInfo.commitIsLoad) { ldstQueue.commitLoads(fromCommit->commitInfo.doneSeqNum); } -// } if (fromCommit->commitInfo.nonSpecSeqNum != 0) { instQueue.scheduleNonSpec(fromCommit->commitInfo.nonSpecSeqNum); diff --git a/cpu/beta_cpu/inst_queue.hh b/cpu/beta_cpu/inst_queue.hh index 120e6b940..b97797101 100644 --- a/cpu/beta_cpu/inst_queue.hh +++ b/cpu/beta_cpu/inst_queue.hh @@ -1,5 +1,5 @@ -#ifndef __INST_QUEUE_HH__ -#define __INST_QUEUE_HH__ +#ifndef __CPU_BETA_CPU_INST_QUEUE_HH__ +#define __CPU_BETA_CPU_INST_QUEUE_HH__ #include #include @@ -103,19 +103,6 @@ class InstructionQueue void stopSquash(); - /** Debugging function to dump all the list sizes, as well as print - * out the list of nonspeculative instructions. Should not be used - * in any other capacity, but it has no harmful sideaffects. 
- */ - void dumpLists(); - - private: - /** Debugging function to count how many entries are in the IQ. It does - * a linear walk through the instructions, so do not call this function - * during normal execution. - */ - int countInsts(); - private: /** Pointer to the CPU. */ FullCPU *cpu; @@ -157,9 +144,6 @@ class InstructionQueue /** List of ready branch instructions. */ ReadyInstQueue readyBranchInsts; - /** List of ready memory instructions. */ -// ReadyInstQueue readyMemInsts; - /** List of ready miscellaneous instructions. */ ReadyInstQueue readyMiscInsts; @@ -281,10 +265,26 @@ class InstructionQueue bool addToDependents(DynInstPtr &new_inst); void insertDependency(DynInstPtr &new_inst); void createDependency(DynInstPtr &new_inst); - void dumpDependGraph(); void addIfReady(DynInstPtr &inst); + private: + /** Debugging function to count how many entries are in the IQ. It does + * a linear walk through the instructions, so do not call this function + * during normal execution. + */ + int countInsts(); + + /** Debugging function to dump out the dependency graph. + */ + void dumpDependGraph(); + + /** Debugging function to dump all the list sizes, as well as print + * out the list of nonspeculative instructions. Should not be used + * in any other capacity, but it has no harmful sideaffects. + */ + void dumpLists(); + Stats::Scalar<> iqInstsAdded; Stats::Scalar<> iqNonSpecInstsAdded; // Stats::Scalar<> iqIntInstsAdded; @@ -305,4 +305,4 @@ class InstructionQueue }; -#endif //__INST_QUEUE_HH__ +#endif //__CPU_BETA_CPU_INST_QUEUE_HH__ diff --git a/cpu/beta_cpu/inst_queue_impl.hh b/cpu/beta_cpu/inst_queue_impl.hh index d4e3939cf..9f7f13387 100644 --- a/cpu/beta_cpu/inst_queue_impl.hh +++ b/cpu/beta_cpu/inst_queue_impl.hh @@ -1,6 +1,3 @@ -#ifndef __INST_QUEUE_IMPL_HH__ -#define __INST_QUEUE_IMPL_HH__ - // Todo: // Current ordering allows for 0 cycle added-to-scheduled. 
Could maybe fake // it; either do in reverse order, or have added instructions put into a @@ -171,6 +168,13 @@ InstructionQueue::setTimeBuffer(TimeBuffer *tb_ptr) fromCommit = timeBuffer->getWire(-commitToIEWDelay); } +template +unsigned +InstructionQueue::numFreeEntries() +{ + return freeEntries; +} + // Might want to do something more complex if it knows how many instructions // will be issued this cycle. template @@ -184,13 +188,6 @@ InstructionQueue::isFull() } } -template -unsigned -InstructionQueue::numFreeEntries() -{ - return freeEntries; -} - template void InstructionQueue::insert(DynInstPtr &new_inst) @@ -562,7 +559,6 @@ InstructionQueue::scheduleReadyInsts() break; case Squashed: -// issuing_inst = squashed_head_inst; assert(0 && "Squashed insts should not issue any more!"); squashedInsts.pop(); // Set the squashed instruction as able to commit so that commit @@ -619,6 +615,77 @@ InstructionQueue::scheduleNonSpec(const InstSeqNum &inst) nonSpecInsts.erase(inst_it); } +template +void +InstructionQueue::wakeDependents(DynInstPtr &completed_inst) +{ + DPRINTF(IQ, "IQ: Waking dependents of completed instruction.\n"); + //Look at the physical destination register of the DynInst + //and look it up on the dependency graph. Then mark as ready + //any instructions within the instruction queue. + DependencyEntry *curr; + + // Tell the memory dependence unit to wake any dependents on this + // instruction if it is a memory instruction. + + if (completed_inst->isMemRef()) { + memDepUnit.wakeDependents(completed_inst); + } + + for (int dest_reg_idx = 0; + dest_reg_idx < completed_inst->numDestRegs(); + dest_reg_idx++) + { + PhysRegIndex dest_reg = + completed_inst->renamedDestRegIdx(dest_reg_idx); + + // Special case of uniq or control registers. They are not + // handled by the IQ and thus have no dependency graph entry. + // @todo Figure out a cleaner way to handle this. 
+ if (dest_reg >= numPhysRegs) { + continue; + } + + DPRINTF(IQ, "IQ: Waking any dependents on register %i.\n", + (int) dest_reg); + + //Maybe abstract this part into a function. + //Go through the dependency chain, marking the registers as ready + //within the waiting instructions. + while (dependGraph[dest_reg].next) { + + curr = dependGraph[dest_reg].next; + + DPRINTF(IQ, "IQ: Waking up a dependent instruction, PC%#x.\n", + curr->inst->readPC()); + + // Might want to give more information to the instruction + // so that it knows which of its source registers is ready. + // However that would mean that the dependency graph entries + // would need to hold the src_reg_idx. + curr->inst->markSrcRegReady(); + + addIfReady(curr->inst); + + dependGraph[dest_reg].next = curr->next; + + DependencyEntry::mem_alloc_counter--; + + curr->inst = NULL; + + delete curr; + } + + // Reset the head node now that all of its dependents have been woken + // up. + dependGraph[dest_reg].next = NULL; + dependGraph[dest_reg].inst = NULL; + + // Mark the scoreboard as having that register ready. + regScoreboard[dest_reg] = true; + } +} + template void InstructionQueue::violation(DynInstPtr &store, @@ -747,73 +814,56 @@ InstructionQueue::stopSquash() template void -InstructionQueue::wakeDependents(DynInstPtr &completed_inst) +InstructionQueue::DependencyEntry::insert(DynInstPtr &new_inst) { - DPRINTF(IQ, "IQ: Waking dependents of completed instruction.\n"); - //Look at the physical destination register of the DynInst - //and look it up on the dependency graph. Then mark as ready - //any instructions within the instruction queue. - DependencyEntry *curr; - - // Tell the memory dependence unit to wake any dependents on this - // instruction if it is a memory instruction. 
- - if (completed_inst->isMemRef()) { - memDepUnit.wakeDependents(completed_inst); - } - - for (int dest_reg_idx = 0; - dest_reg_idx < completed_inst->numDestRegs(); - dest_reg_idx++) - { - PhysRegIndex dest_reg = - completed_inst->renamedDestRegIdx(dest_reg_idx); - - // Special case of uniq or control registers. They are not - // handled by the IQ and thus have no dependency graph entry. - // @todo Figure out a cleaner way to handle this. - if (dest_reg >= numPhysRegs) { - continue; - } - - DPRINTF(IQ, "IQ: Waking any dependents on register %i.\n", - (int) dest_reg); + //Add this new, dependent instruction at the head of the dependency + //chain. - //Maybe abstract this part into a function. - //Go through the dependency chain, marking the registers as ready - //within the waiting instructions. - while (dependGraph[dest_reg].next) { + // First create the entry that will be added to the head of the + // dependency chain. + DependencyEntry *new_entry = new DependencyEntry; + new_entry->next = this->next; + new_entry->inst = new_inst; - curr = dependGraph[dest_reg].next; + // Then actually add it to the chain. + this->next = new_entry; - DPRINTF(IQ, "IQ: Waking up a dependent instruction, PC%#x.\n", - curr->inst->readPC()); + ++mem_alloc_counter; +} - // Might want to give more information to the instruction - // so that it knows which of its source registers is ready. - // However that would mean that the dependency graph entries - // would need to hold the src_reg_idx. - curr->inst->markSrcRegReady(); +template +void +InstructionQueue::DependencyEntry::remove(DynInstPtr &inst_to_remove) +{ + DependencyEntry *prev = this; + DependencyEntry *curr = this->next; - addIfReady(curr->inst); + // Make sure curr isn't NULL. Because this instruction is being + // removed from a dependency list, it must have been placed there at + // an earlier time. The dependency chain should not be empty, + // unless the instruction dependent upon it is already ready. 
+ if (curr == NULL) { + return; + } - dependGraph[dest_reg].next = curr->next; + // Find the instruction to remove within the dependency linked list. + while(curr->inst != inst_to_remove) + { + prev = curr; + curr = curr->next; - DependencyEntry::mem_alloc_counter--; + assert(curr != NULL); + } - curr->inst = NULL; + // Now remove this instruction from the list. + prev->next = curr->next; - delete curr; - } + --mem_alloc_counter; - // Reset the head node now that all of its dependents have been woken - // up. - dependGraph[dest_reg].next = NULL; - dependGraph[dest_reg].inst = NULL; + // Could push this off to the destructor of DependencyEntry + curr->inst = NULL; - // Mark the scoreboard as having that register ready. - regScoreboard[dest_reg] = true; - } + delete curr; } template @@ -898,60 +948,6 @@ InstructionQueue::createDependency(DynInstPtr &new_inst) } } -template -void -InstructionQueue::DependencyEntry::insert(DynInstPtr &new_inst) -{ - //Add this new, dependent instruction at the head of the dependency - //chain. - - // First create the entry that will be added to the head of the - // dependency chain. - DependencyEntry *new_entry = new DependencyEntry; - new_entry->next = this->next; - new_entry->inst = new_inst; - - // Then actually add it to the chain. - this->next = new_entry; - - ++mem_alloc_counter; -} - -template -void -InstructionQueue::DependencyEntry::remove(DynInstPtr &inst_to_remove) -{ - DependencyEntry *prev = this; - DependencyEntry *curr = this->next; - - // Make sure curr isn't NULL. Because this instruction is being - // removed from a dependency list, it must have been placed there at - // an earlier time. The dependency chain should not be empty, - // unless the instruction dependent upon it is already ready. - if (curr == NULL) { - return; - } - - // Find the instruction to remove within the dependency linked list. 
- while(curr->inst != inst_to_remove) - { - prev = curr; - curr = curr->next; - - assert(curr != NULL); - } - - // Now remove this instruction from the list. - prev->next = curr->next; - - --mem_alloc_counter; - - // Could push this off to the destructor of DependencyEntry - curr->inst = NULL; - - delete curr; -} - template void InstructionQueue::addIfReady(DynInstPtr &inst) @@ -1090,8 +1086,6 @@ InstructionQueue::dumpLists() cprintf("Ready branch list size: %i\n", readyBranchInsts.size()); -// cprintf("Ready memory list size: %i\n", readyMemInsts.size()); - cprintf("Ready misc list size: %i\n", readyMiscInsts.size()); cprintf("Squashed list size: %i\n", squashedInsts.size()); @@ -1110,5 +1104,3 @@ InstructionQueue::dumpLists() cprintf("\n"); } - -#endif // __INST_QUEUE_IMPL_HH__ diff --git a/cpu/beta_cpu/mem_dep_unit.hh b/cpu/beta_cpu/mem_dep_unit.hh index e43543e09..5da57945f 100644 --- a/cpu/beta_cpu/mem_dep_unit.hh +++ b/cpu/beta_cpu/mem_dep_unit.hh @@ -1,12 +1,12 @@ -#ifndef __MEM_DEP_UNIT_HH__ -#define __MEM_DEP_UNIT_HH__ +#ifndef __CPU_BETA_CPU_MEM_DEP_UNIT_HH__ +#define __CPU_BETA_CPU_MEM_DEP_UNIT_HH__ -#include #include +#include -#include "cpu/inst_seq.hh" #include "base/statistics.hh" +#include "cpu/inst_seq.hh" /** * Memory dependency unit class. This holds the memory dependence predictor. @@ -34,6 +34,12 @@ class MemDepUnit { void insertNonSpec(DynInstPtr &inst); + // Will want to make this operation relatively fast. Right now it + // is somewhat slow. + DynInstPtr &top(); + + void pop(); + void regsReady(DynInstPtr &inst); void nonSpecInstReady(DynInstPtr &inst); @@ -46,12 +52,6 @@ class MemDepUnit { void violation(DynInstPtr &store_inst, DynInstPtr &violating_load); - // Will want to make this operation relatively fast. Right now it - // kind of sucks. 
- DynInstPtr &top(); - - void pop(); - inline bool empty() { return readyInsts.empty(); } @@ -91,11 +91,8 @@ class MemDepUnit { } }; - - private: inline void moveToReady(dep_it_t &woken_inst); - private: /** List of instructions that have passed through rename, yet are still * waiting on either a memory dependence to resolve or source registers to * become available before they can issue. @@ -137,4 +134,4 @@ class MemDepUnit { Stats::Scalar<> conflictingStores; }; -#endif +#endif // __CPU_BETA_CPU_MEM_DEP_UNIT_HH__ diff --git a/cpu/beta_cpu/ras.hh b/cpu/beta_cpu/ras.hh index 7666f825f..51dab15e4 100644 --- a/cpu/beta_cpu/ras.hh +++ b/cpu/beta_cpu/ras.hh @@ -1,5 +1,5 @@ -#ifndef __RAS_HH__ -#define __RAS_HH__ +#ifndef __CPU_BETA_CPU_RAS_HH__ +#define __CPU_BETA_CPU_RAS_HH__ // For Addr type. #include "arch/alpha/isa_traits.hh" @@ -23,7 +23,7 @@ class ReturnAddrStack private: inline void incrTos() - { tos = (tos + 1) % numEntries; } + { if (++tos == numEntries) tos = 0; } inline void decrTos() { tos = (tos == 0 ? numEntries - 1 : tos - 1); } @@ -37,4 +37,4 @@ class ReturnAddrStack unsigned tos; }; -#endif // __RAS_HH__ +#endif // __CPU_BETA_CPU_RAS_HH__ diff --git a/cpu/beta_cpu/regfile.hh b/cpu/beta_cpu/regfile.hh index c9d1b092f..d7664707d 100644 --- a/cpu/beta_cpu/regfile.hh +++ b/cpu/beta_cpu/regfile.hh @@ -8,8 +8,8 @@ #include "cpu/beta_cpu/comm.hh" #ifdef FULL_SYSTEM -#include "kern/kernel_stats.hh" #include "arch/alpha/ev5.hh" +#include "kern/kernel_stats.hh" using namespace EV5; #endif @@ -19,8 +19,6 @@ using namespace EV5; // Things that are in the ifdef FULL_SYSTEM are pretty dependent on the ISA, // and should go in the AlphaFullCPU. -extern void debug_break(); - template class PhysRegFile { @@ -203,8 +201,11 @@ class PhysRegFile /** Miscellaneous register file. */ MiscRegFile miscRegs; - Addr pc; // program counter - Addr npc; // next-cycle program counter + /** Program counter. */ + Addr pc; + + /** Next-cycle program counter. 
*/ + Addr npc; #ifdef FULL_SYSTEM private: @@ -408,7 +409,6 @@ PhysRegFile::setIpr(int idx, uint64_t val) // write entire quad w/ no side-effect old = ipr[idx]; ipr[idx] = val; -// kernelStats.context(old, val); break; case ISA::IPR_DTB_PTE: @@ -435,14 +435,9 @@ PhysRegFile::setIpr(int idx, uint64_t val) // only write least significant five bits - interrupt level ipr[idx] = val & 0x1f; -// kernelStats.swpipl(ipr[idx]); break; case ISA::IPR_DTB_CM: -// if (val & 0x18) -// kernelStats->mode(Kernel::user); -// else -// kernelStats->mode(Kernel::kernel); case ISA::IPR_ICM: // only write two mode bits - processor mode diff --git a/cpu/beta_cpu/rename_impl.hh b/cpu/beta_cpu/rename_impl.hh index 5a8e499e9..5ad0d1416 100644 --- a/cpu/beta_cpu/rename_impl.hh +++ b/cpu/beta_cpu/rename_impl.hh @@ -507,6 +507,7 @@ SimpleRename::tick() DPRINTF(Rename, "Rename: Done squashing, going to running.\n"); _status = Running; + rename(); } else { doSquash(); } @@ -523,25 +524,6 @@ SimpleRename::tick() #endif } - // Perhaps put this outside of this function, since this will - // happen regardless of whether or not the stage is blocked or - // squashing. - // Read from the time buffer any necessary data. - // Read registers that are freed, and add them to the freelist. - // This is unnecessary due to the history buffer (assuming the history - // buffer works properly). 
-/* - while(!fromCommit->commitInfo.freeRegs.empty()) - { - PhysRegIndex freed_reg = fromCommit->commitInfo.freeRegs.back(); - DPRINTF(Rename, "Rename: Adding freed register %i to freelist.\n", - (int)freed_reg); - freeList->addReg(freed_reg); - - fromCommit->commitInfo.freeRegs.pop_back(); - } -*/ - } template diff --git a/cpu/beta_cpu/tournament_pred.cc b/cpu/beta_cpu/tournament_pred.cc index 5a22278eb..41e34adef 100644 --- a/cpu/beta_cpu/tournament_pred.cc +++ b/cpu/beta_cpu/tournament_pred.cc @@ -10,52 +10,52 @@ TournamentBP::TournamentBP(unsigned _local_predictor_size, unsigned _choice_predictor_size, unsigned _choice_ctr_bits, unsigned _instShiftAmt) - : local_predictor_size(_local_predictor_size), - local_ctr_bits(_local_ctr_bits), - local_history_table_size(_local_history_table_size), - local_history_bits(_local_history_bits), - global_predictor_size(_global_predictor_size), - global_ctr_bits(_global_ctr_bits), - global_history_bits(_global_history_bits), - choice_predictor_size(_global_predictor_size), - choice_ctr_bits(_choice_ctr_bits), + : localPredictorSize(_local_predictor_size), + localCtrBits(_local_ctr_bits), + localHistoryTableSize(_local_history_table_size), + localHistoryBits(_local_history_bits), + globalPredictorSize(_global_predictor_size), + globalCtrBits(_global_ctr_bits), + globalHistoryBits(_global_history_bits), + choicePredictorSize(_global_predictor_size), + choiceCtrBits(_choice_ctr_bits), instShiftAmt(_instShiftAmt) { //Should do checks here to make sure sizes are correct (powers of 2) //Setup the array of counters for the local predictor - local_ctrs = new SatCounter[local_predictor_size]; + localCtrs = new SatCounter[localPredictorSize]; - for (int i = 0; i < local_predictor_size; ++i) - local_ctrs[i].setBits(local_ctr_bits); + for (int i = 0; i < localPredictorSize; ++i) + localCtrs[i].setBits(localCtrBits); //Setup the history table for the local table - local_history_table = new unsigned[local_history_table_size]; + 
localHistoryTable = new unsigned[localHistoryTableSize]; - for (int i = 0; i < local_history_table_size; ++i) - local_history_table[i] = 0; + for (int i = 0; i < localHistoryTableSize; ++i) + localHistoryTable[i] = 0; // Setup the local history mask - localHistoryMask = (1 << local_history_bits) - 1; + localHistoryMask = (1 << localHistoryBits) - 1; //Setup the array of counters for the global predictor - global_ctrs = new SatCounter[global_predictor_size]; + globalCtrs = new SatCounter[globalPredictorSize]; - for (int i = 0; i < global_predictor_size; ++i) - global_ctrs[i].setBits(global_ctr_bits); + for (int i = 0; i < globalPredictorSize; ++i) + globalCtrs[i].setBits(globalCtrBits); //Clear the global history - global_history = 0; + globalHistory = 0; // Setup the global history mask - globalHistoryMask = (1 << global_history_bits) - 1; + globalHistoryMask = (1 << globalHistoryBits) - 1; //Setup the array of counters for the choice predictor - choice_ctrs = new SatCounter[choice_predictor_size]; + choiceCtrs = new SatCounter[choicePredictorSize]; - for (int i = 0; i < choice_predictor_size; ++i) - choice_ctrs[i].setBits(choice_ctr_bits); + for (int i = 0; i < choicePredictorSize; ++i) + choiceCtrs[i].setBits(choiceCtrBits); - threshold = (1 << (local_ctr_bits - 1)) - 1; + threshold = (1 << (localCtrBits - 1)) - 1; threshold = threshold / 2; } @@ -63,29 +63,29 @@ inline unsigned TournamentBP::calcLocHistIdx(Addr &branch_addr) { - return (branch_addr >> instShiftAmt) & (local_history_table_size - 1); + return (branch_addr >> instShiftAmt) & (localHistoryTableSize - 1); } inline void TournamentBP::updateHistoriesTaken(unsigned local_history_idx) { - global_history = (global_history << 1) | 1; - global_history = global_history & globalHistoryMask; + globalHistory = (globalHistory << 1) | 1; + globalHistory = globalHistory & globalHistoryMask; - local_history_table[local_history_idx] = - (local_history_table[local_history_idx] << 1) | 1; + 
localHistoryTable[local_history_idx] = + (localHistoryTable[local_history_idx] << 1) | 1; } inline void TournamentBP::updateHistoriesNotTaken(unsigned local_history_idx) { - global_history = (global_history << 1); - global_history = global_history & globalHistoryMask; + globalHistory = (globalHistory << 1); + globalHistory = globalHistory & globalHistoryMask; - local_history_table[local_history_idx] = - (local_history_table[local_history_idx] << 1); + localHistoryTable[local_history_idx] = + (localHistoryTable[local_history_idx] << 1); } bool @@ -100,15 +100,15 @@ TournamentBP::lookup(Addr &branch_addr) //Lookup in the local predictor to get its branch prediction local_history_idx = calcLocHistIdx(branch_addr); - local_predictor_idx = local_history_table[local_history_idx] + local_predictor_idx = localHistoryTable[local_history_idx] & localHistoryMask; - local_prediction = local_ctrs[local_predictor_idx].read(); + local_prediction = localCtrs[local_predictor_idx].read(); //Lookup in the global predictor to get its branch prediction - global_prediction = global_ctrs[global_history].read(); + global_prediction = globalCtrs[globalHistory].read(); //Lookup in the choice predictor to see which one to use - choice_prediction = choice_ctrs[global_history].read(); + choice_prediction = choiceCtrs[globalHistory].read(); //@todo Put a threshold value in for the three predictors that can // be set through the constructor (so this isn't hard coded). 
@@ -117,21 +117,21 @@ TournamentBP::lookup(Addr &branch_addr) if (global_prediction > threshold) { updateHistoriesTaken(local_history_idx); - assert(global_history < global_predictor_size && - local_history_idx < local_predictor_size); + assert(globalHistory < globalPredictorSize && + local_history_idx < localPredictorSize); - global_ctrs[global_history].increment(); - local_ctrs[local_history_idx].increment(); + globalCtrs[globalHistory].increment(); + localCtrs[local_history_idx].increment(); return true; } else { updateHistoriesNotTaken(local_history_idx); - assert(global_history < global_predictor_size && - local_history_idx < local_predictor_size); + assert(globalHistory < globalPredictorSize && + local_history_idx < localPredictorSize); - global_ctrs[global_history].decrement(); - local_ctrs[local_history_idx].decrement(); + globalCtrs[globalHistory].decrement(); + localCtrs[local_history_idx].decrement(); return false; } @@ -139,21 +139,21 @@ TournamentBP::lookup(Addr &branch_addr) if (local_prediction > threshold) { updateHistoriesTaken(local_history_idx); - assert(global_history < global_predictor_size && - local_history_idx < local_predictor_size); + assert(globalHistory < globalPredictorSize && + local_history_idx < localPredictorSize); - global_ctrs[global_history].increment(); - local_ctrs[local_history_idx].increment(); + globalCtrs[globalHistory].increment(); + localCtrs[local_history_idx].increment(); return true; } else { updateHistoriesNotTaken(local_history_idx); - assert(global_history < global_predictor_size && - local_history_idx < local_predictor_size); + assert(globalHistory < globalPredictorSize && + local_history_idx < localPredictorSize); - global_ctrs[global_history].decrement(); - local_ctrs[local_history_idx].decrement(); + globalCtrs[globalHistory].decrement(); + localCtrs[local_history_idx].decrement(); return false; } @@ -174,20 +174,20 @@ TournamentBP::update(Addr &branch_addr, unsigned correct_gh, bool taken) bool 
global_pred_taken; // Load the correct global history into the register. - global_history = correct_gh; + globalHistory = correct_gh; // Get the local predictor's current prediction, remove the incorrect // update, and update the local predictor local_history_idx = calcLocHistIdx(branch_addr); - local_predictor_idx = local_history_table[local_history_idx]; + local_predictor_idx = localHistoryTable[local_history_idx]; local_predictor_idx = (local_predictor_idx >> 1) & localHistoryMask; - local_prediction = local_ctrs[local_predictor_idx].read(); + local_prediction = localCtrs[local_predictor_idx].read(); local_pred_taken = local_prediction > threshold; //Get the global predictor's current prediction, and update the //global predictor - global_prediction = global_ctrs[global_history].read(); + global_prediction = globalCtrs[globalHistory].read(); global_pred_taken = global_prediction > threshold; //Update the choice predictor to tell it which one was correct @@ -195,34 +195,34 @@ TournamentBP::update(Addr &branch_addr, unsigned correct_gh, bool taken) //If the local prediction matches the actual outcome, decerement //the counter. Otherwise increment the counter. 
if (local_pred_taken == taken) { - choice_ctrs[global_history].decrement(); + choiceCtrs[globalHistory].decrement(); } else { - choice_ctrs[global_history].increment(); + choiceCtrs[globalHistory].increment(); } } if (taken) { - assert(global_history < global_predictor_size && - local_predictor_idx < local_predictor_size); + assert(globalHistory < globalPredictorSize && + local_predictor_idx < localPredictorSize); - local_ctrs[local_predictor_idx].increment(); - global_ctrs[global_history].increment(); + localCtrs[local_predictor_idx].increment(); + globalCtrs[globalHistory].increment(); - global_history = (global_history << 1) | 1; - global_history = global_history & globalHistoryMask; + globalHistory = (globalHistory << 1) | 1; + globalHistory = globalHistory & globalHistoryMask; - local_history_table[local_history_idx] |= 1; + localHistoryTable[local_history_idx] |= 1; } else { - assert(global_history < global_predictor_size && - local_predictor_idx < local_predictor_size); + assert(globalHistory < globalPredictorSize && + local_predictor_idx < localPredictorSize); - local_ctrs[local_predictor_idx].decrement(); - global_ctrs[global_history].decrement(); + localCtrs[local_predictor_idx].decrement(); + globalCtrs[globalHistory].decrement(); - global_history = (global_history << 1); - global_history = global_history & globalHistoryMask; + globalHistory = (globalHistory << 1); + globalHistory = globalHistory & globalHistoryMask; - local_history_table[local_history_idx] &= ~1; + localHistoryTable[local_history_idx] &= ~1; } } diff --git a/cpu/beta_cpu/tournament_pred.hh b/cpu/beta_cpu/tournament_pred.hh index 1512abc78..563da6f23 100644 --- a/cpu/beta_cpu/tournament_pred.hh +++ b/cpu/beta_cpu/tournament_pred.hh @@ -37,7 +37,7 @@ class TournamentBP */ void update(Addr &branch_addr, unsigned global_history, bool taken); - inline unsigned readGlobalHist() { return global_history; } + inline unsigned readGlobalHist() { return globalHistory; } private: @@ -50,56 +50,56 @@ 
class TournamentBP inline void updateHistoriesNotTaken(unsigned local_history_idx); /** Local counters. */ - SatCounter *local_ctrs; + SatCounter *localCtrs; /** Size of the local predictor. */ - unsigned local_predictor_size; + unsigned localPredictorSize; /** Number of bits of the local predictor's counters. */ - unsigned local_ctr_bits; + unsigned localCtrBits; /** Array of local history table entries. */ - unsigned *local_history_table; + unsigned *localHistoryTable; /** Size of the local history table. */ - unsigned local_history_table_size; + unsigned localHistoryTableSize; /** Number of bits for each entry of the local history table. * @todo Doesn't this come from the size of the local predictor? */ - unsigned local_history_bits; + unsigned localHistoryBits; /** Mask to get the proper local history. */ unsigned localHistoryMask; /** Array of counters that make up the global predictor. */ - SatCounter *global_ctrs; + SatCounter *globalCtrs; /** Size of the global predictor. */ - unsigned global_predictor_size; + unsigned globalPredictorSize; /** Number of bits of the global predictor's counters. */ - unsigned global_ctr_bits; + unsigned globalCtrBits; /** Global history register. */ - unsigned global_history; + unsigned globalHistory; /** Number of bits for the global history. */ - unsigned global_history_bits; + unsigned globalHistoryBits; /** Mask to get the proper global history. */ unsigned globalHistoryMask; /** Array of counters that make up the choice predictor. */ - SatCounter *choice_ctrs; + SatCounter *choiceCtrs; /** Size of the choice predictor (identical to the global predictor). */ - unsigned choice_predictor_size; + unsigned choicePredictorSize; /** Number of bits of the choice predictor's counters. */ - unsigned choice_ctr_bits; + unsigned choiceCtrBits; /** Number of bits to shift the instruction over to get rid of the word * offset. -- cgit v1.2.3