diff options
Diffstat (limited to 'src/cpu')
82 files changed, 2674 insertions, 484 deletions
diff --git a/src/cpu/SConscript b/src/cpu/SConscript index bc4ec7923..2bb9a2399 100644 --- a/src/cpu/SConscript +++ b/src/cpu/SConscript @@ -71,7 +71,8 @@ virtual Fault completeAcc(uint8_t *data, %s *xc, Trace::InstRecord *traceData) c # Generate a temporary CPU list, including the CheckerCPU if # it's enabled. This isn't used for anything else other than StaticInst # headers. -temp_cpu_list = env['CPU_MODELS'] +temp_cpu_list = env['CPU_MODELS'][:] + if env['USE_CHECKER']: temp_cpu_list.append('CheckerCPU') @@ -113,6 +114,9 @@ CheckerSupportedCPUList = ['O3CPU', 'OzoneCPU'] # ################################################################# +# Keep a list of CPU models that support SMT +env['SMT_CPU_MODELS'] = [] + sources = [] need_simple_base = False @@ -156,6 +160,8 @@ if 'O3CPU' in env['CPU_MODELS']: ''') if env['USE_CHECKER']: sources += Split('o3/checker_builder.cc') + else: + env['SMT_CPU_MODELS'].append('O3CPU') # Checker doesn't support SMT right now if 'OzoneCPU' in env['CPU_MODELS']: need_bp_unit = True diff --git a/src/cpu/base_dyn_inst.hh b/src/cpu/base_dyn_inst.hh index 9cc61f74c..40611abe6 100644 --- a/src/cpu/base_dyn_inst.hh +++ b/src/cpu/base_dyn_inst.hh @@ -215,6 +215,9 @@ class BaseDynInst : public FastAlloc, public RefCounted */ Addr nextPC; + /** Next non-speculative NPC. Target PC for Mips or Sparc. */ + Addr nextNPC; + /** Predicted next PC. */ Addr predPC; @@ -275,6 +278,11 @@ class BaseDynInst : public FastAlloc, public RefCounted */ Addr readNextPC() { return nextPC; } + /** Returns the next NPC. This could be the speculative next NPC if it is + * called prior to the actual branch target being calculated. + */ + Addr readNextNPC() { return nextNPC; } + /** Set the predicted target of this current instruction. */ void setPredTarg(Addr predicted_PC) { predPC = predicted_PC; } @@ -282,11 +290,20 @@ class BaseDynInst : public FastAlloc, public RefCounted Addr readPredTarg() { return predPC; } /** Returns whether the instruction was predicted taken or not. */ - bool predTaken() { return predPC != (PC + sizeof(MachInst)); } + bool predTaken() +#if THE_ISA == ALPHA_ISA + { return predPC != (PC + sizeof(MachInst)); } +#else + { return predPC != (nextPC + sizeof(MachInst)); } +#endif /** Returns whether the instruction mispredicted. */ - bool mispredicted() { return predPC != nextPC; } - + bool mispredicted() +#if THE_ISA == ALPHA_ISA + { return predPC != nextPC; } +#else + { return predPC != nextNPC; } +#endif // // Instruction types. Forward checks to StaticInst object. // @@ -308,6 +325,7 @@ class BaseDynInst : public FastAlloc, public RefCounted bool isIndirectCtrl() const { return staticInst->isIndirectCtrl(); } bool isCondCtrl() const { return staticInst->isCondCtrl(); } bool isUncondCtrl() const { return staticInst->isUncondCtrl(); } + bool isCondDelaySlot() const { return staticInst->isCondDelaySlot(); } bool isThreadSync() const { return staticInst->isThreadSync(); } bool isSerializing() const { return staticInst->isSerializing(); } bool isSerializeBefore() const @@ -545,6 +563,12 @@ class BaseDynInst : public FastAlloc, public RefCounted nextPC = val; } + /** Set the next NPC of this instruction (the target in Mips or Sparc).*/ + void setNextNPC(uint64_t val) + { + nextNPC = val; + } + /** Sets the ASID. */ void setASID(short addr_space_id) { asid = addr_space_id; } diff --git a/src/cpu/base_dyn_inst_impl.hh b/src/cpu/base_dyn_inst_impl.hh index 91424faad..f2109e88d 100644 --- a/src/cpu/base_dyn_inst_impl.hh +++ b/src/cpu/base_dyn_inst_impl.hh @@ -36,15 +36,12 @@ #include "base/cprintf.hh" #include "base/trace.hh" -#include "arch/faults.hh" +#include "sim/faults.hh" #include "cpu/exetrace.hh" #include "mem/request.hh" #include "cpu/base_dyn_inst.hh" -using namespace std; -using namespace TheISA; - #define NOHASH #ifndef NOHASH @@ -65,7 +62,7 @@ my_hash_t thishash; #endif template <class Impl> -BaseDynInst<Impl>::BaseDynInst(ExtMachInst machInst, Addr inst_PC, +BaseDynInst<Impl>::BaseDynInst(TheISA::ExtMachInst machInst, Addr inst_PC, Addr pred_PC, InstSeqNum seq_num, ImplCPU *cpu) : staticInst(machInst), traceData(NULL), cpu(cpu) @@ -73,7 +70,8 @@ BaseDynInst<Impl>::BaseDynInst(ExtMachInst machInst, Addr inst_PC, seqNum = seq_num; PC = inst_PC; - nextPC = PC + sizeof(MachInst); + nextPC = PC + sizeof(TheISA::MachInst); + nextNPC = nextPC + sizeof(TheISA::MachInst); predPC = pred_PC; initVars(); @@ -249,7 +247,7 @@ void BaseDynInst<Impl>::dump() { cprintf("T%d : %#08d `", threadNumber, PC); - cout << staticInst->disassemble(PC); + std::cout << staticInst->disassemble(PC); cprintf("'\n"); } diff --git a/src/cpu/checker/cpu.hh b/src/cpu/checker/cpu.hh index a508c56ba..6d6ae1e0a 100644 --- a/src/cpu/checker/cpu.hh +++ b/src/cpu/checker/cpu.hh @@ -170,7 +170,7 @@ class CheckerCPU : public BaseCPU virtual Counter totalInstructions() const { - return numInst - startNumInst; + return 0; } // number of simulated loads diff --git a/src/cpu/checker/thread_context.hh b/src/cpu/checker/thread_context.hh index c035e92ac..8c0186dae 100644 --- a/src/cpu/checker/thread_context.hh +++ b/src/cpu/checker/thread_context.hh @@ -31,6 +31,7 @@ #ifndef __CPU_CHECKER_THREAD_CONTEXT_HH__ #define __CPU_CHECKER_THREAD_CONTEXT_HH__ +#include "arch/types.hh" #include "cpu/checker/cpu.hh" #include "cpu/simple_thread.hh" #include "cpu/thread_context.hh" @@ -295,8 +296,8 @@ class CheckerThreadContext : public ThreadContext Counter readFuncExeInst() { return actualTC->readFuncExeInst(); } #endif - void changeRegFileContext(RegFile::ContextParam param, - RegFile::ContextVal val) + void changeRegFileContext(TheISA::RegContextParam param, + TheISA::RegContextVal val) { actualTC->changeRegFileContext(param, val); checkerTC->changeRegFileContext(param, val); diff --git a/src/cpu/exetrace.cc b/src/cpu/exetrace.cc index 7fdad5113..748f66d37 100644 --- a/src/cpu/exetrace.cc +++ b/src/cpu/exetrace.cc @@ -34,6 +34,7 @@ #include <fstream> #include <iomanip> +#include "arch/regfile.hh" #include "base/loader/symtab.hh" #include "cpu/base.hh" #include "cpu/exetrace.hh" @@ -42,7 +43,7 @@ #include "sim/system.hh" using namespace std; - +using namespace TheISA; //////////////////////////////////////////////////////////////////////// // @@ -53,7 +54,43 @@ using namespace std; void Trace::InstRecord::dump(ostream &outs) { - if (flags[INTEL_FORMAT]) { + if (flags[PRINT_REG_DELTA]) + { + outs << "PC = 0x" << setbase(16) + << setfill('0') + << setw(16) << PC << endl; + outs << setbase(10) + << setfill(' ') + << setw(0); + /* + int numSources = staticInst->numSrcRegs(); + int numDests = staticInst->numDestRegs(); + outs << "Sources:"; + for(int x = 0; x < numSources; x++) + { + int sourceNum = staticInst->srcRegIdx(x); + if(sourceNum < FP_Base_DepTag) + outs << " " << getIntRegName(sourceNum); + else if(sourceNum < Ctrl_Base_DepTag) + outs << " " << getFloatRegName(sourceNum - FP_Base_DepTag); + else + outs << " " << getMiscRegName(sourceNum - Ctrl_Base_DepTag); + } + outs << endl; + outs << "Destinations:"; + for(int x = 0; x < numDests; x++) + { + int destNum = staticInst->destRegIdx(x); + if(destNum < FP_Base_DepTag) + outs << " " << getIntRegName(destNum); + else if(destNum < Ctrl_Base_DepTag) + outs << " " << getFloatRegName(destNum - FP_Base_DepTag); + else + outs << " " << getMiscRegName(destNum - Ctrl_Base_DepTag); + } + outs << endl;*/ + } + else if (flags[INTEL_FORMAT]) { #if FULL_SYSTEM bool is_trace_system = (cpu->system->name() == trace_system); #else @@ -196,6 +233,8 @@ Param<bool> exe_trace_print_fetchseq(&exeTraceParams, "print_fetchseq", "print fetch sequence number", false); Param<bool> exe_trace_print_cp_seq(&exeTraceParams, "print_cpseq", "print correct-path sequence number", false); +Param<bool> exe_trace_print_reg_delta(&exeTraceParams, "print_reg_delta", + "print which registers changed to what", false); Param<bool> exe_trace_pc_symbol(&exeTraceParams, "pc_symbol", "Use symbols for the PC if available", true); Param<bool> exe_trace_intel_format(&exeTraceParams, "intel_format", @@ -222,6 +261,7 @@ Trace::InstRecord::setParams() flags[PRINT_INT_REGS] = exe_trace_print_iregs; flags[PRINT_FETCH_SEQ] = exe_trace_print_fetchseq; flags[PRINT_CP_SEQ] = exe_trace_print_cp_seq; + flags[PRINT_REG_DELTA] = exe_trace_print_reg_delta; flags[PC_SYMBOL] = exe_trace_pc_symbol; flags[INTEL_FORMAT] = exe_trace_intel_format; trace_system = exe_trace_system; diff --git a/src/cpu/exetrace.hh b/src/cpu/exetrace.hh index 95f8b449c..8cc98b777 100644 --- a/src/cpu/exetrace.hh +++ b/src/cpu/exetrace.hh @@ -147,6 +147,7 @@ class InstRecord : public Record PRINT_INT_REGS, PRINT_FETCH_SEQ, PRINT_CP_SEQ, + PRINT_REG_DELTA, PC_SYMBOL, INTEL_FORMAT, NUM_BITS diff --git a/src/cpu/o3/2bit_local_pred.hh b/src/cpu/o3/2bit_local_pred.hh index 0a2a71d3e..954b86b4c 100644 --- a/src/cpu/o3/2bit_local_pred.hh +++ b/src/cpu/o3/2bit_local_pred.hh @@ -31,9 +31,8 @@ #ifndef __CPU_O3_2BIT_LOCAL_PRED_HH__ #define __CPU_O3_2BIT_LOCAL_PRED_HH__ -// For Addr type. -#include "arch/isa_traits.hh" #include "cpu/o3/sat_counter.hh" +#include "sim/host.hh" #include <vector> diff --git a/src/cpu/o3/SConscript b/src/cpu/o3/SConscript index e65d41411..afbd4c533 100755 --- a/src/cpu/o3/SConscript +++ b/src/cpu/o3/SConscript @@ -52,21 +52,19 @@ if env['TARGET_ISA'] == 'alpha': alpha/cpu_builder.cc ''') elif env['TARGET_ISA'] == 'mips': - sys.exit('O3 CPU does not support MIPS') - #sources += Split(''' - # mips/dyn_inst.cc - # mips/cpu.cc - # mips/thread_context.cc - # mips/cpu_builder.cc - # ''') + sources += Split(''' + mips/dyn_inst.cc + mips/cpu.cc + mips/thread_context.cc + mips/cpu_builder.cc + ''') elif env['TARGET_ISA'] == 'sparc': - sys.exit('O3 CPU does not support MIPS') - #sources += Split(''' - # sparc/dyn_inst.cc - # sparc/cpu.cc - # sparc/thread_context.cc - # sparc/cpu_builder.cc - # ''') + sources += Split(''' + sparc/dyn_inst.cc + sparc/cpu.cc + sparc/thread_context.cc + sparc/cpu_builder.cc + ''') else: sys.exit('O3 CPU does not support the \'%s\' ISA' % env['TARGET_ISA']) diff --git a/src/cpu/o3/alpha/cpu.hh b/src/cpu/o3/alpha/cpu.hh index b961341d5..9d97f9701 100644 --- a/src/cpu/o3/alpha/cpu.hh +++ b/src/cpu/o3/alpha/cpu.hh @@ -31,7 +31,8 @@ #ifndef __CPU_O3_ALPHA_CPU_HH__ #define __CPU_O3_ALPHA_CPU_HH__ -#include "arch/isa_traits.hh" +#include "arch/regfile.hh" +#include "arch/types.hh" #include "cpu/thread_context.hh" #include "cpu/o3/cpu.hh" #include "sim/byteswap.hh" diff --git a/src/cpu/o3/alpha/cpu_impl.hh b/src/cpu/o3/alpha/cpu_impl.hh index 0473e60c2..b7362fad9 100644 --- a/src/cpu/o3/alpha/cpu_impl.hh +++ b/src/cpu/o3/alpha/cpu_impl.hh @@ -31,6 +31,7 @@ #include "config/use_checker.hh" #include "arch/alpha/faults.hh" +#include "arch/alpha/isa_traits.hh" #include "base/cprintf.hh" #include "base/statistics.hh" #include "base/timebuf.hh" @@ -53,8 +54,6 @@ #include "sim/system.hh" #endif -using namespace TheISA; - template <class Impl> AlphaO3CPU<Impl>::AlphaO3CPU(Params *params) #if FULL_SYSTEM @@ -191,14 +190,14 @@ AlphaO3CPU<Impl>::regStats() template <class Impl> -MiscReg +TheISA::MiscReg AlphaO3CPU<Impl>::readMiscReg(int misc_reg, unsigned tid) { return this->regFile.readMiscReg(misc_reg, tid); } template <class Impl> -MiscReg +TheISA::MiscReg AlphaO3CPU<Impl>::readMiscRegWithEffect(int misc_reg, Fault &fault, unsigned tid) { @@ -300,6 +299,7 @@ template <class Impl> void AlphaO3CPU<Impl>::processInterrupts() { + using namespace TheISA; // Check for interrupts here. For now can copy the code that // exists within isa_fullsys_traits.hh. Also assume that thread 0 // is the one that handles the interrupts. @@ -411,12 +411,12 @@ AlphaO3CPU<Impl>::setSyscallReturn(SyscallReturn return_value, int tid) // return value itself in the standard return value reg (v0). if (return_value.successful()) { // no error - this->setArchIntReg(SyscallSuccessReg, 0, tid); - this->setArchIntReg(ReturnValueReg, return_value.value(), tid); + this->setArchIntReg(TheISA::SyscallSuccessReg, 0, tid); + this->setArchIntReg(TheISA::ReturnValueReg, return_value.value(), tid); } else { // got an error, return details - this->setArchIntReg(SyscallSuccessReg, (IntReg) -1, tid); - this->setArchIntReg(ReturnValueReg, -return_value.value(), tid); + this->setArchIntReg(TheISA::SyscallSuccessReg, (IntReg) -1, tid); + this->setArchIntReg(TheISA::ReturnValueReg, -return_value.value(), tid); } } #endif diff --git a/src/cpu/o3/alpha/thread_context.hh b/src/cpu/o3/alpha/thread_context.hh index ad52b0d2e..70a09940f 100644 --- a/src/cpu/o3/alpha/thread_context.hh +++ b/src/cpu/o3/alpha/thread_context.hh @@ -26,9 +26,9 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * Authors: Kevin Lim - * Korey Sewell */ +#include "arch/alpha/types.hh" #include "cpu/o3/thread_context.hh" template <class Impl> @@ -65,8 +65,8 @@ class AlphaTC : public O3ThreadContext<Impl> panic("Alpha has no NextNPC!"); } - virtual void changeRegFileContext(TheISA::RegFile::ContextParam param, - TheISA::RegFile::ContextVal val) + virtual void changeRegFileContext(TheISA::RegContextParam param, + TheISA::RegContextVal val) { panic("Not supported on Alpha!"); } diff --git a/src/cpu/o3/bpred_unit.hh b/src/cpu/o3/bpred_unit.hh index 2c0a39565..3c4c8e478 100644 --- a/src/cpu/o3/bpred_unit.hh +++ b/src/cpu/o3/bpred_unit.hh @@ -31,8 +31,6 @@ #ifndef __CPU_O3_BPRED_UNIT_HH__ #define __CPU_O3_BPRED_UNIT_HH__ -// For Addr type. -#include "arch/isa_traits.hh" #include "base/statistics.hh" #include "cpu/inst_seq.hh" @@ -41,6 +39,8 @@ #include "cpu/o3/ras.hh" #include "cpu/o3/tournament_pred.hh" +#include "sim/host.hh" + #include <list> /** diff --git a/src/cpu/o3/bpred_unit_impl.hh b/src/cpu/o3/bpred_unit_impl.hh index 0da02145b..e4e656632 100644 --- a/src/cpu/o3/bpred_unit_impl.hh +++ b/src/cpu/o3/bpred_unit_impl.hh @@ -28,15 +28,11 @@ * Authors: Kevin Lim */ -#include <list> -#include <vector> - +#include "arch/types.hh" #include "base/trace.hh" #include "base/traceflags.hh" #include "cpu/o3/bpred_unit.hh" -using namespace std; - template<class Impl> BPredUnit<Impl>::BPredUnit(Params *params) : BTB(params->BTBEntries, @@ -159,7 +155,7 @@ BPredUnit<Impl>::predict(DynInstPtr &inst, Addr &PC, unsigned tid) void *bp_history = NULL; if (inst->isUncondCtrl()) { - DPRINTF(Fetch, "BranchPred: [tid:%i] Unconditional control.\n", tid); + DPRINTF(Fetch, "BranchPred: [tid:%i]: Unconditional control.\n", tid); pred_taken = true; // Tell the BP there was an unconditional branch. BPUncond(bp_history); @@ -201,15 +197,20 @@ BPredUnit<Impl>::predict(DynInstPtr &inst, Addr &PC, unsigned tid) ++BTBLookups; if (inst->isCall()) { - RAS[tid].push(PC + sizeof(MachInst)); +#if THE_ISA == ALPHA_ISA + Addr ras_pc = PC + sizeof(MachInst); // Next PC +#else + Addr ras_pc = PC + (2 * sizeof(MachInst)); // Next Next PC +#endif + RAS[tid].push(ras_pc); // Record that it was a call so that the top RAS entry can // be popped off if the speculation is incorrect. predict_record.wasCall = true; - DPRINTF(Fetch, "BranchPred: [tid:%i] Instruction %#x was a call" + DPRINTF(Fetch, "BranchPred: [tid:%i]: Instruction %#x was a call" ", adding %#x to the RAS.\n", - tid, inst->readPC(), PC + sizeof(MachInst)); + tid, inst->readPC(), ras_pc); } if (BTB.valid(PC, tid)) { @@ -242,7 +243,7 @@ BPredUnit<Impl>::predict(DynInstPtr &inst, Addr &PC, unsigned tid) predHist[tid].push_front(predict_record); - DPRINTF(Fetch, "[tid:%i] predHist.size(): %i\n", tid, predHist[tid].size()); + DPRINTF(Fetch, "[tid:%i]: predHist.size(): %i\n", tid, predHist[tid].size()); return pred_taken; } @@ -251,8 +252,8 @@ template <class Impl> void BPredUnit<Impl>::update(const InstSeqNum &done_sn, unsigned tid) { - DPRINTF(Fetch, "BranchPred: [tid:%i]: Commiting branches until sequence" - "number %lli.\n", tid, done_sn); + DPRINTF(Fetch, "BranchPred: [tid:%i]: Commiting branches until " + "[sn:%lli].\n", tid, done_sn); while (!predHist[tid].empty() && predHist[tid].back().seqNum <= done_sn) { diff --git a/src/cpu/o3/btb.cc b/src/cpu/o3/btb.cc index 01640f4d1..93d6ee768 100644 --- a/src/cpu/o3/btb.cc +++ b/src/cpu/o3/btb.cc @@ -32,8 +32,6 @@ #include "base/trace.hh" #include "cpu/o3/btb.hh" -using namespace TheISA; - DefaultBTB::DefaultBTB(unsigned _numEntries, unsigned _tagBits, unsigned _instShiftAmt) diff --git a/src/cpu/o3/btb.hh b/src/cpu/o3/btb.hh index dfa3b7b06..3c4899e89 100644 --- a/src/cpu/o3/btb.hh +++ b/src/cpu/o3/btb.hh @@ -31,9 +31,8 @@ #ifndef __CPU_O3_BTB_HH__ #define __CPU_O3_BTB_HH__ -// For Addr type. -#include "arch/isa_traits.hh" #include "base/misc.hh" +#include "sim/host.hh" class DefaultBTB { diff --git a/src/cpu/o3/comm.hh b/src/cpu/o3/comm.hh index bf1bd08e8..aa58fc20e 100644 --- a/src/cpu/o3/comm.hh +++ b/src/cpu/o3/comm.hh @@ -33,8 +33,7 @@ #include <vector> -#include "arch/faults.hh" -#include "arch/isa_traits.hh" +#include "sim/faults.hh" #include "cpu/inst_seq.hh" #include "sim/host.hh" @@ -88,6 +87,7 @@ struct DefaultIEWDefaultCommit { bool squash[Impl::MaxThreads]; bool branchMispredict[Impl::MaxThreads]; bool branchTaken[Impl::MaxThreads]; + bool condDelaySlotBranch[Impl::MaxThreads]; uint64_t mispredPC[Impl::MaxThreads]; uint64_t nextPC[Impl::MaxThreads]; InstSeqNum squashedSeqNum[Impl::MaxThreads]; @@ -113,6 +113,7 @@ struct TimeBufStruct { uint64_t branchAddr; InstSeqNum doneSeqNum; + InstSeqNum bdelayDoneSeqNum; // @todo: Might want to package this kind of branch stuff into a single // struct as it is used pretty frequently. @@ -165,6 +166,9 @@ struct TimeBufStruct { // retired or squashed sequence number. InstSeqNum doneSeqNum; + InstSeqNum bdelayDoneSeqNum; + bool squashDelaySlot; + //Just in case we want to do a commit/squash on a cycle //(necessary for multiple ROBs?) bool commitInsts; diff --git a/src/cpu/o3/commit.hh b/src/cpu/o3/commit.hh index 956b6ec3e..7575783f7 100644 --- a/src/cpu/o3/commit.hh +++ b/src/cpu/o3/commit.hh @@ -32,7 +32,6 @@ #ifndef __CPU_O3_COMMIT_HH__ #define __CPU_O3_COMMIT_HH__ -#include "arch/faults.hh" #include "base/statistics.hh" #include "base/timebuf.hh" #include "cpu/exetrace.hh" @@ -165,6 +164,9 @@ class DefaultCommit /** Sets the pointer to the IEW stage. */ void setIEWStage(IEW *iew_stage); + /** Skid buffer between rename and commit. */ + std::queue<DynInstPtr> skidBuffer; + /** The pointer to the IEW stage. Used solely to ensure that * various events (traps, interrupts, syscalls) do not occur until * all stores have written back. @@ -256,6 +258,9 @@ class DefaultCommit /** Gets instructions from rename and inserts them into the ROB. */ void getInsts(); + /** Insert all instructions from rename into skidBuffer */ + void skidInsert(); + /** Marks completed instructions using information sent from IEW. */ void markCompletedInsts(); @@ -286,13 +291,11 @@ class DefaultCommit /** Sets the next PC of a specific thread. */ void setNextPC(uint64_t val, unsigned tid) { nextPC[tid] = val; } -#if THE_ISA != ALPHA_ISA /** Reads the next NPC of a specific thread. */ - uint64_t readNextPC(unsigned tid) { return nextNPC[tid]; } + uint64_t readNextNPC(unsigned tid) { return nextNPC[tid]; } /** Sets the next NPC of a specific thread. */ - void setNextPC(uint64_t val, unsigned tid) { nextNPC[tid] = val; } -#endif + void setNextNPC(uint64_t val, unsigned tid) { nextNPC[tid] = val; } private: /** Time buffer interface. */ @@ -397,10 +400,8 @@ class DefaultCommit /** The next PC of each thread. */ Addr nextPC[Impl::MaxThreads]; -#if THE_ISA != ALPHA_ISA /** The next NPC of each thread. */ Addr nextNPC[Impl::MaxThreads]; -#endif /** The sequence number of the youngest valid instruction in the ROB. */ InstSeqNum youngestSeqNum[Impl::MaxThreads]; diff --git a/src/cpu/o3/commit_impl.hh b/src/cpu/o3/commit_impl.hh index 904af1071..f200f5f18 100644 --- a/src/cpu/o3/commit_impl.hh +++ b/src/cpu/o3/commit_impl.hh @@ -26,6 +26,7 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * Authors: Kevin Lim + * Korey Sewell */ #include "config/full_system.hh" @@ -44,8 +45,6 @@ #include "cpu/checker/cpu.hh" #endif -using namespace std; - template <class Impl> DefaultCommit<Impl>::TrapEvent::TrapEvent(DefaultCommit<Impl> *_commit, unsigned _tid) @@ -86,7 +85,7 @@ DefaultCommit<Impl>::DefaultCommit(Params *params) { _status = Active; _nextStatus = Inactive; - string policy = params->smtCommitPolicy; + std::string policy = params->smtCommitPolicy; //Convert string to lowercase std::transform(policy.begin(), policy.end(), policy.begin(), @@ -120,7 +119,7 @@ DefaultCommit<Impl>::DefaultCommit(Params *params) changedROBNumEntries[i] = false; trapSquash[i] = false; tcSquash[i] = false; - PC[i] = nextPC[i] = 0; + PC[i] = nextPC[i] = nextNPC[i] = 0; } } @@ -235,7 +234,7 @@ DefaultCommit<Impl>::setCPU(O3CPU *cpu_ptr) template <class Impl> void -DefaultCommit<Impl>::setThreads(vector<Thread *> &threads) +DefaultCommit<Impl>::setThreads(std::vector<Thread *> &threads) { thread = threads; } @@ -296,7 +295,7 @@ DefaultCommit<Impl>::setIEWStage(IEW *iew_stage) template<class Impl> void -DefaultCommit<Impl>::setActiveThreads(list<unsigned> *at_ptr) +DefaultCommit<Impl>::setActiveThreads(std::list<unsigned> *at_ptr) { DPRINTF(Commit, "Commit: Setting active threads list pointer.\n"); activeThreads = at_ptr; @@ -390,7 +389,7 @@ void DefaultCommit<Impl>::updateStatus() { // reset ROB changed variable - list<unsigned>::iterator threads = (*activeThreads).begin(); + std::list<unsigned>::iterator threads = (*activeThreads).begin(); while (threads != (*activeThreads).end()) { unsigned tid = *threads++; changedROBNumEntries[tid] = false; @@ -419,7 +418,7 @@ DefaultCommit<Impl>::setNextStatus() { int squashes = 0; - list<unsigned>::iterator threads = (*activeThreads).begin(); + std::list<unsigned>::iterator threads = (*activeThreads).begin(); while (threads != (*activeThreads).end()) { unsigned tid = *threads++; @@ -442,7 +441,7 @@ template <class Impl> bool DefaultCommit<Impl>::changedROBEntries() { - list<unsigned>::iterator threads = (*activeThreads).begin(); + std::list<unsigned>::iterator threads = (*activeThreads).begin(); while (threads != (*activeThreads).end()) { unsigned tid = *threads++; @@ -569,7 +568,7 @@ DefaultCommit<Impl>::tick() if ((*activeThreads).size() <= 0) return; - list<unsigned>::iterator threads = (*activeThreads).begin(); + std::list<unsigned>::iterator threads = (*activeThreads).begin(); // Check if any of the threads are done squashing. Change the // status if they are done. @@ -687,7 +686,7 @@ DefaultCommit<Impl>::commit() // Check for any possible squashes, handle them first //////////////////////////////////// - list<unsigned>::iterator threads = (*activeThreads).begin(); + std::list<unsigned>::iterator threads = (*activeThreads).begin(); while (threads != (*activeThreads).end()) { unsigned tid = *threads++; @@ -723,14 +722,48 @@ DefaultCommit<Impl>::commit() // then use one older sequence number. InstSeqNum squashed_inst = fromIEW->squashedSeqNum[tid]; - if (fromIEW->includeSquashInst[tid] == true) - squashed_inst--; +#if THE_ISA != ALPHA_ISA + InstSeqNum bdelay_done_seq_num; + bool squash_bdelay_slot; + + if (fromIEW->branchMispredict[tid]) { + if (fromIEW->branchTaken[tid] && + fromIEW->condDelaySlotBranch[tid]) { + DPRINTF(Commit, "[tid:%i]: Cond. delay slot branch" + "mispredicted as taken. Squashing after previous " + "inst, [sn:%i]\n", + tid, squashed_inst); + bdelay_done_seq_num = squashed_inst; + squash_bdelay_slot = true; + } else { + DPRINTF(Commit, "[tid:%i]: Branch Mispredict. Squashing " + "after delay slot [sn:%i]\n", tid, squashed_inst+1); + bdelay_done_seq_num = squashed_inst + 1; + squash_bdelay_slot = false; + } + } else { + bdelay_done_seq_num = squashed_inst; + } +#endif + if (fromIEW->includeSquashInst[tid] == true) { + squashed_inst--; +#if THE_ISA != ALPHA_ISA + bdelay_done_seq_num--; +#endif + } // All younger instructions will be squashed. Set the sequence // number as the youngest instruction in the ROB. youngestSeqNum[tid] = squashed_inst; +#if THE_ISA == ALPHA_ISA rob->squash(squashed_inst, tid); + toIEW->commitInfo[tid].squashDelaySlot = true; +#else + rob->squash(bdelay_done_seq_num, tid); + toIEW->commitInfo[tid].squashDelaySlot = squash_bdelay_slot; + toIEW->commitInfo[tid].bdelayDoneSeqNum = bdelay_done_seq_num; +#endif changedROBNumEntries[tid] = true; toIEW->commitInfo[tid].doneSeqNum = squashed_inst; @@ -766,6 +799,10 @@ DefaultCommit<Impl>::commit() // Try to commit any instructions. commitInsts(); + } else { +#if THE_ISA != ALPHA_ISA + skidInsert(); +#endif } //Check for any activity @@ -840,6 +877,7 @@ DefaultCommit<Impl>::commitInsts() } else { PC[tid] = head_inst->readPC(); nextPC[tid] = head_inst->readNextPC(); + nextNPC[tid] = head_inst->readNextNPC(); // Increment the total number of non-speculative instructions // executed. @@ -868,7 +906,13 @@ DefaultCommit<Impl>::commitInsts() } PC[tid] = nextPC[tid]; +#if THE_ISA == ALPHA_ISA nextPC[tid] = nextPC[tid] + sizeof(TheISA::MachInst); +#else + nextPC[tid] = nextNPC[tid]; + nextNPC[tid] = nextNPC[tid] + sizeof(TheISA::MachInst); +#endif + #if FULL_SYSTEM int count = 0; Addr oldpc; @@ -996,6 +1040,12 @@ DefaultCommit<Impl>::commitHead(DynInstPtr &head_inst, unsigned inst_num) // Check if the instruction caused a fault. If so, trap. Fault inst_fault = head_inst->getFault(); + // DTB will sometimes need the machine instruction for when + // faults happen. So we will set it here, prior to the DTB + // possibly needing it for its fault. + thread[tid]->setInst( + static_cast<TheISA::MachInst>(head_inst->staticInst->machInst)); + if (inst_fault != NoFault) { head_inst->setCompleted(); DPRINTF(Commit, "Inst [sn:%lli] PC %#x has a fault\n", @@ -1018,12 +1068,6 @@ DefaultCommit<Impl>::commitHead(DynInstPtr &head_inst, unsigned inst_num) // execution doesn't generate extra squashes. thread[tid]->inSyscall = true; - // DTB will sometimes need the machine instruction for when - // faults happen. So we will set it here, prior to the DTB - // possibly needing it for its fault. - thread[tid]->setInst( - static_cast<TheISA::MachInst>(head_inst->staticInst->machInst)); - // Execute the trap. Although it's slightly unrealistic in // terms of timing (as it doesn't wait for the full timing of // the trap event to complete before updating state), it's @@ -1069,12 +1113,39 @@ template <class Impl> void DefaultCommit<Impl>::getInsts() { + DPRINTF(Commit, "Getting instructions from Rename stage.\n"); + +#if THE_ISA == ALPHA_ISA + // Read any renamed instructions and place them into the ROB. + int insts_to_process = std::min((int)renameWidth, fromRename->size); +#else // Read any renamed instructions and place them into the ROB. - int insts_to_process = min((int)renameWidth, fromRename->size); + int insts_to_process = std::min((int)renameWidth, + (int)(fromRename->size + skidBuffer.size())); + int rename_idx = 0; - for (int inst_num = 0; inst_num < insts_to_process; ++inst_num) - { - DynInstPtr inst = fromRename->insts[inst_num]; + DPRINTF(Commit, "%i insts available to process. Rename Insts:%i " + "SkidBuffer Insts:%i\n", insts_to_process, fromRename->size, + skidBuffer.size()); +#endif + + + for (int inst_num = 0; inst_num < insts_to_process; ++inst_num) { + DynInstPtr inst; + +#if THE_ISA == ALPHA_ISA + inst = fromRename->insts[inst_num]; +#else + // Get insts from skidBuffer or from Rename + if (skidBuffer.size() > 0) { + DPRINTF(Commit, "Grabbing skidbuffer inst.\n"); + inst = skidBuffer.front(); + skidBuffer.pop(); + } else { + DPRINTF(Commit, "Grabbing rename inst.\n"); + inst = fromRename->insts[rename_idx++]; + } +#endif int tid = inst->threadNumber; if (!inst->isSquashed() && @@ -1095,6 +1166,53 @@ DefaultCommit<Impl>::getInsts() inst->readPC(), inst->seqNum, tid); } } + +#if THE_ISA != ALPHA_ISA + if (rename_idx < fromRename->size) { + DPRINTF(Commit,"Placing Rename Insts into skidBuffer.\n"); + + for (; + rename_idx < fromRename->size; + rename_idx++) { + DynInstPtr inst = fromRename->insts[rename_idx]; + int tid = inst->threadNumber; + + if (!inst->isSquashed()) { + DPRINTF(Commit, "Inserting PC %#x [sn:%i] [tid:%i] into ", + "skidBuffer.\n", inst->readPC(), inst->seqNum, tid); + skidBuffer.push(inst); + } else { + DPRINTF(Commit, "Instruction PC %#x [sn:%i] [tid:%i] was " + "squashed, skipping.\n", + inst->readPC(), inst->seqNum, tid); + } + } + } +#endif + +} + +template <class Impl> +void +DefaultCommit<Impl>::skidInsert() +{ + DPRINTF(Commit, "Attempting to any instructions from rename into " + "skidBuffer.\n"); + + for (int inst_num = 0; inst_num < fromRename->size; ++inst_num) { + DynInstPtr inst = fromRename->insts[inst_num]; + int tid = inst->threadNumber; + + if (!inst->isSquashed()) { + DPRINTF(Commit, "Inserting PC %#x [sn:%i] [tid:%i] into ", + "skidBuffer.\n", inst->readPC(), inst->seqNum, tid); + skidBuffer.push(inst); + } else { + DPRINTF(Commit, "Instruction PC %#x [sn:%i] [tid:%i] was " + "squashed, skipping.\n", + inst->readPC(), inst->seqNum, tid); + } + } } template <class Impl> @@ -1124,7 +1242,7 @@ template <class Impl> bool DefaultCommit<Impl>::robDoneSquashing() { - list<unsigned>::iterator threads = (*activeThreads).begin(); + std::list<unsigned>::iterator threads = (*activeThreads).begin(); while (threads != (*activeThreads).end()) { unsigned tid = *threads++; @@ -1221,8 +1339,8 @@ template<class Impl> int DefaultCommit<Impl>::roundRobin() { - list<unsigned>::iterator pri_iter = priority_list.begin(); - list<unsigned>::iterator end = priority_list.end(); + std::list<unsigned>::iterator pri_iter = priority_list.begin(); + std::list<unsigned>::iterator end = priority_list.end(); while (pri_iter != end) { unsigned tid = *pri_iter; @@ -1252,7 +1370,7 @@ DefaultCommit<Impl>::oldestReady() unsigned oldest = 0; bool first = true; - list<unsigned>::iterator threads = (*activeThreads).begin(); + std::list<unsigned>::iterator threads = (*activeThreads).begin(); while (threads != (*activeThreads).end()) { unsigned tid = *threads++; diff --git a/src/cpu/o3/cpu.cc b/src/cpu/o3/cpu.cc index 7d2727401..af032132e 100644 --- a/src/cpu/o3/cpu.cc +++ b/src/cpu/o3/cpu.cc @@ -441,7 +441,7 @@ FullO3CPU<Impl>::tick() if (!tickEvent.scheduled()) { if (_status == SwitchedOut || - getState() == SimObject::DrainedTiming) { + getState() == SimObject::Drained) { // increment stat lastRunningCycle = curTick; } else if (!activityRec.active()) { @@ -577,39 +577,19 @@ void FullO3CPU<Impl>::suspendContext(int tid) { DPRINTF(O3CPU,"[tid: %i]: Suspending Thread Context.\n", tid); - unscheduleTickEvent(); + deactivateThread(tid); + if (activeThreads.size() == 0) + unscheduleTickEvent(); _status = Idle; -/* - //Remove From Active List, if Active - list<unsigned>::iterator isActive = find( - activeThreads.begin(), activeThreads.end(), tid); - - if (isActive != activeThreads.end()) { - DPRINTF(O3CPU,"[tid:%i]: Removing from active threads list\n", - tid); - activeThreads.erase(isActive); - } -*/ } template <class Impl> void FullO3CPU<Impl>::haltContext(int tid) { - DPRINTF(O3CPU,"[tid:%i]: Halting Thread Context", tid); -/* - //Remove From Active List, if Active - list<unsigned>::iterator isActive = find( - activeThreads.begin(), activeThreads.end(), tid); - - if (isActive != activeThreads.end()) { - DPRINTF(O3CPU,"[tid:%i]: Removing from active threads list\n", - tid); - activeThreads.erase(isActive); - - removeThread(tid); - } -*/ + //For now, this is the same as deallocate + DPRINTF(O3CPU,"[tid:%i]: Halt Context called. Deallocating", tid); + deallocateContext(tid, 1); } template <class Impl> @@ -687,11 +667,12 @@ FullO3CPU<Impl>::removeThread(unsigned tid) } // Squash Throughout Pipeline - fetch.squash(0,tid); + InstSeqNum squash_seq_num = commit.rob->readHeadInst(tid)->seqNum; + fetch.squash(0, squash_seq_num, true, tid); decode.squash(tid); - rename.squash(tid); + rename.squash(squash_seq_num, tid); iew.squash(tid); - commit.rob->squash(commit.rob->readHeadInst(tid)->seqNum, tid); + commit.rob->squash(squash_seq_num, tid); assert(iew.ldstQueue.getCount(tid) == 0); @@ -765,7 +746,8 @@ template <class Impl> void FullO3CPU<Impl>::serialize(std::ostream &os) { - SERIALIZE_ENUM(_status); + SimObject::State so_state = SimObject::getState(); + SERIALIZE_ENUM(so_state); BaseCPU::serialize(os); nameOut(os, csprintf("%s.tickEvent", name())); tickEvent.serialize(os); @@ -786,7 +768,8 @@ template <class Impl> void FullO3CPU<Impl>::unserialize(Checkpoint *cp, const std::string §ion) { - UNSERIALIZE_ENUM(_status); + SimObject::State so_state; + UNSERIALIZE_ENUM(so_state); BaseCPU::unserialize(cp, section); tickEvent.unserialize(cp, csprintf("%s.tickEvent", section)); @@ -803,7 +786,7 @@ FullO3CPU<Impl>::unserialize(Checkpoint *cp, const std::string §ion) } template <class Impl> -bool +unsigned int FullO3CPU<Impl>::drain(Event *drain_event) { drainCount = 0; @@ -815,7 +798,7 @@ FullO3CPU<Impl>::drain(Event *drain_event) // Wake the CPU and record activity so everything can drain out if // the CPU was not able to immediately drain. - if (getState() != SimObject::DrainedTiming) { + if (getState() != SimObject::Drained) { // A bit of a hack...set the drainEvent after all the drain() // calls have been made, that way if all of the stages drain // immediately, the signalDrained() function knows not to call @@ -825,9 +808,9 @@ FullO3CPU<Impl>::drain(Event *drain_event) wakeCPU(); activityRec.activity(); - return false; + return 1; } else { - return true; + return 0; } } @@ -835,19 +818,21 @@ template <class Impl> void FullO3CPU<Impl>::resume() { + assert(system->getMemoryMode() == System::Timing); fetch.resume(); decode.resume(); rename.resume(); iew.resume(); commit.resume(); + changeState(SimObject::Running); + if (_status == SwitchedOut || _status == Idle) return; if (!tickEvent.scheduled()) tickEvent.schedule(curTick); _status = Running; - changeState(SimObject::Timing); } template <class Impl> @@ -858,7 +843,7 @@ FullO3CPU<Impl>::signalDrained() if (tickEvent.scheduled()) tickEvent.squash(); - changeState(SimObject::DrainedTiming); + changeState(SimObject::Drained); if (drainEvent) { drainEvent->process(); @@ -1063,7 +1048,8 @@ template <class Impl> void FullO3CPU<Impl>::setArchFloatRegSingle(int reg_idx, float val, unsigned tid) { - PhysRegIndex phys_reg = commitRenameMap[tid].lookup(reg_idx); + int idx = reg_idx + TheISA::FP_Base_DepTag; + PhysRegIndex phys_reg = commitRenameMap[tid].lookup(idx); regFile.setFloatReg(phys_reg, val); } @@ -1072,7 +1058,8 @@ template <class Impl> void FullO3CPU<Impl>::setArchFloatRegDouble(int reg_idx, double val, unsigned tid) { - PhysRegIndex phys_reg = commitRenameMap[tid].lookup(reg_idx); + int idx = reg_idx + TheISA::FP_Base_DepTag; + PhysRegIndex phys_reg = commitRenameMap[tid].lookup(idx); regFile.setFloatReg(phys_reg, val, 64); } @@ -1081,7 +1068,8 @@ template <class Impl> void FullO3CPU<Impl>::setArchFloatRegInt(int reg_idx, uint64_t val, unsigned tid) { - PhysRegIndex phys_reg = commitRenameMap[tid].lookup(reg_idx); + int idx = reg_idx + TheISA::FP_Base_DepTag; + PhysRegIndex phys_reg = commitRenameMap[tid].lookup(idx); regFile.setFloatRegBits(phys_reg, val); } @@ -1114,7 +1102,6 @@ FullO3CPU<Impl>::setNextPC(uint64_t val,unsigned tid) commit.setNextPC(val, tid); } -#if THE_ISA != ALPHA_ISA template <class Impl> uint64_t FullO3CPU<Impl>::readNextNPC(unsigned tid) @@ -1124,11 +1111,10 @@ FullO3CPU<Impl>::readNextNPC(unsigned tid) template <class Impl> void -FullO3CPU<Impl>::setNextNNPC(uint64_t val,unsigned tid) +FullO3CPU<Impl>::setNextNPC(uint64_t val,unsigned tid) { commit.setNextNPC(val, tid); } -#endif template <class Impl> typename FullO3CPU<Impl>::ListIt @@ -1178,7 +1164,9 @@ FullO3CPU<Impl>::removeFrontInst(DynInstPtr &inst) template <class Impl> void -FullO3CPU<Impl>::removeInstsNotInROB(unsigned tid) +FullO3CPU<Impl>::removeInstsNotInROB(unsigned tid, + bool squash_delay_slot, + const InstSeqNum &delay_slot_seq_num) { DPRINTF(O3CPU, "Thread %i: Deleting instructions from instruction" " list.\n", tid); @@ -1209,6 +1197,12 @@ FullO3CPU<Impl>::removeInstsNotInROB(unsigned tid) while (inst_it != end_it) { assert(!instList.empty()); +#if THE_ISA != ALPHA_ISA + if(!squash_delay_slot && + delay_slot_seq_num >= (*inst_it)->seqNum) { + break; + } +#endif squashInstIt(inst_it, tid); inst_it--; diff --git a/src/cpu/o3/cpu.hh b/src/cpu/o3/cpu.hh index 2fbd013ac..7e18571f1 100644 --- a/src/cpu/o3/cpu.hh +++ b/src/cpu/o3/cpu.hh @@ -38,7 +38,7 @@ #include <set> #include <vector> -#include "arch/isa_traits.hh" +#include "arch/types.hh" #include "base/statistics.hh" #include "base/timebuf.hh" #include "config/full_system.hh" @@ -330,7 +330,7 @@ class FullO3CPU : public BaseO3CPU /** Starts draining the CPU's pipeline of all instructions in * order to stop all memory accesses. */ - virtual bool drain(Event *drain_event); + virtual unsigned int drain(Event *drain_event); /** Resumes execution after a drain. */ virtual void resume(); @@ -449,8 +449,10 @@ class FullO3CPU : public BaseO3CPU */ void removeFrontInst(DynInstPtr &inst); - /** Remove all instructions that are not currently in the ROB. */ - void removeInstsNotInROB(unsigned tid); + /** Remove all instructions that are not currently in the ROB. + * There's also an option to not squash delay slot instructions.*/ + void removeInstsNotInROB(unsigned tid, bool squash_delay_slot, + const InstSeqNum &delay_slot_seq_num); /** Remove all instructions younger than the given sequence number. */ void removeInstsUntil(const InstSeqNum &seq_num,unsigned tid); diff --git a/src/cpu/o3/decode.hh b/src/cpu/o3/decode.hh index 7f5ecbc26..4a845e670 100644 --- a/src/cpu/o3/decode.hh +++ b/src/cpu/o3/decode.hh @@ -276,6 +276,19 @@ class DefaultDecode /** Maximum size of the skid buffer. */ unsigned skidBufferMax; + /** SeqNum of Squashing Branch Delay Instruction (used for MIPS)*/ + Addr bdelayDoneSeqNum[Impl::MaxThreads]; + + /** Instruction used for squashing branch (used for MIPS)*/ + DynInstPtr squashInst[Impl::MaxThreads]; + + /** Tells when their is a pending delay slot inst. to send + * to rename. If there is, then wait squash after the next + * instruction (used for MIPS). + */ + bool squashAfterDelaySlot[Impl::MaxThreads]; + + /** Stat for total number of idle cycles. */ Stats::Scalar<> decodeIdleCycles; /** Stat for total number of blocked cycles. */ diff --git a/src/cpu/o3/decode_impl.hh b/src/cpu/o3/decode_impl.hh index 8b851c032..160845378 100644 --- a/src/cpu/o3/decode_impl.hh +++ b/src/cpu/o3/decode_impl.hh @@ -30,8 +30,6 @@ #include "cpu/o3/decode.hh" -using namespace std; - template<class Impl> DefaultDecode<Impl>::DefaultDecode(Params *params) : renameToDecodeDelay(params->renameToDecodeDelay), @@ -50,6 +48,8 @@ DefaultDecode<Impl>::DefaultDecode(Params *params) stalls[i].rename = false; stalls[i].iew = false; stalls[i].commit = false; + + squashAfterDelaySlot[i] = false; } // @todo: Make into a parameter @@ -158,7 +158,7 @@ DefaultDecode<Impl>::setFetchQueue(TimeBuffer<FetchStruct> *fq_ptr) template<class Impl> void -DefaultDecode<Impl>::setActiveThreads(list<unsigned> *at_ptr) +DefaultDecode<Impl>::setActiveThreads(std::list<unsigned> *at_ptr) { DPRINTF(Decode, "Setting active threads list pointer.\n"); activeThreads = at_ptr; @@ -278,13 +278,25 @@ DefaultDecode<Impl>::squash(DynInstPtr &inst, unsigned tid) // Send back mispredict information. toFetch->decodeInfo[tid].branchMispredict = true; - toFetch->decodeInfo[tid].doneSeqNum = inst->seqNum; toFetch->decodeInfo[tid].predIncorrect = true; + toFetch->decodeInfo[tid].doneSeqNum = inst->seqNum; toFetch->decodeInfo[tid].squash = true; toFetch->decodeInfo[tid].nextPC = inst->branchTarget(); +#if THE_ISA == ALPHA_ISA toFetch->decodeInfo[tid].branchTaken = inst->readNextPC() != (inst->readPC() + sizeof(TheISA::MachInst)); + InstSeqNum squash_seq_num = inst->seqNum; +#else + toFetch->decodeInfo[tid].branchTaken = inst->readNextNPC() != + (inst->readNextPC() + sizeof(TheISA::MachInst)); + + toFetch->decodeInfo[tid].bdelayDoneSeqNum = bdelayDoneSeqNum[tid]; + squashAfterDelaySlot[tid] = false; + + InstSeqNum squash_seq_num = bdelayDoneSeqNum[tid]; +#endif + // Might have to tell fetch to unblock. if (decodeStatus[tid] == Blocked || decodeStatus[tid] == Unblocking) { @@ -296,7 +308,7 @@ DefaultDecode<Impl>::squash(DynInstPtr &inst, unsigned tid) for (int i=0; i<fromFetch->size; i++) { if (fromFetch->insts[i]->threadNumber == tid && - fromFetch->insts[i]->seqNum > inst->seqNum) { + fromFetch->insts[i]->seqNum > squash_seq_num) { fromFetch->insts[i]->setSquashed(); } } @@ -304,15 +316,35 @@ DefaultDecode<Impl>::squash(DynInstPtr &inst, unsigned tid) // Clear the instruction list and skid buffer in case they have any // insts in them. while (!insts[tid].empty()) { + +#if THE_ISA != ALPHA_ISA + if (insts[tid].front()->seqNum <= squash_seq_num) { + DPRINTF(Decode, "[tid:%i]: Cannot remove incoming decode " + "instructions before delay slot [sn:%i]. %i insts" + "left in decode.\n", tid, squash_seq_num, + insts[tid].size()); + break; + } +#endif insts[tid].pop(); } while (!skidBuffer[tid].empty()) { + +#if THE_ISA != ALPHA_ISA + if (skidBuffer[tid].front()->seqNum <= squash_seq_num) { + DPRINTF(Decode, "[tid:%i]: Cannot remove skidBuffer " + "instructions before delay slot [sn:%i]. %i insts" + "left in decode.\n", tid, squash_seq_num, + insts[tid].size()); + break; + } +#endif skidBuffer[tid].pop(); } // Squash instructions up until this one - cpu->removeInstsUntil(inst->seqNum, tid); + cpu->removeInstsUntil(squash_seq_num, tid); } template<class Impl> @@ -392,7 +424,7 @@ template<class Impl> bool DefaultDecode<Impl>::skidsEmpty() { - list<unsigned>::iterator threads = (*activeThreads).begin(); + std::list<unsigned>::iterator threads = (*activeThreads).begin(); while (threads != (*activeThreads).end()) { if (!skidBuffer[*threads++].empty()) @@ -408,7 +440,7 @@ DefaultDecode<Impl>::updateStatus() { bool any_unblocking = false; - list<unsigned>::iterator threads = (*activeThreads).begin(); + std::list<unsigned>::iterator threads = (*activeThreads).begin(); threads = (*activeThreads).begin(); @@ -565,7 +597,7 @@ DefaultDecode<Impl>::tick() toRenameIndex = 0; - list<unsigned>::iterator threads = (*activeThreads).begin(); + std::list<unsigned>::iterator threads = (*activeThreads).begin(); sortInsts(); @@ -611,7 +643,7 @@ DefaultDecode<Impl>::decode(bool &status_change, unsigned tid) // will allow, as long as it is not currently blocked. if (decodeStatus[tid] == Running || decodeStatus[tid] == Idle) { - DPRINTF(Decode, "[tid:%u] Not blocked, so attempting to run " + DPRINTF(Decode, "[tid:%u]: Not blocked, so attempting to run " "stage.\n",tid); decodeInsts(tid); @@ -710,6 +742,9 @@ DefaultDecode<Impl>::decodeInsts(unsigned tid) // Ensure that if it was predicted as a branch, it really is a // branch. if (inst->predTaken() && !inst->isControl()) { + DPRINTF(Decode, "PredPC : %#x != NextPC: %#x\n",inst->predPC, + inst->nextPC + 4); + panic("Instruction predicted as a branch!"); ++decodeControlMispred; @@ -730,12 +765,43 @@ DefaultDecode<Impl>::decodeInsts(unsigned tid) // Might want to set some sort of boolean and just do // a check at the end +#if THE_ISA == ALPHA_ISA squash(inst, inst->threadNumber); inst->setPredTarg(inst->branchTarget()); - break; +#else + // If mispredicted as taken, then ignore delay slot + // instruction... else keep delay slot and squash + // after it is sent to rename + if (inst->predTaken() && inst->isCondDelaySlot()) { + DPRINTF(Decode, "[tid:%i]: Conditional delay slot inst." + "[sn:%i] PC %#x mispredicted as taken.\n", tid, + inst->seqNum, inst->PC); + bdelayDoneSeqNum[tid] = inst->seqNum; + squash(inst, inst->threadNumber); + inst->setPredTarg(inst->branchTarget()); + break; + } else { + DPRINTF(Decode, "[tid:%i]: Misprediction detected at " + "[sn:%i] PC %#x, will squash after delay slot " + "inst. is sent to Rename\n", + tid, inst->seqNum, inst->PC); + bdelayDoneSeqNum[tid] = inst->seqNum + 1; + squashAfterDelaySlot[tid] = true; + squashInst[tid] = inst; + continue; + } +#endif } } + + if (squashAfterDelaySlot[tid]) { + assert(!inst->isSquashed()); + squash(squashInst[tid], squashInst[tid]->threadNumber); + squashInst[tid]->setPredTarg(squashInst[tid]->branchTarget()); + assert(!inst->isSquashed()); + break; + } } // If we didn't process all instructions, then we will need to block diff --git a/src/cpu/o3/dyn_inst.hh b/src/cpu/o3/dyn_inst.hh index a2cdf2dba..279513493 100644 --- a/src/cpu/o3/dyn_inst.hh +++ b/src/cpu/o3/dyn_inst.hh @@ -34,12 +34,19 @@ #include "arch/isa_specific.hh" #if THE_ISA == ALPHA_ISA -template <class Impl> -class AlphaDynInst; - -struct AlphaSimpleImpl; - -typedef AlphaDynInst<AlphaSimpleImpl> O3DynInst; + template <class Impl> class AlphaDynInst; + struct AlphaSimpleImpl; + typedef AlphaDynInst<AlphaSimpleImpl> O3DynInst; +#elif THE_ISA == MIPS_ISA + template <class Impl> class MipsDynInst; + struct MipsSimpleImpl; + typedef MipsDynInst<MipsSimpleImpl> O3DynInst; +#elif THE_ISA == SPARC_ISA + template <class Impl> class SparcDynInst; + struct SparcSimpleImpl; + typedef SparcDynInst<SparcSimpleImpl> O3DynInst; +#else + #error "O3DynInst not defined for this ISA" #endif #endif // __CPU_O3_DYN_INST_HH__ diff --git a/src/cpu/o3/fetch.hh b/src/cpu/o3/fetch.hh index 85654cebc..1a2ca32a4 100644 --- a/src/cpu/o3/fetch.hh +++ b/src/cpu/o3/fetch.hh @@ -106,6 +106,7 @@ class DefaultFetch virtual void recvRetry(); }; + public: /** Overall fetch status. Used to determine if the CPU can * deschedule itsef due to a lack of activity. @@ -218,9 +219,10 @@ class DefaultFetch * @param next_PC Next PC variable passed in by reference. It is * expected to be set to the current PC; it will be updated with what * the next PC will be. + * @param next_NPC Used for ISAs which use delay slots. * @return Whether or not a branch was predicted as taken. */ - bool lookupAndUpdateNextPC(DynInstPtr &inst, Addr &next_PC); + bool lookupAndUpdateNextPC(DynInstPtr &inst, Addr &next_PC, Addr &next_NPC); /** * Fetches the cache line that contains fetch_PC. Returns any @@ -255,7 +257,8 @@ class DefaultFetch * remove any instructions that are not in the ROB. The source of this * squash should be the commit stage. */ - void squash(const Addr &new_PC, unsigned tid); + void squash(const Addr &new_PC, const InstSeqNum &seq_num, + bool squash_delay_slot, unsigned tid); /** Ticks the fetch stage, processing all inputs signals and fetching * as many instructions as possible. @@ -340,14 +343,12 @@ class DefaultFetch /** Per-thread next PC. */ Addr nextPC[Impl::MaxThreads]; -#if THE_ISA != ALPHA_ISA /** Per-thread next Next PC. * This is not a real register but is used for * architectures that use a branch-delay slot. * (such as MIPS or Sparc) */ Addr nextNPC[Impl::MaxThreads]; -#endif /** Memory request used to access cache. */ RequestPtr memReq[Impl::MaxThreads]; @@ -360,6 +361,19 @@ class DefaultFetch /** Tracks how many instructions has been fetched this cycle. */ int numInst; + /** Tracks delay slot information for threads in ISAs which use + * delay slots; + */ + struct DelaySlotInfo { + InstSeqNum delaySlotSeqNum; + InstSeqNum branchSeqNum; + int numInsts; + Addr targetAddr; + bool targetReady; + }; + + DelaySlotInfo delaySlotInfo[Impl::MaxThreads]; + /** Source of possible stalls. */ struct Stalls { bool decode; @@ -404,6 +418,12 @@ class DefaultFetch /** The cache line being fetched. */ uint8_t *cacheData[Impl::MaxThreads]; + /** The PC of the cacheline that has been loaded. */ + Addr cacheDataPC[Impl::MaxThreads]; + + /** Whether or not the cache data is valid. */ + bool cacheDataValid[Impl::MaxThreads]; + /** Size of instructions. */ int instSize; diff --git a/src/cpu/o3/fetch_impl.hh b/src/cpu/o3/fetch_impl.hh index 39a13f9f8..990db88ac 100644 --- a/src/cpu/o3/fetch_impl.hh +++ b/src/cpu/o3/fetch_impl.hh @@ -51,9 +51,6 @@ #include <algorithm> -using namespace std; -using namespace TheISA; - template<class Impl> Tick DefaultFetch<Impl>::IcachePort::recvAtomic(PacketPtr pkt) @@ -118,7 +115,7 @@ DefaultFetch<Impl>::DefaultFetch(Params *params) // Set fetch stage's status to inactive. _status = Inactive; - string policy = params->smtFetchPolicy; + std::string policy = params->smtFetchPolicy; // Convert string to lowercase std::transform(policy.begin(), policy.end(), policy.begin(), @@ -162,15 +159,22 @@ DefaultFetch<Impl>::DefaultFetch(Params *params) // Create space to store a cache line. cacheData[tid] = new uint8_t[cacheBlkSize]; + cacheDataPC[tid] = 0; + cacheDataValid[tid] = false; + + delaySlotInfo[tid].branchSeqNum = -1; + delaySlotInfo[tid].numInsts = 0; + delaySlotInfo[tid].targetAddr = 0; + delaySlotInfo[tid].targetReady = false; - stalls[tid].decode = 0; - stalls[tid].rename = 0; - stalls[tid].iew = 0; - stalls[tid].commit = 0; + stalls[tid].decode = false; + stalls[tid].rename = false; + stalls[tid].iew = false; + stalls[tid].commit = false; } // Get the size of an instruction. - instSize = sizeof(MachInst); + instSize = sizeof(TheISA::MachInst); } template <class Impl> @@ -286,6 +290,9 @@ DefaultFetch<Impl>::setCPU(O3CPU *cpu_ptr) } #endif + // Schedule fetch to get the correct PC from the CPU + // scheduleFetchStartupEvent(1); + // Fetch needs to start fetching instructions at the very beginning, // so it must start up in active state. switchToActive(); @@ -307,7 +314,7 @@ DefaultFetch<Impl>::setTimeBuffer(TimeBuffer<TimeStruct> *time_buffer) template<class Impl> void -DefaultFetch<Impl>::setActiveThreads(list<unsigned> *at_ptr) +DefaultFetch<Impl>::setActiveThreads(std::list<unsigned> *at_ptr) { DPRINTF(Fetch, "Setting active threads list pointer.\n"); activeThreads = at_ptr; @@ -358,6 +365,7 @@ DefaultFetch<Impl>::processCacheCompletion(PacketPtr pkt) } memcpy(cacheData[tid], pkt->getPtr<uint8_t *>(), cacheBlkSize); + cacheDataValid[tid] = true; if (!drainPending) { // Wake up the CPU (if it went to sleep and was waiting on @@ -423,6 +431,10 @@ DefaultFetch<Impl>::takeOverFrom() nextPC[i] = cpu->readNextPC(i); #if THE_ISA != ALPHA_ISA nextNPC[i] = cpu->readNextNPC(i); + delaySlotInfo[i].branchSeqNum = -1; + delaySlotInfo[i].numInsts = 0; + delaySlotInfo[i].targetAddr = 0; + delaySlotInfo[i].targetReady = false; #endif fetchStatus[i] = Running; } @@ -471,7 +483,8 @@ DefaultFetch<Impl>::switchToInactive() template <class Impl> bool -DefaultFetch<Impl>::lookupAndUpdateNextPC(DynInstPtr &inst, Addr &next_PC) +DefaultFetch<Impl>::lookupAndUpdateNextPC(DynInstPtr &inst, Addr &next_PC, + Addr &next_NPC) { // Do branch prediction check here. // A bit of a misnomer...next_PC is actually the current PC until @@ -479,12 +492,54 @@ DefaultFetch<Impl>::lookupAndUpdateNextPC(DynInstPtr &inst, Addr &next_PC) bool predict_taken; if (!inst->isControl()) { +#if THE_ISA == ALPHA_ISA next_PC = next_PC + instSize; inst->setPredTarg(next_PC); +#else + Addr cur_PC = next_PC; + next_PC = cur_PC + instSize; //next_NPC; + next_NPC = cur_PC + (2 * instSize);//next_NPC + instSize; + inst->setPredTarg(next_NPC); +#endif return false; } - predict_taken = branchPred.predict(inst, next_PC, inst->threadNumber); + int tid = inst->threadNumber; +#if THE_ISA == ALPHA_ISA + predict_taken = branchPred.predict(inst, next_PC, tid); +#else + Addr pred_PC = next_PC; + predict_taken = branchPred.predict(inst, pred_PC, tid); + + if (predict_taken) { + DPRINTF(Fetch, "[tid:%i]: Branch predicted to be true.\n", tid); + } else { + DPRINTF(Fetch, "[tid:%i]: Branch predicted to be false.\n", tid); + } + + if (predict_taken) { + next_PC = next_NPC; + next_NPC = pred_PC; + + // Update delay slot info + ++delaySlotInfo[tid].numInsts; + delaySlotInfo[tid].targetAddr = pred_PC; + DPRINTF(Fetch, "[tid:%i]: %i delay slot inst(s) to process.\n", tid, + delaySlotInfo[tid].numInsts); + } else { // !predict_taken + if (inst->isCondDelaySlot()) { + next_PC = pred_PC; + // The delay slot is skipped here if there is on + // prediction + } else { + next_PC = next_NPC; + // No need to declare a delay slot here since + // there is no for the pred. target to jump + } + + next_NPC = next_NPC + instSize; + } +#endif ++fetchedBranches; @@ -519,6 +574,11 @@ DefaultFetch<Impl>::fetchCacheLine(Addr fetch_PC, Fault &ret_fault, unsigned tid // Align the fetch PC so it's at the start of a cache block. fetch_PC = icacheBlockAlignPC(fetch_PC); + // If we've already got the block, no need to try to fetch it again. + if (cacheDataValid[tid] && fetch_PC == cacheDataPC[tid]) { + return true; + } + // Setup the memReq to do a read of the first instruction's address. // Set the appropriate read size and flags as well. // Build request here. @@ -550,7 +610,10 @@ DefaultFetch<Impl>::fetchCacheLine(Addr fetch_PC, Fault &ret_fault, unsigned tid // Build packet here. PacketPtr data_pkt = new Packet(mem_req, Packet::ReadReq, Packet::Broadcast); - data_pkt->dataDynamic(new uint8_t[cacheBlkSize]); + data_pkt->dataDynamicArray(new uint8_t[cacheBlkSize]); + + cacheDataPC[tid] = fetch_PC; + cacheDataValid[tid] = false; DPRINTF(Fetch, "Fetch: Doing instruction read.\n"); @@ -595,6 +658,7 @@ DefaultFetch<Impl>::doSquash(const Addr &new_PC, unsigned tid) PC[tid] = new_PC; nextPC[tid] = new_PC + instSize; + nextNPC[tid] = new_PC + (2 * instSize); // Clear the icache miss if it's outstanding. if (fetchStatus[tid] == IcacheWaitResponse) { @@ -628,6 +692,14 @@ DefaultFetch<Impl>::squashFromDecode(const Addr &new_PC, doSquash(new_PC, tid); +#if THE_ISA != ALPHA_ISA + if (seq_num <= delaySlotInfo[tid].branchSeqNum) { + delaySlotInfo[tid].numInsts = 0; + delaySlotInfo[tid].targetAddr = 0; + delaySlotInfo[tid].targetReady = false; + } +#endif + // Tell the CPU to remove any instructions that are in flight between // fetch and decode. cpu->removeInstsUntil(seq_num, tid); @@ -664,7 +736,7 @@ typename DefaultFetch<Impl>::FetchStatus DefaultFetch<Impl>::updateFetchStatus() { //Check Running - list<unsigned>::iterator threads = (*activeThreads).begin(); + std::list<unsigned>::iterator threads = (*activeThreads).begin(); while (threads != (*activeThreads).end()) { @@ -701,21 +773,33 @@ DefaultFetch<Impl>::updateFetchStatus() template <class Impl> void -DefaultFetch<Impl>::squash(const Addr &new_PC, unsigned tid) +DefaultFetch<Impl>::squash(const Addr &new_PC, const InstSeqNum &seq_num, + bool squash_delay_slot, unsigned tid) { DPRINTF(Fetch, "[tid:%u]: Squash from commit.\n",tid); doSquash(new_PC, tid); +#if THE_ISA == ALPHA_ISA + // Tell the CPU to remove any instructions that are not in the ROB. + cpu->removeInstsNotInROB(tid, true, 0); +#else + if (seq_num <= delaySlotInfo[tid].branchSeqNum) { + delaySlotInfo[tid].numInsts = 0; + delaySlotInfo[tid].targetAddr = 0; + delaySlotInfo[tid].targetReady = false; + } + // Tell the CPU to remove any instructions that are not in the ROB. - cpu->removeInstsNotInROB(tid); + cpu->removeInstsNotInROB(tid, squash_delay_slot, seq_num); +#endif } template <class Impl> void DefaultFetch<Impl>::tick() { - list<unsigned>::iterator threads = (*activeThreads).begin(); + std::list<unsigned>::iterator threads = (*activeThreads).begin(); bool status_change = false; wroteToTimeBuffer = false; @@ -817,8 +901,16 @@ DefaultFetch<Impl>::checkSignalsAndUpdate(unsigned tid) DPRINTF(Fetch, "[tid:%u]: Squashing instructions due to squash " "from commit.\n",tid); +#if THE_ISA == ALPHA_ISA + InstSeqNum doneSeqNum = fromCommit->commitInfo[tid].doneSeqNum; +#else + InstSeqNum doneSeqNum = fromCommit->commitInfo[tid].bdelayDoneSeqNum; +#endif // In any case, squash. - squash(fromCommit->commitInfo[tid].nextPC,tid); + squash(fromCommit->commitInfo[tid].nextPC, + doneSeqNum, + fromCommit->commitInfo[tid].squashDelaySlot, + tid); // Also check if there's a mispredict that happened. if (fromCommit->commitInfo[tid].branchMispredict) { @@ -865,9 +957,15 @@ DefaultFetch<Impl>::checkSignalsAndUpdate(unsigned tid) } if (fetchStatus[tid] != Squashing) { + +#if THE_ISA == ALPHA_ISA + InstSeqNum doneSeqNum = fromDecode->decodeInfo[tid].doneSeqNum; +#else + InstSeqNum doneSeqNum = fromDecode->decodeInfo[tid].bdelayDoneSeqNum; +#endif // Squash unless we're already squashing squashFromDecode(fromDecode->decodeInfo[tid].nextPC, - fromDecode->decodeInfo[tid].doneSeqNum, + doneSeqNum, tid); return true; @@ -973,6 +1071,7 @@ DefaultFetch<Impl>::fetch(bool &status_change) } Addr next_PC = fetch_PC; + Addr next_NPC = next_PC + instSize; InstSeqNum inst_seq; MachInst inst; ExtMachInst ext_inst; @@ -991,10 +1090,13 @@ DefaultFetch<Impl>::fetch(bool &status_change) // ended this fetch block. bool predicted_branch = false; + // Need to keep track of whether or not a delay slot + // instruction has been fetched + for (; offset < cacheBlkSize && numInst < fetchWidth && - !predicted_branch; + (!predicted_branch || delaySlotInfo[tid].numInsts > 0); ++numInst) { // Get a sequence number. @@ -1004,7 +1106,7 @@ DefaultFetch<Impl>::fetch(bool &status_change) assert(offset <= cacheBlkSize - instSize); // Get the instruction from the array of the cache line. - inst = gtoh(*reinterpret_cast<MachInst *> + inst = TheISA::gtoh(*reinterpret_cast<TheISA::MachInst *> (&cacheData[tid][offset])); ext_inst = TheISA::makeExtMI(inst, fetch_PC); @@ -1031,7 +1133,8 @@ DefaultFetch<Impl>::fetch(bool &status_change) instruction->staticInst, instruction->readPC(),tid); - predicted_branch = lookupAndUpdateNextPC(instruction, next_PC); + predicted_branch = lookupAndUpdateNextPC(instruction, next_PC, + next_NPC); // Add instruction to the CPU's list of instructions. instruction->setInstListIt(cpu->addInst(instruction)); @@ -1057,7 +1160,41 @@ DefaultFetch<Impl>::fetch(bool &status_change) break; } - offset+= instSize; + offset += instSize; + +#if THE_ISA != ALPHA_ISA + if (predicted_branch) { + delaySlotInfo[tid].branchSeqNum = inst_seq; + + DPRINTF(Fetch, "[tid:%i]: Delay slot branch set to [sn:%i]\n", + tid, inst_seq); + continue; + } else if (delaySlotInfo[tid].numInsts > 0) { + --delaySlotInfo[tid].numInsts; + + // It's OK to set PC to target of branch + if (delaySlotInfo[tid].numInsts == 0) { + delaySlotInfo[tid].targetReady = true; + + // Break the looping condition + predicted_branch = true; + } + + DPRINTF(Fetch, "[tid:%i]: %i delay slot inst(s) left to" + " process.\n", tid, delaySlotInfo[tid].numInsts); + } +#endif + } + + if (offset >= cacheBlkSize) { + DPRINTF(Fetch, "[tid:%i]: Done fetching, reached the end of cache " + "block.\n", tid); + } else if (numInst >= fetchWidth) { + DPRINTF(Fetch, "[tid:%i]: Done fetching, reached fetch bandwidth " + "for this cycle.\n", tid); + } else if (predicted_branch && delaySlotInfo[tid].numInsts <= 0) { + DPRINTF(Fetch, "[tid:%i]: Done fetching, predicted branch " + "instruction encountered.\n", tid); } } @@ -1068,18 +1205,26 @@ DefaultFetch<Impl>::fetch(bool &status_change) // Now that fetching is completed, update the PC to signify what the next // cycle will be. if (fault == NoFault) { - DPRINTF(Fetch, "[tid:%i]: Setting PC to %08p.\n",tid, next_PC); - #if THE_ISA == ALPHA_ISA + DPRINTF(Fetch, "[tid:%i]: Setting PC to %08p.\n",tid, next_PC); PC[tid] = next_PC; nextPC[tid] = next_PC + instSize; #else - PC[tid] = next_PC; - nextPC[tid] = next_PC + instSize; - nextPC[tid] = next_PC + instSize; + if (delaySlotInfo[tid].targetReady && + delaySlotInfo[tid].numInsts == 0) { + // Set PC to target + PC[tid] = delaySlotInfo[tid].targetAddr; //next_PC + nextPC[tid] = next_PC + instSize; //next_NPC + nextNPC[tid] = next_PC + (2 * instSize); + + delaySlotInfo[tid].targetReady = false; + } else { + PC[tid] = next_PC; + nextPC[tid] = next_NPC; + nextNPC[tid] = next_NPC + instSize; + } - thread->setNextPC(thread->readNextNPC()); - thread->setNextNPC(thread->readNextNPC() + sizeof(MachInst)); + DPRINTF(Fetch, "[tid:%i]: Setting PC to %08p.\n", tid, PC[tid]); #endif } else { // We shouldn't be in an icache miss and also have a fault (an ITB @@ -1123,9 +1268,9 @@ DefaultFetch<Impl>::fetch(bool &status_change) fetchStatus[tid] = TrapPending; status_change = true; - warn("cycle %lli: fault (%d) detected @ PC %08p", curTick, fault, PC[tid]); + warn("cycle %lli: fault (%s) detected @ PC %08p", curTick, fault->name(), PC[tid]); #else // !FULL_SYSTEM - warn("cycle %lli: fault (%d) detected @ PC %08p", curTick, fault, PC[tid]); + warn("cycle %lli: fault (%s) detected @ PC %08p", curTick, fault->name(), PC[tid]); #endif // FULL_SYSTEM } } @@ -1202,8 +1347,8 @@ template<class Impl> int DefaultFetch<Impl>::roundRobin() { - list<unsigned>::iterator pri_iter = priorityList.begin(); - list<unsigned>::iterator end = priorityList.end(); + std::list<unsigned>::iterator pri_iter = priorityList.begin(); + std::list<unsigned>::iterator end = priorityList.end(); int high_pri; @@ -1232,9 +1377,9 @@ template<class Impl> int DefaultFetch<Impl>::iqCount() { - priority_queue<unsigned> PQ; + std::priority_queue<unsigned> PQ; - list<unsigned>::iterator threads = (*activeThreads).begin(); + std::list<unsigned>::iterator threads = (*activeThreads).begin(); while (threads != (*activeThreads).end()) { unsigned tid = *threads++; @@ -1262,10 +1407,10 @@ template<class Impl> int DefaultFetch<Impl>::lsqCount() { - priority_queue<unsigned> PQ; + std::priority_queue<unsigned> PQ; - list<unsigned>::iterator threads = (*activeThreads).begin(); + std::list<unsigned>::iterator threads = (*activeThreads).begin(); while (threads != (*activeThreads).end()) { unsigned tid = *threads++; @@ -1293,7 +1438,7 @@ template<class Impl> int DefaultFetch<Impl>::branchCount() { - list<unsigned>::iterator threads = (*activeThreads).begin(); + std::list<unsigned>::iterator threads = (*activeThreads).begin(); panic("Branch Count Fetch policy unimplemented\n"); return *threads; } diff --git a/src/cpu/o3/iew.hh b/src/cpu/o3/iew.hh index fb9afde54..76fa008ee 100644 --- a/src/cpu/o3/iew.hh +++ b/src/cpu/o3/iew.hh @@ -31,11 +31,12 @@ #ifndef __CPU_O3_IEW_HH__ #define __CPU_O3_IEW_HH__ +#include "config/full_system.hh" + #include <queue> #include "base/statistics.hh" #include "base/timebuf.hh" -#include "config/full_system.hh" #include "cpu/o3/comm.hh" #include "cpu/o3/scoreboard.hh" #include "cpu/o3/lsq.hh" @@ -215,7 +216,7 @@ class DefaultIEW if (++wbOutstanding == wbMax) ableToIssue = false; DPRINTF(IEW, "wbOutstanding: %i\n", wbOutstanding); -#if DEBUG +#ifdef DEBUG wbList.insert(sn); #endif } @@ -225,13 +226,13 @@ class DefaultIEW if (wbOutstanding-- == wbMax) ableToIssue = true; DPRINTF(IEW, "wbOutstanding: %i\n", wbOutstanding); -#if DEBUG +#ifdef DEBUG assert(wbList.find(sn) != wbList.end()); wbList.erase(sn); #endif } -#if DEBUG +#ifdef DEBUG std::set<InstSeqNum> wbList; void dumpWb() @@ -404,6 +405,9 @@ class DefaultIEW /** Records if there is a fetch redirect on this cycle for each thread. */ bool fetchRedirect[Impl::MaxThreads]; + /** Keeps track of the last valid branch delay slot instss for threads */ + InstSeqNum bdelayDoneSeqNum[Impl::MaxThreads]; + /** Used to track if all instructions have been dispatched this cycle. * If they have not, then blocking must have occurred, and the instructions * would already be added to the skid buffer. diff --git a/src/cpu/o3/iew_impl.hh b/src/cpu/o3/iew_impl.hh index 684ae2295..cdc36c6c3 100644 --- a/src/cpu/o3/iew_impl.hh +++ b/src/cpu/o3/iew_impl.hh @@ -38,8 +38,6 @@ #include "cpu/o3/fu_pool.hh" #include "cpu/o3/iew.hh" -using namespace std; - template<class Impl> DefaultIEW<Impl>::DefaultIEW(Params *params) : issueToExecQueue(params->backComSize, params->forwardComSize), @@ -73,6 +71,7 @@ DefaultIEW<Impl>::DefaultIEW(Params *params) dispatchStatus[i] = Running; stalls[i].commit = false; fetchRedirect[i] = false; + bdelayDoneSeqNum[i] = 0; } wbMax = wbWidth * params->wbDepth; @@ -335,7 +334,7 @@ DefaultIEW<Impl>::setIEWQueue(TimeBuffer<IEWStruct> *iq_ptr) template<class Impl> void -DefaultIEW<Impl>::setActiveThreads(list<unsigned> *at_ptr) +DefaultIEW<Impl>::setActiveThreads(std::list<unsigned> *at_ptr) { DPRINTF(IEW, "Setting active threads list pointer.\n"); activeThreads = at_ptr; @@ -428,13 +427,31 @@ DefaultIEW<Impl>::squash(unsigned tid) instQueue.squash(tid); // Tell the LDSTQ to start squashing. +#if THE_ISA == ALPHA_ISA ldstQueue.squash(fromCommit->commitInfo[tid].doneSeqNum, tid); - +#else + ldstQueue.squash(fromCommit->commitInfo[tid].bdelayDoneSeqNum, tid); +#endif updatedQueues = true; // Clear the skid buffer in case it has any data in it. - while (!skidBuffer[tid].empty()) { + DPRINTF(IEW, "[tid:%i]: Removing skidbuffer instructions until [sn:%i].\n", + tid, fromCommit->commitInfo[tid].bdelayDoneSeqNum); + while (!skidBuffer[tid].empty()) { +#if THE_ISA != ALPHA_ISA + if (skidBuffer[tid].front()->seqNum <= + fromCommit->commitInfo[tid].bdelayDoneSeqNum) { + DPRINTF(IEW, "[tid:%i]: Cannot remove skidbuffer instructions " + "that occur before delay slot [sn:%i].\n", + fromCommit->commitInfo[tid].bdelayDoneSeqNum, + tid); + break; + } else { + DPRINTF(IEW, "[tid:%i]: Removing instruction [sn:%i] from " + "skidBuffer.\n", tid, skidBuffer[tid].front()->seqNum); + } +#endif if (skidBuffer[tid].front()->isLoad() || skidBuffer[tid].front()->isStore() ) { toRename->iewInfo[tid].dispatchedToLSQ++; @@ -445,6 +462,8 @@ DefaultIEW<Impl>::squash(unsigned tid) skidBuffer[tid].pop(); } + bdelayDoneSeqNum[tid] = fromCommit->commitInfo[tid].bdelayDoneSeqNum; + emptyRenameInsts(tid); } @@ -458,10 +477,26 @@ DefaultIEW<Impl>::squashDueToBranch(DynInstPtr &inst, unsigned tid) toCommit->squash[tid] = true; toCommit->squashedSeqNum[tid] = inst->seqNum; toCommit->mispredPC[tid] = inst->readPC(); - toCommit->nextPC[tid] = inst->readNextPC(); toCommit->branchMispredict[tid] = true; + +#if THE_ISA == ALPHA_ISA toCommit->branchTaken[tid] = inst->readNextPC() != (inst->readPC() + sizeof(TheISA::MachInst)); + toCommit->nextPC[tid] = inst->readNextPC(); +#else + bool branch_taken = inst->readNextNPC() != + (inst->readNextPC() + sizeof(TheISA::MachInst)); + + toCommit->branchTaken[tid] = branch_taken; + + toCommit->condDelaySlotBranch[tid] = inst->isCondDelaySlot(); + + if (inst->isCondDelaySlot() && branch_taken) { + toCommit->nextPC[tid] = inst->readNextPC(); + } else { + toCommit->nextPC[tid] = inst->readNextNPC(); + } +#endif toCommit->includeSquashInst[tid] = false; @@ -626,7 +661,7 @@ DefaultIEW<Impl>::skidCount() { int max=0; - list<unsigned>::iterator threads = (*activeThreads).begin(); + std::list<unsigned>::iterator threads = (*activeThreads).begin(); while (threads != (*activeThreads).end()) { unsigned thread_count = skidBuffer[*threads++].size(); @@ -641,7 +676,7 @@ template<class Impl> bool DefaultIEW<Impl>::skidsEmpty() { - list<unsigned>::iterator threads = (*activeThreads).begin(); + std::list<unsigned>::iterator threads = (*activeThreads).begin(); while (threads != (*activeThreads).end()) { if (!skidBuffer[*threads++].empty()) @@ -657,7 +692,7 @@ DefaultIEW<Impl>::updateStatus() { bool any_unblocking = false; - list<unsigned>::iterator threads = (*activeThreads).begin(); + std::list<unsigned>::iterator threads = (*activeThreads).begin(); threads = (*activeThreads).begin(); @@ -825,9 +860,11 @@ DefaultIEW<Impl>::sortInsts() { int insts_from_rename = fromRename->size; #ifdef DEBUG +#if THE_ISA == ALPHA_ISA for (int i = 0; i < numThreads; i++) assert(insts[i].empty()); #endif +#endif for (int i = 0; i < insts_from_rename; ++i) { insts[fromRename->insts[i]->threadNumber].push(fromRename->insts[i]); } @@ -837,7 +874,23 @@ template <class Impl> void DefaultIEW<Impl>::emptyRenameInsts(unsigned tid) { + DPRINTF(IEW, "[tid:%i]: Removing incoming rename instructions until " + "[sn:%i].\n", tid, bdelayDoneSeqNum[tid]); + while (!insts[tid].empty()) { + +#if THE_ISA != ALPHA_ISA + if (insts[tid].front()->seqNum <= bdelayDoneSeqNum[tid]) { + DPRINTF(IEW, "[tid:%i]: Done removing, cannot remove instruction" + " that occurs at or before delay slot [sn:%i].\n", + tid, bdelayDoneSeqNum[tid]); + break; + } else { + DPRINTF(IEW, "[tid:%i]: Removing incoming rename instruction " + "[sn:%i].\n", tid, insts[tid].front()->seqNum); + } +#endif + if (insts[tid].front()->isLoad() || insts[tid].front()->isStore() ) { toRename->iewInfo[tid].dispatchedToLSQ++; @@ -1120,7 +1173,7 @@ DefaultIEW<Impl>::dispatchInsts(unsigned tid) } if (!insts_to_dispatch.empty()) { - DPRINTF(IEW,"[tid:%i]: Issue: Bandwidth Full. Blocking.\n"); + DPRINTF(IEW,"[tid:%i]: Issue: Bandwidth Full. Blocking.\n", tid); block(tid); toRename->iewUnblock[tid] = false; } @@ -1140,13 +1193,13 @@ DefaultIEW<Impl>::printAvailableInsts() { int inst = 0; - cout << "Available Instructions: "; + std::cout << "Available Instructions: "; while (fromIssue->insts[inst]) { - if (inst%3==0) cout << "\n\t"; + if (inst%3==0) std::cout << "\n\t"; - cout << "PC: " << fromIssue->insts[inst]->readPC() + std::cout << "PC: " << fromIssue->insts[inst]->readPC() << " TN: " << fromIssue->insts[inst]->threadNumber << " SN: " << fromIssue->insts[inst]->seqNum << " | "; @@ -1154,7 +1207,7 @@ DefaultIEW<Impl>::printAvailableInsts() } - cout << "\n"; + std::cout << "\n"; } template <class Impl> @@ -1164,7 +1217,7 @@ DefaultIEW<Impl>::executeInsts() wbNumInst = 0; wbCycle = 0; - list<unsigned>::iterator threads = (*activeThreads).begin(); + std::list<unsigned>::iterator threads = (*activeThreads).begin(); while (threads != (*activeThreads).end()) { unsigned tid = *threads++; @@ -1263,9 +1316,13 @@ DefaultIEW<Impl>::executeInsts() fetchRedirect[tid] = true; DPRINTF(IEW, "Execute: Branch mispredict detected.\n"); +#if THE_ISA == ALPHA_ISA DPRINTF(IEW, "Execute: Redirecting fetch to PC: %#x.\n", inst->nextPC); - +#else + DPRINTF(IEW, "Execute: Redirecting fetch to PC: %#x.\n", + inst->nextNPC); +#endif // If incorrect, then signal the ROB that it must be squashed. squashDueToBranch(inst, tid); @@ -1384,7 +1441,7 @@ DefaultIEW<Impl>::tick() // Free function units marked as being freed this cycle. fuPool->processFreeUnits(); - list<unsigned>::iterator threads = (*activeThreads).begin(); + std::list<unsigned>::iterator threads = (*activeThreads).begin(); // Check stall and squash signals, dispatch any instructions. while (threads != (*activeThreads).end()) { diff --git a/src/cpu/o3/inst_queue.hh b/src/cpu/o3/inst_queue.hh index 4c69ca384..d745faf7b 100644 --- a/src/cpu/o3/inst_queue.hh +++ b/src/cpu/o3/inst_queue.hh @@ -26,7 +26,6 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * Authors: Kevin Lim - * Korey Sewell */ #ifndef __CPU_O3_INST_QUEUE_HH__ diff --git a/src/cpu/o3/inst_queue_impl.hh b/src/cpu/o3/inst_queue_impl.hh index 36e0842be..e7991662b 100644 --- a/src/cpu/o3/inst_queue_impl.hh +++ b/src/cpu/o3/inst_queue_impl.hh @@ -37,8 +37,6 @@ #include "cpu/o3/fu_pool.hh" #include "cpu/o3/inst_queue.hh" -using namespace std; - template <class Impl> InstructionQueue<Impl>::FUCompletion::FUCompletion(DynInstPtr &_inst, int fu_idx, @@ -100,7 +98,7 @@ InstructionQueue<Impl>::InstructionQueue(Params *params) resetState(); - string policy = params->smtIQPolicy; + std::string policy = params->smtIQPolicy; //Convert string to lowercase std::transform(policy.begin(), policy.end(), policy.begin(), @@ -279,7 +277,7 @@ InstructionQueue<Impl>::regStats() ; for (int i=0; i<Num_OpClasses; ++i) { - stringstream subname; + std::stringstream subname; subname << opClassStrings[i] << "_delay"; issueDelayDist.subname(i, subname.str()); } @@ -359,7 +357,7 @@ InstructionQueue<Impl>::resetState() template <class Impl> void -InstructionQueue<Impl>::setActiveThreads(list<unsigned> *at_ptr) +InstructionQueue<Impl>::setActiveThreads(std::list<unsigned> *at_ptr) { DPRINTF(IQ, "Setting active threads list pointer.\n"); activeThreads = at_ptr; @@ -421,8 +419,8 @@ InstructionQueue<Impl>::resetEntries() if (iqPolicy != Dynamic || numThreads > 1) { int active_threads = (*activeThreads).size(); - list<unsigned>::iterator threads = (*activeThreads).begin(); - list<unsigned>::iterator list_end = (*activeThreads).end(); + std::list<unsigned>::iterator threads = (*activeThreads).begin(); + std::list<unsigned>::iterator list_end = (*activeThreads).end(); while (threads != list_end) { if (iqPolicy == Partitioned) { @@ -993,7 +991,11 @@ InstructionQueue<Impl>::squash(unsigned tid) // Read instruction sequence number of last instruction out of the // time buffer. +#if THE_ISA == ALPHA_ISA squashedSeqNum[tid] = fromCommit->commitInfo[tid].doneSeqNum; +#else + squashedSeqNum[tid] = fromCommit->commitInfo[tid].bdelayDoneSeqNum; +#endif // Call doSquash if there are insts in the IQ if (count[tid] > 0) { diff --git a/src/cpu/o3/isa_specific.hh b/src/cpu/o3/isa_specific.hh index f8a9dd8cc..4937589e3 100755 --- a/src/cpu/o3/isa_specific.hh +++ b/src/cpu/o3/isa_specific.hh @@ -35,6 +35,11 @@ #include "cpu/o3/alpha/impl.hh" #include "cpu/o3/alpha/params.hh" #include "cpu/o3/alpha/dyn_inst.hh" +#elif THE_ISA == MIPS_ISA + #include "cpu/o3/mips/cpu.hh" + #include "cpu/o3/mips/impl.hh" + #include "cpu/o3/mips/params.hh" + #include "cpu/o3/mips/dyn_inst.hh" #else - #error "O3CPU doesnt support this ISA" + #error "ISA-specific header files O3CPU not defined ISA" #endif diff --git a/src/cpu/o3/lsq.hh b/src/cpu/o3/lsq.hh index d5890950f..190734dc2 100644 --- a/src/cpu/o3/lsq.hh +++ b/src/cpu/o3/lsq.hh @@ -70,7 +70,7 @@ class LSQ { * to work. For now it just returns the port from one of the * threads. */ - Port *getDcachePort() { return thread[0].getDcachePort(); } + Port *getDcachePort() { return &dcachePort; } /** Sets the pointer to the list of active threads. */ void setActiveThreads(std::list<unsigned> *at_ptr); @@ -258,6 +258,15 @@ class LSQ { bool willWB(unsigned tid) { return thread[tid].willWB(); } + /** Returns if the cache is currently blocked. */ + bool cacheBlocked() + { return retryTid != -1; } + + /** Sets the retry thread id, indicating that one of the LSQUnits + * tried to access the cache but the cache was blocked. */ + void setRetryTid(int tid) + { retryTid = tid; } + /** Debugging function to print out all instructions. */ void dumpInsts(); /** Debugging function to print out instructions from a specific thread. */ @@ -274,7 +283,49 @@ class LSQ { template <class T> Fault write(RequestPtr req, T &data, int store_idx); - private: + /** DcachePort class for this LSQ. Handles doing the + * communication with the cache/memory. + */ + class DcachePort : public Port + { + protected: + /** Pointer to LSQ. */ + LSQ *lsq; + + public: + /** Default constructor. */ + DcachePort(LSQ *_lsq) + : lsq(_lsq) + { } + + protected: + /** Atomic version of receive. Panics. */ + virtual Tick recvAtomic(PacketPtr pkt); + + /** Functional version of receive. Panics. */ + virtual void recvFunctional(PacketPtr pkt); + + /** Receives status change. Other than range changing, panics. */ + virtual void recvStatusChange(Status status); + + /** Returns the address ranges of this device. */ + virtual void getDeviceAddressRanges(AddrRangeList &resp, + AddrRangeList &snoop) + { resp.clear(); snoop.clear(); } + + /** Timing version of receive. Handles writing back and + * completing the load or store that has returned from + * memory. */ + virtual bool recvTiming(PacketPtr pkt); + + /** Handles doing a retry of the previous send. */ + virtual void recvRetry(); + }; + + /** D-cache port. */ + DcachePort dcachePort; + + protected: /** The LSQ policy for SMT mode. */ LSQPolicy lsqPolicy; @@ -303,6 +354,10 @@ class LSQ { /** Number of Threads. */ unsigned numThreads; + + /** The thread id of the LSQ Unit that is currently waiting for a + * retry. */ + int retryTid; }; template <class Impl> diff --git a/src/cpu/o3/lsq_impl.hh b/src/cpu/o3/lsq_impl.hh index 89fd1a71d..db2c253e1 100644 --- a/src/cpu/o3/lsq_impl.hh +++ b/src/cpu/o3/lsq_impl.hh @@ -29,23 +29,66 @@ */ #include <algorithm> +#include <list> #include <string> #include "cpu/o3/lsq.hh" -using namespace std; +template <class Impl> +Tick +LSQ<Impl>::DcachePort::recvAtomic(PacketPtr pkt) +{ + panic("O3CPU model does not work with atomic mode!"); + return curTick; +} + +template <class Impl> +void +LSQ<Impl>::DcachePort::recvFunctional(PacketPtr pkt) +{ + panic("O3CPU doesn't expect recvFunctional callback!"); +} + +template <class Impl> +void +LSQ<Impl>::DcachePort::recvStatusChange(Status status) +{ + if (status == RangeChange) + return; + + panic("O3CPU doesn't expect recvStatusChange callback!"); +} + +template <class Impl> +bool +LSQ<Impl>::DcachePort::recvTiming(PacketPtr pkt) +{ + lsq->thread[pkt->req->getThreadNum()].completeDataAccess(pkt); + return true; +} + +template <class Impl> +void +LSQ<Impl>::DcachePort::recvRetry() +{ + lsq->thread[lsq->retryTid].recvRetry(); + // Speculatively clear the retry Tid. This will get set again if + // the LSQUnit was unable to complete its access. + lsq->retryTid = -1; +} template <class Impl> LSQ<Impl>::LSQ(Params *params) - : LQEntries(params->LQEntries), SQEntries(params->SQEntries), - numThreads(params->numberOfThreads) + : dcachePort(this), LQEntries(params->LQEntries), + SQEntries(params->SQEntries), numThreads(params->numberOfThreads), + retryTid(-1) { DPRINTF(LSQ, "Creating LSQ object.\n"); //**********************************************/ //************ Handle SMT Parameters ***********/ //**********************************************/ - string policy = params->smtLSQPolicy; + std::string policy = params->smtLSQPolicy; //Convert string to lowercase std::transform(policy.begin(), policy.end(), policy.begin(), @@ -94,7 +137,8 @@ LSQ<Impl>::LSQ(Params *params) //Initialize LSQs for (int tid=0; tid < numThreads; tid++) { - thread[tid].init(params, maxLQEntries, maxSQEntries, tid); + thread[tid].init(params, this, maxLQEntries, maxSQEntries, tid); + thread[tid].setDcachePort(&dcachePort); } } @@ -118,7 +162,7 @@ LSQ<Impl>::regStats() template<class Impl> void -LSQ<Impl>::setActiveThreads(list<unsigned> *at_ptr) +LSQ<Impl>::setActiveThreads(std::list<unsigned> *at_ptr) { activeThreads = at_ptr; assert(activeThreads != 0); @@ -130,6 +174,8 @@ LSQ<Impl>::setCPU(O3CPU *cpu_ptr) { cpu = cpu_ptr; + dcachePort.setName(name()); + for (int tid=0; tid < numThreads; tid++) { thread[tid].setCPU(cpu_ptr); } @@ -182,8 +228,8 @@ LSQ<Impl>::resetEntries() if (lsqPolicy != Dynamic || numThreads > 1) { int active_threads = (*activeThreads).size(); - list<unsigned>::iterator threads = (*activeThreads).begin(); - list<unsigned>::iterator list_end = (*activeThreads).end(); + std::list<unsigned>::iterator threads = (*activeThreads).begin(); + std::list<unsigned>::iterator list_end = (*activeThreads).end(); int maxEntries; @@ -221,7 +267,7 @@ template<class Impl> void LSQ<Impl>::tick() { - list<unsigned>::iterator active_threads = (*activeThreads).begin(); + std::list<unsigned>::iterator active_threads = (*activeThreads).begin(); while (active_threads != (*activeThreads).end()) { unsigned tid = *active_threads++; @@ -270,7 +316,7 @@ template<class Impl> void LSQ<Impl>::writebackStores() { - list<unsigned>::iterator active_threads = (*activeThreads).begin(); + std::list<unsigned>::iterator active_threads = (*activeThreads).begin(); while (active_threads != (*activeThreads).end()) { unsigned tid = *active_threads++; @@ -289,7 +335,7 @@ bool LSQ<Impl>::violation() { /* Answers: Does Anybody Have a Violation?*/ - list<unsigned>::iterator active_threads = (*activeThreads).begin(); + std::list<unsigned>::iterator active_threads = (*activeThreads).begin(); while (active_threads != (*activeThreads).end()) { unsigned tid = *active_threads++; @@ -306,7 +352,7 @@ LSQ<Impl>::getCount() { unsigned total = 0; - list<unsigned>::iterator active_threads = (*activeThreads).begin(); + std::list<unsigned>::iterator active_threads = (*activeThreads).begin(); while (active_threads != (*activeThreads).end()) { unsigned tid = *active_threads++; @@ -322,7 +368,7 @@ LSQ<Impl>::numLoads() { unsigned total = 0; - list<unsigned>::iterator active_threads = (*activeThreads).begin(); + std::list<unsigned>::iterator active_threads = (*activeThreads).begin(); while (active_threads != (*activeThreads).end()) { unsigned tid = *active_threads++; @@ -338,7 +384,7 @@ LSQ<Impl>::numStores() { unsigned total = 0; - list<unsigned>::iterator active_threads = (*activeThreads).begin(); + std::list<unsigned>::iterator active_threads = (*activeThreads).begin(); while (active_threads != (*activeThreads).end()) { unsigned tid = *active_threads++; @@ -354,7 +400,7 @@ LSQ<Impl>::numLoadsReady() { unsigned total = 0; - list<unsigned>::iterator active_threads = (*activeThreads).begin(); + std::list<unsigned>::iterator active_threads = (*activeThreads).begin(); while (active_threads != (*activeThreads).end()) { unsigned tid = *active_threads++; @@ -370,7 +416,7 @@ LSQ<Impl>::numFreeEntries() { unsigned total = 0; - list<unsigned>::iterator active_threads = (*activeThreads).begin(); + std::list<unsigned>::iterator active_threads = (*activeThreads).begin(); while (active_threads != (*activeThreads).end()) { unsigned tid = *active_threads++; @@ -394,7 +440,7 @@ template<class Impl> bool LSQ<Impl>::isFull() { - list<unsigned>::iterator active_threads = (*activeThreads).begin(); + std::list<unsigned>::iterator active_threads = (*activeThreads).begin(); while (active_threads != (*activeThreads).end()) { unsigned tid = *active_threads++; @@ -421,7 +467,7 @@ template<class Impl> bool LSQ<Impl>::lqFull() { - list<unsigned>::iterator active_threads = (*activeThreads).begin(); + std::list<unsigned>::iterator active_threads = (*activeThreads).begin(); while (active_threads != (*activeThreads).end()) { unsigned tid = *active_threads++; @@ -448,7 +494,7 @@ template<class Impl> bool LSQ<Impl>::sqFull() { - list<unsigned>::iterator active_threads = (*activeThreads).begin(); + std::list<unsigned>::iterator active_threads = (*activeThreads).begin(); while (active_threads != (*activeThreads).end()) { unsigned tid = *active_threads++; @@ -475,7 +521,7 @@ template<class Impl> bool LSQ<Impl>::isStalled() { - list<unsigned>::iterator active_threads = (*activeThreads).begin(); + std::list<unsigned>::iterator active_threads = (*activeThreads).begin(); while (active_threads != (*activeThreads).end()) { unsigned tid = *active_threads++; @@ -500,7 +546,7 @@ template<class Impl> bool LSQ<Impl>::hasStoresToWB() { - list<unsigned>::iterator active_threads = (*activeThreads).begin(); + std::list<unsigned>::iterator active_threads = (*activeThreads).begin(); if ((*activeThreads).empty()) return false; @@ -518,7 +564,7 @@ template<class Impl> bool LSQ<Impl>::willWB() { - list<unsigned>::iterator active_threads = (*activeThreads).begin(); + std::list<unsigned>::iterator active_threads = (*activeThreads).begin(); while (active_threads != (*activeThreads).end()) { unsigned tid = *active_threads++; @@ -533,7 +579,7 @@ template<class Impl> void LSQ<Impl>::dumpInsts() { - list<unsigned>::iterator active_threads = (*activeThreads).begin(); + std::list<unsigned>::iterator active_threads = (*activeThreads).begin(); while (active_threads != (*activeThreads).end()) { unsigned tid = *active_threads++; diff --git a/src/cpu/o3/lsq_unit.hh b/src/cpu/o3/lsq_unit.hh index 4d7a8350b..512b5a63c 100644 --- a/src/cpu/o3/lsq_unit.hh +++ b/src/cpu/o3/lsq_unit.hh @@ -64,6 +64,7 @@ class LSQUnit { typedef typename Impl::O3CPU O3CPU; typedef typename Impl::DynInstPtr DynInstPtr; typedef typename Impl::CPUPol::IEW IEW; + typedef typename Impl::CPUPol::LSQ LSQ; typedef typename Impl::CPUPol::IssueStruct IssueStruct; public: @@ -71,17 +72,12 @@ class LSQUnit { LSQUnit(); /** Initializes the LSQ unit with the specified number of entries. */ - void init(Params *params, unsigned maxLQEntries, + void init(Params *params, LSQ *lsq_ptr, unsigned maxLQEntries, unsigned maxSQEntries, unsigned id); /** Returns the name of the LSQ unit. */ std::string name() const; - /** Returns the dcache port. - * @todo: Remove this once the port moves up to the LSQ level. - */ - Port *getDcachePort() { return dcachePort; } - /** Registers statistics. */ void regStats(); @@ -92,6 +88,10 @@ class LSQUnit { void setIEW(IEW *iew_ptr) { iewStage = iew_ptr; } + /** Sets the pointer to the dcache port. */ + void setDcachePort(Port *dcache_port) + { dcachePort = dcache_port; } + /** Switches out LSQ unit. */ void switchOut(); @@ -211,6 +211,9 @@ class LSQUnit { !storeQueue[storeWBIdx].completed && !isStoreBlocked; } + /** Handles doing the retry. */ + void recvRetry(); + private: /** Writes back the instruction, sending it to IEW. */ void writeback(DynInstPtr &inst, PacketPtr pkt); @@ -221,9 +224,6 @@ class LSQUnit { /** Completes the store at the specified index. */ void completeStore(int store_idx); - /** Handles doing the retry. */ - void recvRetry(); - /** Increments the given store index (circular queue). */ inline void incrStIdx(int &store_idx); /** Decrements the given store index (circular queue). */ @@ -244,54 +244,11 @@ class LSQUnit { /** Pointer to the IEW stage. */ IEW *iewStage; - /** Pointer to memory object. */ - MemObject *mem; + /** Pointer to the LSQ. */ + LSQ *lsq; - /** DcachePort class for this LSQ Unit. Handles doing the - * communication with the cache/memory. - * @todo: Needs to be moved to the LSQ level and have some sort - * of arbitration. - */ - class DcachePort : public Port - { - protected: - /** Pointer to CPU. */ - O3CPU *cpu; - /** Pointer to LSQ. */ - LSQUnit *lsq; - - public: - /** Default constructor. */ - DcachePort(O3CPU *_cpu, LSQUnit *_lsq) - : Port(_lsq->name() + "-dport"), cpu(_cpu), lsq(_lsq) - { } - - protected: - /** Atomic version of receive. Panics. */ - virtual Tick recvAtomic(PacketPtr pkt); - - /** Functional version of receive. Panics. */ - virtual void recvFunctional(PacketPtr pkt); - - /** Receives status change. Other than range changing, panics. */ - virtual void recvStatusChange(Status status); - - /** Returns the address ranges of this device. */ - virtual void getDeviceAddressRanges(AddrRangeList &resp, - AddrRangeList &snoop) - { resp.clear(); snoop.clear(); } - - /** Timing version of receive. Handles writing back and - * completing the load or store that has returned from - * memory. */ - virtual bool recvTiming(PacketPtr pkt); - - /** Handles doing a retry of the previous send. */ - virtual void recvRetry(); - }; - - /** Pointer to the D-cache. */ - DcachePort *dcachePort; + /** Pointer to the dcache port. Used only for sending. */ + Port *dcachePort; /** Derived class to hold any sender state the LSQ needs. */ class LSQSenderState : public Packet::SenderState @@ -644,6 +601,7 @@ LSQUnit<Impl>::read(Request *req, T &data, int load_idx) // Tell IQ/mem dep unit that this instruction will need to be // rescheduled eventually iewStage->rescheduleMemInst(load_inst); + iewStage->decrWb(load_inst->seqNum); ++lsqRescheduledLoads; // Do not generate a writeback event as this instruction is not @@ -658,7 +616,7 @@ LSQUnit<Impl>::read(Request *req, T &data, int load_idx) } // If there's no forwarding case, then go access memory - DPRINTF(LSQUnit, "Doing functional access for inst [sn:%lli] PC %#x\n", + DPRINTF(LSQUnit, "Doing memory access for inst [sn:%lli] PC %#x\n", load_inst->seqNum, load_inst->readPC()); assert(!load_inst->memData); @@ -666,9 +624,6 @@ LSQUnit<Impl>::read(Request *req, T &data, int load_idx) ++usedPorts; - DPRINTF(LSQUnit, "Doing timing access for inst PC %#x\n", - load_inst->readPC()); - PacketPtr data_pkt = new Packet(req, Packet::ReadReq, Packet::Broadcast); data_pkt->dataStatic(load_inst->memData); @@ -678,8 +633,18 @@ LSQUnit<Impl>::read(Request *req, T &data, int load_idx) state->inst = load_inst; data_pkt->senderState = state; - // if we have a cache, do cache access too - if (!dcachePort->sendTiming(data_pkt)) { + // if we the cache is not blocked, do cache access + if (!lsq->cacheBlocked()) { + if (!dcachePort->sendTiming(data_pkt)) { + // If the access didn't succeed, tell the LSQ by setting + // the retry thread id. + lsq->setRetryTid(lsqID); + } + } + + // If the cache was blocked, or has become blocked due to the access, + // handle it. + if (lsq->cacheBlocked()) { ++lsqCacheBlocked; // There's an older load that's already going to squash. if (isLoadBlocked && blockedLoadSeqNum < load_inst->seqNum) diff --git a/src/cpu/o3/lsq_unit_impl.hh b/src/cpu/o3/lsq_unit_impl.hh index 8e951534f..4f5dbbf1c 100644 --- a/src/cpu/o3/lsq_unit_impl.hh +++ b/src/cpu/o3/lsq_unit_impl.hh @@ -31,6 +31,7 @@ #include "config/use_checker.hh" +#include "cpu/o3/lsq.hh" #include "cpu/o3/lsq_unit.hh" #include "base/str.hh" #include "mem/packet.hh" @@ -96,46 +97,6 @@ LSQUnit<Impl>::completeDataAccess(PacketPtr pkt) } template <class Impl> -Tick -LSQUnit<Impl>::DcachePort::recvAtomic(PacketPtr pkt) -{ - panic("O3CPU model does not work with atomic mode!"); - return curTick; -} - -template <class Impl> -void -LSQUnit<Impl>::DcachePort::recvFunctional(PacketPtr pkt) -{ - panic("O3CPU doesn't expect recvFunctional callback!"); -} - -template <class Impl> -void -LSQUnit<Impl>::DcachePort::recvStatusChange(Status status) -{ - if (status == RangeChange) - return; - - panic("O3CPU doesn't expect recvStatusChange callback!"); -} - -template <class Impl> -bool -LSQUnit<Impl>::DcachePort::recvTiming(PacketPtr pkt) -{ - lsq->completeDataAccess(pkt); - return true; -} - -template <class Impl> -void -LSQUnit<Impl>::DcachePort::recvRetry() -{ - lsq->recvRetry(); -} - -template <class Impl> LSQUnit<Impl>::LSQUnit() : loads(0), stores(0), storesToWB(0), stalled(false), isStoreBlocked(false), isLoadBlocked(false), @@ -145,13 +106,15 @@ LSQUnit<Impl>::LSQUnit() template<class Impl> void -LSQUnit<Impl>::init(Params *params, unsigned maxLQEntries, +LSQUnit<Impl>::init(Params *params, LSQ *lsq_ptr, unsigned maxLQEntries, unsigned maxSQEntries, unsigned id) { DPRINTF(LSQUnit, "Creating LSQUnit%i object.\n",id); switchedOut = false; + lsq = lsq_ptr; + lsqID = id; // Add 1 for the sentinel entry (they are circular queues). @@ -168,8 +131,6 @@ LSQUnit<Impl>::init(Params *params, unsigned maxLQEntries, usedPorts = 0; cachePorts = params->cachePorts; - mem = params->mem; - memDepViolator = NULL; blockedLoadSeqNum = 0; @@ -180,7 +141,6 @@ void LSQUnit<Impl>::setCPU(O3CPU *cpu_ptr) { cpu = cpu_ptr; - dcachePort = new DcachePort(cpu, this); #if USE_CHECKER if (cpu->checker) { @@ -588,7 +548,7 @@ LSQUnit<Impl>::writebackStores() storeQueue[storeWBIdx].canWB && usedPorts < cachePorts) { - if (isStoreBlocked) { + if (isStoreBlocked || lsq->cacheBlocked()) { DPRINTF(LSQUnit, "Unable to write back any more stores, cache" " is blocked!\n"); break; @@ -830,6 +790,7 @@ LSQUnit<Impl>::writeback(DynInstPtr &inst, PacketPtr pkt) // Squashed instructions do not need to complete their access. if (inst->isSquashed()) { + iewStage->decrWb(inst->seqNum); assert(!inst->isStore()); ++lsqIgnoredResponses; return; @@ -911,6 +872,7 @@ LSQUnit<Impl>::recvRetry() } else { // Still blocked! ++lsqCacheBlocked; + lsq->setRetryTid(lsqID); } } else if (isLoadBlocked) { DPRINTF(LSQUnit, "Loads squash themselves and all younger insts, " diff --git a/src/cpu/o3/mips/cpu.cc b/src/cpu/o3/mips/cpu.cc new file mode 100755 index 000000000..420f460b2 --- /dev/null +++ b/src/cpu/o3/mips/cpu.cc @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Kevin Lim + * Korey Sewell + */ + +#include "cpu/o3/mips/impl.hh" +#include "cpu/o3/mips/cpu_impl.hh" +#include "cpu/o3/mips/dyn_inst.hh" + +// Force instantiation of MipsO3CPU for all the implemntations that are +// needed. Consider merging this and mips_dyn_inst.cc, and maybe all +// classes that depend on a certain impl, into one file (mips_impl.cc?). +template class MipsO3CPU<MipsSimpleImpl>; diff --git a/src/cpu/o3/mips/cpu.hh b/src/cpu/o3/mips/cpu.hh new file mode 100755 index 000000000..bf04b9f69 --- /dev/null +++ b/src/cpu/o3/mips/cpu.hh @@ -0,0 +1,149 @@ +/* + * Copyright (c) 2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Kevin Lim + * Korey Sewell + */ + +#ifndef __CPU_O3_MIPS_CPU_HH__ +#define __CPU_O3_MIPS_CPU_HH__ + +#include "arch/mips/regfile.hh" +#include "arch/mips/syscallreturn.hh" +#include "cpu/thread_context.hh" +#include "cpu/o3/cpu.hh" +#include "sim/byteswap.hh" +#include "sim/faults.hh" + +class EndQuiesceEvent; +namespace Kernel { + class Statistics; +}; + +class TranslatingPort; + +/** + * MipsO3CPU class. Derives from the FullO3CPU class, and + * implements all ISA and implementation specific functions of the + * CPU. This is the CPU class that is used for the SimObjects, and is + * what is given to the DynInsts. Most of its state exists in the + * FullO3CPU; the state is has is mainly for ISA specific + * functionality. + */ +template <class Impl> +class MipsO3CPU : public FullO3CPU<Impl> +{ + public: + typedef O3ThreadState<Impl> ImplState; + typedef O3ThreadState<Impl> Thread; + typedef typename Impl::Params Params; + + /** Constructs an MipsO3CPU with the given parameters. */ + MipsO3CPU(Params *params); + + /** Registers statistics. */ + void regStats(); + + /** Translates instruction requestion in syscall emulation mode. */ + Fault translateInstReq(RequestPtr &req, Thread *thread) + { + return thread->getProcessPtr()->pTable->translate(req); + } + + /** Translates data read request in syscall emulation mode. */ + Fault translateDataReadReq(RequestPtr &req, Thread *thread) + { + return thread->getProcessPtr()->pTable->translate(req); + } + + /** Translates data write request in syscall emulation mode. */ + Fault translateDataWriteReq(RequestPtr &req, Thread *thread) + { + return thread->getProcessPtr()->pTable->translate(req); + } + + /** Reads a miscellaneous register. */ + TheISA::MiscReg readMiscReg(int misc_reg, unsigned tid); + + /** Reads a misc. register, including any side effects the read + * might have as defined by the architecture. + */ + TheISA::MiscReg readMiscRegWithEffect(int misc_reg, + Fault &fault, unsigned tid); + + /** Sets a miscellaneous register. */ + Fault setMiscReg(int misc_reg, const TheISA::MiscReg &val, unsigned tid); + + /** Sets a misc. register, including any side effects the write + * might have as defined by the architecture. + */ + Fault setMiscRegWithEffect(int misc_reg, + const TheISA::MiscReg &val, unsigned tid); + + /** Initiates a squash of all in-flight instructions for a given + * thread. The source of the squash is an external update of + * state through the TC. + */ + void squashFromTC(unsigned tid); + + /** Traps to handle given fault. */ + void trap(Fault fault, unsigned tid); + + /** Executes a syscall. + * @todo: Determine if this needs to be virtual. + */ + void syscall(int64_t callnum, int tid); + /** Gets a syscall argument. */ + TheISA::IntReg getSyscallArg(int i, int tid); + + /** Used to shift args for indirect syscall. */ + void setSyscallArg(int i, TheISA::IntReg val, int tid); + + /** Sets the return value of a syscall. */ + void setSyscallReturn(SyscallReturn return_value, int tid); + + /** CPU read function, forwards read to LSQ. */ + template <class T> + Fault read(RequestPtr &req, T &data, int load_idx) + { + return this->iew.ldstQueue.read(req, data, load_idx); + } + + /** CPU write function, forwards write to LSQ. */ + template <class T> + Fault write(RequestPtr &req, T &data, int store_idx) + { + return this->iew.ldstQueue.write(req, data, store_idx); + } + + Addr lockAddr; + + /** Temporary fix for the lock flag, works in the UP case. */ + bool lockFlag; +}; + +#endif // __CPU_O3_MIPS_CPU_HH__ diff --git a/src/cpu/o3/mips/cpu_builder.cc b/src/cpu/o3/mips/cpu_builder.cc new file mode 100644 index 000000000..f1c3b33a5 --- /dev/null +++ b/src/cpu/o3/mips/cpu_builder.cc @@ -0,0 +1,394 @@ +/* + * Copyright (c) 2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Kevin Lim + * Korey Sewell + */ + +#include <string> + +#include "cpu/base.hh" +#include "cpu/o3/mips/cpu.hh" +#include "cpu/o3/mips/impl.hh" +#include "cpu/o3/mips/params.hh" +#include "cpu/o3/fu_pool.hh" +#include "sim/builder.hh" + +class DerivO3CPU : public MipsO3CPU<MipsSimpleImpl> +{ + public: + DerivO3CPU(MipsSimpleParams *p) + : MipsO3CPU<MipsSimpleImpl>(p) + { } +}; + +BEGIN_DECLARE_SIM_OBJECT_PARAMS(DerivO3CPU) + +Param<int> clock; +Param<int> numThreads; +Param<int> activity; + +SimObjectVectorParam<Process *> workload; + +SimObjectParam<MemObject *> mem; + +SimObjectParam<BaseCPU *> checker; + +Param<Counter> max_insts_any_thread; +Param<Counter> max_insts_all_threads; +Param<Counter> max_loads_any_thread; +Param<Counter> max_loads_all_threads; + +Param<unsigned> cachePorts; + +Param<unsigned> decodeToFetchDelay; +Param<unsigned> renameToFetchDelay; +Param<unsigned> iewToFetchDelay; +Param<unsigned> commitToFetchDelay; +Param<unsigned> fetchWidth; + +Param<unsigned> renameToDecodeDelay; +Param<unsigned> iewToDecodeDelay; +Param<unsigned> commitToDecodeDelay; +Param<unsigned> fetchToDecodeDelay; +Param<unsigned> decodeWidth; + +Param<unsigned> iewToRenameDelay; +Param<unsigned> commitToRenameDelay; +Param<unsigned> decodeToRenameDelay; +Param<unsigned> renameWidth; + +Param<unsigned> commitToIEWDelay; +Param<unsigned> renameToIEWDelay; +Param<unsigned> issueToExecuteDelay; +Param<unsigned> dispatchWidth; +Param<unsigned> issueWidth; +Param<unsigned> wbWidth; +Param<unsigned> wbDepth; +SimObjectParam<FUPool *> fuPool; + +Param<unsigned> iewToCommitDelay; +Param<unsigned> renameToROBDelay; +Param<unsigned> commitWidth; +Param<unsigned> squashWidth; +Param<Tick> trapLatency; + +Param<unsigned> backComSize; +Param<unsigned> forwardComSize; + +Param<std::string> predType; +Param<unsigned> localPredictorSize; +Param<unsigned> localCtrBits; +Param<unsigned> localHistoryTableSize; +Param<unsigned> localHistoryBits; +Param<unsigned> globalPredictorSize; +Param<unsigned> globalCtrBits; +Param<unsigned> globalHistoryBits; +Param<unsigned> choicePredictorSize; +Param<unsigned> choiceCtrBits; + +Param<unsigned> BTBEntries; +Param<unsigned> BTBTagSize; + +Param<unsigned> RASSize; + +Param<unsigned> LQEntries; +Param<unsigned> SQEntries; +Param<unsigned> LFSTSize; +Param<unsigned> SSITSize; + +Param<unsigned> numPhysIntRegs; +Param<unsigned> numPhysFloatRegs; +Param<unsigned> numIQEntries; +Param<unsigned> numROBEntries; + +Param<unsigned> smtNumFetchingThreads; +Param<std::string> smtFetchPolicy; +Param<std::string> smtLSQPolicy; +Param<unsigned> smtLSQThreshold; +Param<std::string> smtIQPolicy; +Param<unsigned> smtIQThreshold; +Param<std::string> smtROBPolicy; +Param<unsigned> smtROBThreshold; +Param<std::string> smtCommitPolicy; + +Param<unsigned> instShiftAmt; + +Param<bool> defer_registration; + +Param<bool> function_trace; +Param<Tick> function_trace_start; + +END_DECLARE_SIM_OBJECT_PARAMS(DerivO3CPU) + +BEGIN_INIT_SIM_OBJECT_PARAMS(DerivO3CPU) + + INIT_PARAM(clock, "clock speed"), + INIT_PARAM(numThreads, "number of HW thread contexts"), + INIT_PARAM_DFLT(activity, "Initial activity count", 0), + + INIT_PARAM(workload, "Processes to run"), + + INIT_PARAM(mem, "Memory"), + + INIT_PARAM_DFLT(checker, "Checker CPU", NULL), + + INIT_PARAM_DFLT(max_insts_any_thread, + "Terminate when any thread reaches this inst count", + 0), + INIT_PARAM_DFLT(max_insts_all_threads, + "Terminate when all threads have reached" + "this inst count", + 0), + INIT_PARAM_DFLT(max_loads_any_thread, + "Terminate when any thread reaches this load count", + 0), + INIT_PARAM_DFLT(max_loads_all_threads, + "Terminate when all threads have reached this load" + "count", + 0), + + INIT_PARAM_DFLT(cachePorts, "Cache Ports", 200), + + INIT_PARAM(decodeToFetchDelay, "Decode to fetch delay"), + INIT_PARAM(renameToFetchDelay, "Rename to fetch delay"), + INIT_PARAM(iewToFetchDelay, "Issue/Execute/Writeback to fetch" + "delay"), + INIT_PARAM(commitToFetchDelay, "Commit to fetch delay"), + INIT_PARAM(fetchWidth, "Fetch width"), + INIT_PARAM(renameToDecodeDelay, "Rename to decode delay"), + INIT_PARAM(iewToDecodeDelay, "Issue/Execute/Writeback to decode" + "delay"), + INIT_PARAM(commitToDecodeDelay, "Commit to decode delay"), + INIT_PARAM(fetchToDecodeDelay, "Fetch to decode delay"), + INIT_PARAM(decodeWidth, "Decode width"), + + INIT_PARAM(iewToRenameDelay, "Issue/Execute/Writeback to rename" + "delay"), + INIT_PARAM(commitToRenameDelay, "Commit to rename delay"), + INIT_PARAM(decodeToRenameDelay, "Decode to rename delay"), + INIT_PARAM(renameWidth, "Rename width"), + + INIT_PARAM(commitToIEWDelay, "Commit to " + "Issue/Execute/Writeback delay"), + INIT_PARAM(renameToIEWDelay, "Rename to " + "Issue/Execute/Writeback delay"), + INIT_PARAM(issueToExecuteDelay, "Issue to execute delay (internal" + "to the IEW stage)"), + INIT_PARAM(dispatchWidth, "Dispatch width"), + INIT_PARAM(issueWidth, "Issue width"), + INIT_PARAM(wbWidth, "Writeback width"), + INIT_PARAM(wbDepth, "Writeback depth (number of cycles it can buffer)"), + INIT_PARAM_DFLT(fuPool, "Functional unit pool", NULL), + + INIT_PARAM(iewToCommitDelay, "Issue/Execute/Writeback to commit " + "delay"), + INIT_PARAM(renameToROBDelay, "Rename to reorder buffer delay"), + INIT_PARAM(commitWidth, "Commit width"), + INIT_PARAM(squashWidth, "Squash width"), + INIT_PARAM_DFLT(trapLatency, "Number of cycles before the trap is handled", 6), + + INIT_PARAM(backComSize, "Time buffer size for backwards communication"), + INIT_PARAM(forwardComSize, "Time buffer size for forward communication"), + + INIT_PARAM(predType, "Type of branch predictor ('local', 'tournament')"), + INIT_PARAM(localPredictorSize, "Size of local predictor"), + INIT_PARAM(localCtrBits, "Bits per counter"), + INIT_PARAM(localHistoryTableSize, "Size of local history table"), + INIT_PARAM(localHistoryBits, "Bits for the local history"), + INIT_PARAM(globalPredictorSize, "Size of global predictor"), + INIT_PARAM(globalCtrBits, "Bits per counter"), + INIT_PARAM(globalHistoryBits, "Bits of history"), + INIT_PARAM(choicePredictorSize, "Size of choice predictor"), + INIT_PARAM(choiceCtrBits, "Bits of choice counters"), + + INIT_PARAM(BTBEntries, "Number of BTB entries"), + INIT_PARAM(BTBTagSize, "Size of the BTB tags, in bits"), + + INIT_PARAM(RASSize, "RAS size"), + + INIT_PARAM(LQEntries, "Number of load queue entries"), + INIT_PARAM(SQEntries, "Number of store queue entries"), + INIT_PARAM(LFSTSize, "Last fetched store table size"), + INIT_PARAM(SSITSize, "Store set ID table size"), + + INIT_PARAM(numPhysIntRegs, "Number of physical integer registers"), + INIT_PARAM(numPhysFloatRegs, "Number of physical floating point " + "registers"), + INIT_PARAM(numIQEntries, "Number of instruction queue entries"), + INIT_PARAM(numROBEntries, "Number of reorder buffer entries"), + + INIT_PARAM_DFLT(smtNumFetchingThreads, "SMT Number of Fetching Threads", 1), + INIT_PARAM_DFLT(smtFetchPolicy, "SMT Fetch Policy", "SingleThread"), + INIT_PARAM_DFLT(smtLSQPolicy, "SMT LSQ Sharing Policy", "Partitioned"), + INIT_PARAM_DFLT(smtLSQThreshold,"SMT LSQ Threshold", 100), + INIT_PARAM_DFLT(smtIQPolicy, "SMT IQ Policy", "Partitioned"), + INIT_PARAM_DFLT(smtIQThreshold, "SMT IQ Threshold", 100), + INIT_PARAM_DFLT(smtROBPolicy, "SMT ROB Sharing Policy", "Partitioned"), + INIT_PARAM_DFLT(smtROBThreshold,"SMT ROB Threshold", 100), + INIT_PARAM_DFLT(smtCommitPolicy,"SMT Commit Fetch Policy", "RoundRobin"), + + INIT_PARAM(instShiftAmt, "Number of bits to shift instructions by"), + INIT_PARAM(defer_registration, "defer system registration (for sampling)"), + + INIT_PARAM(function_trace, "Enable function trace"), + INIT_PARAM(function_trace_start, "Cycle to start function trace") + +END_INIT_SIM_OBJECT_PARAMS(DerivO3CPU) + +CREATE_SIM_OBJECT(DerivO3CPU) +{ + DerivO3CPU *cpu; + + // In non-full-system mode, we infer the number of threads from + // the workload if it's not explicitly specified. + int actual_num_threads = + (numThreads.isValid() && numThreads >= workload.size()) ? + numThreads : workload.size(); + + if (workload.size() == 0) { + fatal("Must specify at least one workload!"); + } + + MipsSimpleParams *params = new MipsSimpleParams; + + params->clock = clock; + + params->name = getInstanceName(); + params->numberOfThreads = actual_num_threads; + params->activity = activity; + + params->workload = workload; + + params->mem = mem; + + params->checker = checker; + + params->max_insts_any_thread = max_insts_any_thread; + params->max_insts_all_threads = max_insts_all_threads; + params->max_loads_any_thread = max_loads_any_thread; + params->max_loads_all_threads = max_loads_all_threads; + + // + // Caches + // + params->cachePorts = cachePorts; + + params->decodeToFetchDelay = decodeToFetchDelay; + params->renameToFetchDelay = renameToFetchDelay; + params->iewToFetchDelay = iewToFetchDelay; + params->commitToFetchDelay = commitToFetchDelay; + params->fetchWidth = fetchWidth; + + params->renameToDecodeDelay = renameToDecodeDelay; + params->iewToDecodeDelay = iewToDecodeDelay; + params->commitToDecodeDelay = commitToDecodeDelay; + params->fetchToDecodeDelay = fetchToDecodeDelay; + params->decodeWidth = decodeWidth; + + params->iewToRenameDelay = iewToRenameDelay; + params->commitToRenameDelay = commitToRenameDelay; + params->decodeToRenameDelay = decodeToRenameDelay; + params->renameWidth = renameWidth; + + params->commitToIEWDelay = commitToIEWDelay; + params->renameToIEWDelay = renameToIEWDelay; + params->issueToExecuteDelay = issueToExecuteDelay; + params->dispatchWidth = dispatchWidth; + params->issueWidth = issueWidth; + params->wbWidth = wbWidth; + params->wbDepth = wbDepth; + params->fuPool = fuPool; + + params->iewToCommitDelay = iewToCommitDelay; + params->renameToROBDelay = renameToROBDelay; + params->commitWidth = commitWidth; + params->squashWidth = squashWidth; + params->trapLatency = trapLatency; + + params->backComSize = backComSize; + params->forwardComSize = forwardComSize; + + params->predType = predType; + params->localPredictorSize = localPredictorSize; + params->localCtrBits = localCtrBits; + params->localHistoryTableSize = localHistoryTableSize; + params->localHistoryBits = localHistoryBits; + params->globalPredictorSize = globalPredictorSize; + params->globalCtrBits = globalCtrBits; + params->globalHistoryBits = globalHistoryBits; + params->choicePredictorSize = choicePredictorSize; + params->choiceCtrBits = choiceCtrBits; + + params->BTBEntries = BTBEntries; + params->BTBTagSize = BTBTagSize; + + params->RASSize = RASSize; + + params->LQEntries = LQEntries; + params->SQEntries = SQEntries; + + params->SSITSize = SSITSize; + params->LFSTSize = LFSTSize; + + params->numPhysIntRegs = numPhysIntRegs; + params->numPhysFloatRegs = numPhysFloatRegs; + params->numIQEntries = numIQEntries; + params->numROBEntries = numROBEntries; + + params->smtNumFetchingThreads = smtNumFetchingThreads; + + // Default smtFetchPolicy to "RoundRobin", if necessary. + std::string round_robin_policy = "RoundRobin"; + std::string single_thread = "SingleThread"; + + if (actual_num_threads > 1 && single_thread.compare(smtFetchPolicy) == 0) + params->smtFetchPolicy = round_robin_policy; + else + params->smtFetchPolicy = smtFetchPolicy; + + params->smtIQPolicy = smtIQPolicy; + params->smtLSQPolicy = smtLSQPolicy; + params->smtLSQThreshold = smtLSQThreshold; + params->smtROBPolicy = smtROBPolicy; + params->smtROBThreshold = smtROBThreshold; + params->smtCommitPolicy = smtCommitPolicy; + + params->instShiftAmt = 2; + + params->deferRegistration = defer_registration; + + params->functionTrace = function_trace; + params->functionTraceStart = function_trace_start; + + cpu = new DerivO3CPU(params); + + return cpu; +} + +REGISTER_SIM_OBJECT("DerivO3CPU", DerivO3CPU) + diff --git a/src/cpu/o3/mips/cpu_impl.hh b/src/cpu/o3/mips/cpu_impl.hh new file mode 100644 index 000000000..e08741626 --- /dev/null +++ b/src/cpu/o3/mips/cpu_impl.hh @@ -0,0 +1,250 @@ +/* + * Copyright (c) 2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Kevin Lim + * Korey Sewell + */ + +#include "config/use_checker.hh" + +#include "arch/mips/faults.hh" +#include "base/cprintf.hh" +#include "base/statistics.hh" +#include "base/timebuf.hh" +#include "cpu/checker/thread_context.hh" +#include "sim/sim_events.hh" +#include "sim/stats.hh" + +#include "cpu/o3/mips/cpu.hh" +#include "cpu/o3/mips/params.hh" +#include "cpu/o3/mips/thread_context.hh" +#include "cpu/o3/comm.hh" +#include "cpu/o3/thread_state.hh" + +template <class Impl> +MipsO3CPU<Impl>::MipsO3CPU(Params *params) + : FullO3CPU<Impl>(params) +{ + DPRINTF(O3CPU, "Creating MipsO3CPU object.\n"); + + // Setup any thread state. + this->thread.resize(this->numThreads); + + for (int i = 0; i < this->numThreads; ++i) { + if (i < params->workload.size()) { + DPRINTF(O3CPU, "Workload[%i] process is %#x", + i, this->thread[i]); + this->thread[i] = new Thread(this, i, params->workload[i], + i, params->mem); + + this->thread[i]->setStatus(ThreadContext::Suspended); + + + /* Use this port to for syscall emulation writes to memory. */ + Port *mem_port; + TranslatingPort *trans_port; + trans_port = new TranslatingPort(csprintf("%s-%d-funcport", + name(), i), + params->workload[i]->pTable, + false); + mem_port = params->mem->getPort("functional"); + mem_port->setPeer(trans_port); + trans_port->setPeer(mem_port); + this->thread[i]->setMemPort(trans_port); + + //usedTids[i] = true; + //threadMap[i] = i; + } else { + //Allocate Empty thread so M5 can use later + //when scheduling threads to CPU + Process* dummy_proc = NULL; + + this->thread[i] = new Thread(this, i, dummy_proc, i, params->mem); + //usedTids[i] = false; + } + + ThreadContext *tc; + + // Setup the TC that will serve as the interface to the threads/CPU. + MipsTC<Impl> *mips_tc = + new MipsTC<Impl>; + + tc = mips_tc; + + // If we're using a checker, then the TC should be the + // CheckerThreadContext. +#if USE_CHECKER + if (params->checker) { + tc = new CheckerThreadContext<MipsTC<Impl> >( + mips_tc, this->checker); + } +#endif + + mips_tc->cpu = this; + mips_tc->thread = this->thread[i]; + + // Give the thread the TC. + this->thread[i]->tc = tc; + + // Add the TC to the CPU's list of TC's. + this->threadContexts.push_back(tc); + } + + for (int i=0; i < this->numThreads; i++) { + this->thread[i]->setFuncExeInst(0); + } + + // Sets CPU pointers. These must be set at this level because the CPU + // pointers are defined to be the highest level of CPU class. + this->fetch.setCPU(this); + this->decode.setCPU(this); + this->rename.setCPU(this); + this->iew.setCPU(this); + this->commit.setCPU(this); + + this->rob.setCPU(this); + this->regFile.setCPU(this); + + lockAddr = 0; + lockFlag = false; +} + +template <class Impl> +void +MipsO3CPU<Impl>::regStats() +{ + // Register stats for everything that has stats. + this->fullCPURegStats(); + this->fetch.regStats(); + this->decode.regStats(); + this->rename.regStats(); + this->iew.regStats(); + this->commit.regStats(); +} + + +template <class Impl> +MiscReg +MipsO3CPU<Impl>::readMiscReg(int misc_reg, unsigned tid) +{ + return this->regFile.readMiscReg(misc_reg, tid); +} + +template <class Impl> +MiscReg +MipsO3CPU<Impl>::readMiscRegWithEffect(int misc_reg, Fault &fault, + unsigned tid) +{ + return this->regFile.readMiscRegWithEffect(misc_reg, fault, tid); +} + +template <class Impl> +Fault +MipsO3CPU<Impl>::setMiscReg(int misc_reg, const MiscReg &val, unsigned tid) +{ + return this->regFile.setMiscReg(misc_reg, val, tid); +} + +template <class Impl> +Fault +MipsO3CPU<Impl>::setMiscRegWithEffect(int misc_reg, const MiscReg &val, + unsigned tid) +{ + return this->regFile.setMiscRegWithEffect(misc_reg, val, tid); +} + +template <class Impl> +void +MipsO3CPU<Impl>::squashFromTC(unsigned tid) +{ + this->thread[tid]->inSyscall = true; + this->commit.generateTCEvent(tid); +} + +template <class Impl> +void +MipsO3CPU<Impl>::trap(Fault fault, unsigned tid) +{ + // Pass the thread's TC into the invoke method. + fault->invoke(this->threadContexts[tid]); +} + +#if !FULL_SYSTEM + +template <class Impl> +void +MipsO3CPU<Impl>::syscall(int64_t callnum, int tid) +{ + DPRINTF(O3CPU, "[tid:%i] Executing syscall().\n\n", tid); + + DPRINTF(Activity,"Activity: syscall() called.\n"); + + // Temporarily increase this by one to account for the syscall + // instruction. + ++(this->thread[tid]->funcExeInst); + + // Execute the actual syscall. + this->thread[tid]->syscall(callnum); + + // Decrease funcExeInst by one as the normal commit will handle + // incrementing it. + --(this->thread[tid]->funcExeInst); + + DPRINTF(O3CPU, "[tid:%i] Register 2 is %i ", tid, this->readIntReg(2)); +} + +template <class Impl> +TheISA::IntReg +MipsO3CPU<Impl>::getSyscallArg(int i, int tid) +{ + return this->readArchIntReg(MipsISA::ArgumentReg0 + i, tid); +} + +template <class Impl> +void +MipsO3CPU<Impl>::setSyscallArg(int i, IntReg val, int tid) +{ + this->setArchIntReg(MipsISA::ArgumentReg0 + i, val, tid); +} + +template <class Impl> +void +MipsO3CPU<Impl>::setSyscallReturn(SyscallReturn return_value, int tid) +{ + // check for error condition. + if (return_value.successful()) { + // no error + this->setArchIntReg(TheISA::SyscallSuccessReg, 0, tid); + this->setArchIntReg(TheISA::ReturnValueReg, return_value.value(), tid); + } else { + // got an error, return details + this->setArchIntReg(TheISA::SyscallSuccessReg, + (TheISA::IntReg) -1, tid); + this->setArchIntReg(TheISA::ReturnValueReg, -return_value.value(), tid); + } +} +#endif diff --git a/src/cpu/o3/mips/dyn_inst.cc b/src/cpu/o3/mips/dyn_inst.cc new file mode 100755 index 000000000..216aa7d2c --- /dev/null +++ b/src/cpu/o3/mips/dyn_inst.cc @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Kevin Lim + * Korey Sewell + */ + +#include "cpu/o3/mips/dyn_inst_impl.hh" +#include "cpu/o3/mips/impl.hh" + +// Force instantiation of MipsDynInst for all the implementations that +// are needed. +template class MipsDynInst<MipsSimpleImpl>; diff --git a/src/cpu/o3/mips/dyn_inst.hh b/src/cpu/o3/mips/dyn_inst.hh new file mode 100755 index 000000000..06bdfcec4 --- /dev/null +++ b/src/cpu/o3/mips/dyn_inst.hh @@ -0,0 +1,282 @@ +/* + * Copyright (c) 2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Kevin Lim + * Korey Sewell + */ + +#ifndef __CPU_O3_MIPS_DYN_INST_HH__ +#define __CPU_O3_MIPS_DYN_INST_HH__ + +#include "arch/isa_traits.hh" +#include "cpu/base_dyn_inst.hh" +#include "cpu/inst_seq.hh" +#include "cpu/o3/mips/cpu.hh" +#include "cpu/o3/mips/impl.hh" + +class Packet; + +/** + * Mostly implementation & ISA specific MipsDynInst. As with most + * other classes in the new CPU model, it is templated on the Impl to + * allow for passing in of all types, such as the CPU type and the ISA + * type. The MipsDynInst serves as the primary interface to the CPU + * for instructions that are executing. + */ +template <class Impl> +class MipsDynInst : public BaseDynInst<Impl> +{ + public: + /** Typedef for the CPU. */ + typedef typename Impl::O3CPU O3CPU; + + /** Binary machine instruction type. */ + typedef TheISA::MachInst MachInst; + /** Extended machine instruction type. */ + typedef TheISA::ExtMachInst ExtMachInst; + /** Logical register index type. */ + typedef TheISA::RegIndex RegIndex; + /** Integer register index type. */ + typedef TheISA::IntReg IntReg; + typedef TheISA::FloatReg FloatReg; + typedef TheISA::FloatRegBits FloatRegBits; + /** Misc register index type. */ + typedef TheISA::MiscReg MiscReg; + + enum { + MaxInstSrcRegs = TheISA::MaxInstSrcRegs, //< Max source regs + MaxInstDestRegs = TheISA::MaxInstDestRegs, //< Max dest regs + }; + + public: + /** BaseDynInst constructor given a binary instruction. */ + MipsDynInst(ExtMachInst inst, Addr PC, Addr Pred_PC, InstSeqNum seq_num, + O3CPU *cpu); + + /** BaseDynInst constructor given a static inst pointer. */ + MipsDynInst(StaticInstPtr &_staticInst); + + /** Executes the instruction.*/ + Fault execute(); + + /** Initiates the access. Only valid for memory operations. */ + Fault initiateAcc(); + + /** Completes the access. Only valid for memory operations. */ + Fault completeAcc(Packet *pkt); + + private: + /** Initializes variables. */ + void initVars(); + + public: + /** Reads a miscellaneous register. */ + MiscReg readMiscReg(int misc_reg) + { + return this->cpu->readMiscReg(misc_reg, this->threadNumber); + } + + /** Reads a misc. register, including any side-effects the read + * might have as defined by the architecture. + */ + MiscReg readMiscRegWithEffect(int misc_reg, Fault &fault) + { + return this->cpu->readMiscRegWithEffect(misc_reg, fault, + this->threadNumber); + } + + /** Sets a misc. register. */ + Fault setMiscReg(int misc_reg, const MiscReg &val) + { + this->instResult.integer = val; + return this->cpu->setMiscReg(misc_reg, val, this->threadNumber); + } + + /** Sets a misc. register, including any side-effects the write + * might have as defined by the architecture. + */ + Fault setMiscRegWithEffect(int misc_reg, const MiscReg &val) + { + return this->cpu->setMiscRegWithEffect(misc_reg, val, + this->threadNumber); + } + + /** Calls a syscall. */ + void syscall(int64_t callnum); + + private: + /** Physical register index of the destination registers of this + * instruction. + */ + PhysRegIndex _destRegIdx[MaxInstDestRegs]; + + /** Physical register index of the source registers of this + * instruction. + */ + PhysRegIndex _srcRegIdx[MaxInstSrcRegs]; + + /** Physical register index of the previous producers of the + * architected destinations. + */ + PhysRegIndex _prevDestRegIdx[MaxInstDestRegs]; + + public: + + // The register accessor methods provide the index of the + // instruction's operand (e.g., 0 or 1), not the architectural + // register index, to simplify the implementation of register + // renaming. We find the architectural register index by indexing + // into the instruction's own operand index table. Note that a + // raw pointer to the StaticInst is provided instead of a + // ref-counted StaticInstPtr to redice overhead. This is fine as + // long as these methods don't copy the pointer into any long-term + // storage (which is pretty hard to imagine they would have reason + // to do). + + uint64_t readIntReg(const StaticInst *si, int idx) + { + return this->cpu->readIntReg(_srcRegIdx[idx]); + } + + FloatReg readFloatReg(const StaticInst *si, int idx, int width) + { + return this->cpu->readFloatReg(_srcRegIdx[idx], width); + } + + FloatReg readFloatReg(const StaticInst *si, int idx) + { + return this->cpu->readFloatReg(_srcRegIdx[idx]); + } + + FloatRegBits readFloatRegBits(const StaticInst *si, int idx, int width) + { + return this->cpu->readFloatRegBits(_srcRegIdx[idx], width); + } + + FloatRegBits readFloatRegBits(const StaticInst *si, int idx) + { + return this->cpu->readFloatRegBits(_srcRegIdx[idx]); + } + + /** @todo: Make results into arrays so they can handle multiple dest + * registers. + */ + void setIntReg(const StaticInst *si, int idx, uint64_t val) + { + this->cpu->setIntReg(_destRegIdx[idx], val); + BaseDynInst<Impl>::setIntReg(si, idx, val); + } + + void setFloatReg(const StaticInst *si, int idx, FloatReg val, int width) + { + this->cpu->setFloatReg(_destRegIdx[idx], val, width); + BaseDynInst<Impl>::setFloatReg(si, idx, val, width); + } + + void setFloatReg(const StaticInst *si, int idx, FloatReg val) + { + this->cpu->setFloatReg(_destRegIdx[idx], val); + BaseDynInst<Impl>::setFloatReg(si, idx, val); + } + + void setFloatRegBits(const StaticInst *si, int idx, + FloatRegBits val, int width) + { + this->cpu->setFloatRegBits(_destRegIdx[idx], val, width); + BaseDynInst<Impl>::setFloatRegBits(si, idx, val); + } + + void setFloatRegBits(const StaticInst *si, int idx, FloatRegBits val) + { + this->cpu->setFloatRegBits(_destRegIdx[idx], val); + BaseDynInst<Impl>::setFloatRegBits(si, idx, val); + } + + /** Returns the physical register index of the i'th destination + * register. + */ + PhysRegIndex renamedDestRegIdx(int idx) const + { + return _destRegIdx[idx]; + } + + /** Returns the physical register index of the i'th source register. */ + PhysRegIndex renamedSrcRegIdx(int idx) const + { + return _srcRegIdx[idx]; + } + + /** Returns the physical register index of the previous physical register + * that remapped to the same logical register index. + */ + PhysRegIndex prevDestRegIdx(int idx) const + { + return _prevDestRegIdx[idx]; + } + + /** Renames a destination register to a physical register. Also records + * the previous physical register that the logical register mapped to. + */ + void renameDestReg(int idx, + PhysRegIndex renamed_dest, + PhysRegIndex previous_rename) + { + _destRegIdx[idx] = renamed_dest; + _prevDestRegIdx[idx] = previous_rename; + } + + /** Renames a source logical register to the physical register which + * has/will produce that logical register's result. + * @todo: add in whether or not the source register is ready. + */ + void renameSrcReg(int idx, PhysRegIndex renamed_src) + { + _srcRegIdx[idx] = renamed_src; + } + + public: + /** Calculates EA part of a memory instruction. Currently unused, + * though it may be useful in the future if we want to split + * memory operations into EA calculation and memory access parts. + */ + Fault calcEA() + { + return this->staticInst->eaCompInst()->execute(this, this->traceData); + } + + /** Does the memory access part of a memory instruction. Currently unused, + * though it may be useful in the future if we want to split + * memory operations into EA calculation and memory access parts. + */ + Fault memAccess() + { + return this->staticInst->memAccInst()->execute(this, this->traceData); + } +}; + +#endif // __CPU_O3_MIPS_DYN_INST_HH__ + diff --git a/src/cpu/o3/mips/dyn_inst_impl.hh b/src/cpu/o3/mips/dyn_inst_impl.hh new file mode 100755 index 000000000..57dec1ccf --- /dev/null +++ b/src/cpu/o3/mips/dyn_inst_impl.hh @@ -0,0 +1,116 @@ +/* + * Copyright (c) 2004-2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Kevin Lim + */ + +#include "cpu/o3/mips/dyn_inst.hh" + +template <class Impl> +MipsDynInst<Impl>::MipsDynInst(ExtMachInst inst, Addr PC, Addr Pred_PC, + InstSeqNum seq_num, O3CPU *cpu) + : BaseDynInst<Impl>(inst, PC, Pred_PC, seq_num, cpu) +{ + initVars(); +} + +template <class Impl> +MipsDynInst<Impl>::MipsDynInst(StaticInstPtr &_staticInst) + : BaseDynInst<Impl>(_staticInst) +{ + initVars(); +} + +template <class Impl> +void +MipsDynInst<Impl>::initVars() +{ + // Make sure to have the renamed register entries set to the same + // as the normal register entries. It will allow the IQ to work + // without any modifications. + for (int i = 0; i < this->staticInst->numDestRegs(); i++) { + _destRegIdx[i] = this->staticInst->destRegIdx(i); + } + + for (int i = 0; i < this->staticInst->numSrcRegs(); i++) { + _srcRegIdx[i] = this->staticInst->srcRegIdx(i); + this->_readySrcRegIdx[i] = 0; + } +} + +template <class Impl> +Fault +MipsDynInst<Impl>::execute() +{ + // @todo: Pretty convoluted way to avoid squashing from happening + // when using the TC during an instruction's execution + // (specifically for instructions that have side-effects that use + // the TC). Fix this. + bool in_syscall = this->thread->inSyscall; + this->thread->inSyscall = true; + + this->fault = this->staticInst->execute(this, this->traceData); + + this->thread->inSyscall = in_syscall; + + return this->fault; +} + +template <class Impl> +Fault +MipsDynInst<Impl>::initiateAcc() +{ + // @todo: Pretty convoluted way to avoid squashing from happening + // when using the TC during an instruction's execution + // (specifically for instructions that have side-effects that use + // the TC). Fix this. + bool in_syscall = this->thread->inSyscall; + this->thread->inSyscall = true; + + this->fault = this->staticInst->initiateAcc(this, this->traceData); + + this->thread->inSyscall = in_syscall; + + return this->fault; +} + +template <class Impl> +Fault +MipsDynInst<Impl>::completeAcc(Packet *pkt) +{ + this->fault = this->staticInst->completeAcc(pkt, this, this->traceData); + + return this->fault; +} + +template <class Impl> +void +MipsDynInst<Impl>::syscall(int64_t callnum) +{ + this->cpu->syscall(callnum, this->threadNumber); +} + diff --git a/src/cpu/o3/mips/impl.hh b/src/cpu/o3/mips/impl.hh new file mode 100644 index 000000000..ac7181a19 --- /dev/null +++ b/src/cpu/o3/mips/impl.hh @@ -0,0 +1,93 @@ +/* + * Copyright (c) 2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Kevin Lim + * Korey Sewell + */ + +#ifndef __CPU_O3_MIPS_IMPL_HH__ +#define __CPU_O3_MIPS_IMPL_HH__ + +#include "arch/mips/isa_traits.hh" + +#include "cpu/o3/mips/params.hh" +#include "cpu/o3/cpu_policy.hh" + + +// Forward declarations. +template <class Impl> +class MipsDynInst; + +template <class Impl> +class MipsO3CPU; + +/** Implementation specific struct that defines several key types to the + * CPU, the stages within the CPU, the time buffers, and the DynInst. + * The struct defines the ISA, the CPU policy, the specific DynInst, the + * specific O3CPU, and all of the structs from the time buffers to do + * communication. + * This is one of the key things that must be defined for each hardware + * specific CPU implementation. + */ +struct MipsSimpleImpl +{ + /** The type of MachInst. */ + typedef TheISA::MachInst MachInst; + + /** The CPU policy to be used, which defines all of the CPU stages. */ + typedef SimpleCPUPolicy<MipsSimpleImpl> CPUPol; + + /** The DynInst type to be used. */ + typedef MipsDynInst<MipsSimpleImpl> DynInst; + + /** The refcounted DynInst pointer to be used. In most cases this is + * what should be used, and not DynInst *. + */ + typedef RefCountingPtr<DynInst> DynInstPtr; + + /** The O3CPU type to be used. */ + typedef MipsO3CPU<MipsSimpleImpl> O3CPU; + + /** Same typedef, but for CPUType. BaseDynInst may not always use + * an O3 CPU, so it's clearer to call it CPUType instead in that + * case. + */ + typedef O3CPU CPUType; + + /** The Params to be passed to each stage. */ + typedef MipsSimpleParams Params; + + enum { + MaxWidth = 8, + MaxThreads = 4 + }; +}; + +/** The O3Impl to be used. */ +typedef MipsSimpleImpl O3CPUImpl; + +#endif // __CPU_O3_MIPS_IMPL_HH__ diff --git a/src/cpu/o3/mips/params.hh b/src/cpu/o3/mips/params.hh new file mode 100644 index 000000000..d1ac62e21 --- /dev/null +++ b/src/cpu/o3/mips/params.hh @@ -0,0 +1,63 @@ +/* + * Copyright (c) 2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Kevin Lim + * Korey Sewell + */ + +#ifndef __CPU_O3_MIPS_PARAMS_HH__ +#define __CPU_O3_MIPS_PARAMS_HH__ + +#include "cpu/o3/cpu.hh" +#include "cpu/o3/params.hh" + +//Forward declarations +//class MipsDTB; +//class MipsITB; +class MemObject; +class Process; +class System; + +/** + * This file defines the parameters that will be used for the MipsO3CPU. + * This must be defined externally so that the Impl can have a params class + * defined that it can pass to all of the individual stages. + */ + +class MipsSimpleParams : public O3Params +{ + public: + MipsSimpleParams() {} + +#if FULL_SYSTEM + //Full System Paramater Objects place here + MipsITB *itb; + MipsDTB *dtb; +#endif +}; + +#endif // __CPU_O3_MIPS_PARAMS_HH__ diff --git a/src/cpu/o3/mips/thread_context.cc b/src/cpu/o3/mips/thread_context.cc new file mode 100755 index 000000000..0061a2a63 --- /dev/null +++ b/src/cpu/o3/mips/thread_context.cc @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Kevin Lim + * Korey Sewell + */ + +#include "cpu/o3/thread_context.hh" +#include "cpu/o3/thread_context_impl.hh" + +template class O3ThreadContext<MipsSimpleImpl>; + diff --git a/src/cpu/o3/mips/thread_context.hh b/src/cpu/o3/mips/thread_context.hh new file mode 100644 index 000000000..26b1e2e7f --- /dev/null +++ b/src/cpu/o3/mips/thread_context.hh @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Kevin Lim + * Korey Sewell + */ + +#include "arch/mips/types.hh" +#include "cpu/o3/thread_context.hh" + +template <class Impl> +class MipsTC : public O3ThreadContext<Impl> +{ + public: + virtual uint64_t readNextNPC() + { + return this->cpu->readNextNPC(this->thread->readTid()); + } + + virtual void setNextNPC(uint64_t val) + { + this->cpu->setNextNPC(val, this->thread->readTid()); + } + + virtual void changeRegFileContext(TheISA::RegContextParam param, + TheISA::RegContextVal val) + { panic("Not supported on Mips!"); } + + /** This function exits the thread context in the CPU and returns + * 1 if the CPU has no more active threads (meaning it's OK to exit); + * Used in syscall-emulation mode when a thread executes the 'exit' + * syscall. + */ + virtual int exit() + { + this->deallocate(); + + // If there are still threads executing in the system + if (this->cpu->numActiveThreads()) + return 0; // don't exit simulation + else + return 1; // exit simulation + } +}; diff --git a/src/cpu/o3/ras.hh b/src/cpu/o3/ras.hh index 5c8a93285..97846ed16 100644 --- a/src/cpu/o3/ras.hh +++ b/src/cpu/o3/ras.hh @@ -31,8 +31,7 @@ #ifndef __CPU_O3_RAS_HH__ #define __CPU_O3_RAS_HH__ -// For Addr type. -#include "arch/isa_traits.hh" +#include "sim/host.hh" #include <vector> /** Return address stack class, implements a simple RAS. */ diff --git a/src/cpu/o3/regfile.hh b/src/cpu/o3/regfile.hh index b6677b4b1..512cf0721 100644 --- a/src/cpu/o3/regfile.hh +++ b/src/cpu/o3/regfile.hh @@ -33,11 +33,11 @@ #define __CPU_O3_REGFILE_HH__ #include "arch/isa_traits.hh" -#include "arch/faults.hh" #include "arch/types.hh" #include "base/trace.hh" #include "config/full_system.hh" #include "cpu/o3/comm.hh" +#include "sim/faults.hh" #if FULL_SYSTEM #include "kern/kernel_stats.hh" diff --git a/src/cpu/o3/rename.hh b/src/cpu/o3/rename.hh index 034087feb..ba26a01dd 100644 --- a/src/cpu/o3/rename.hh +++ b/src/cpu/o3/rename.hh @@ -76,6 +76,7 @@ class DefaultRename // using a list instead of a queue. (Most other stages use a // queue) typedef std::list<DynInstPtr> InstQueue; + typedef typename std::list<DynInstPtr>::iterator ListIt; public: /** Overall rename status. Used to determine if the CPU can @@ -170,7 +171,7 @@ class DefaultRename void takeOverFrom(); /** Squashes all instructions in a thread. */ - void squash(unsigned tid); + void squash(const InstSeqNum &squash_seq_num, unsigned tid); /** Ticks rename, which processes all input signals and attempts to rename * as many instructions as possible. @@ -222,7 +223,7 @@ class DefaultRename bool unblock(unsigned tid); /** Executes actual squash, removing squashed instructions. */ - void doSquash(unsigned tid); + void doSquash(const InstSeqNum &squash_seq_num, unsigned tid); /** Removes a committed instruction's rename history. */ void removeFromHistory(InstSeqNum inst_seq_num, unsigned tid); diff --git a/src/cpu/o3/rename_impl.hh b/src/cpu/o3/rename_impl.hh index 805a72808..892eb12cf 100644 --- a/src/cpu/o3/rename_impl.hh +++ b/src/cpu/o3/rename_impl.hh @@ -26,6 +26,7 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * Authors: Kevin Lim + * Korey Sewell */ #include <list> @@ -33,8 +34,6 @@ #include "config/full_system.hh" #include "cpu/o3/rename.hh" -using namespace std; - template <class Impl> DefaultRename<Impl>::DefaultRename(Params *params) : iewToRenameDelay(params->iewToRenameDelay), @@ -222,7 +221,7 @@ DefaultRename<Impl>::initStage() template<class Impl> void -DefaultRename<Impl>::setActiveThreads(list<unsigned> *at_ptr) +DefaultRename<Impl>::setActiveThreads(std::list<unsigned> *at_ptr) { DPRINTF(Rename, "Setting active threads list pointer.\n"); activeThreads = at_ptr; @@ -271,7 +270,8 @@ DefaultRename<Impl>::switchOut() { // Clear any state, fix up the rename map. for (int i = 0; i < numThreads; i++) { - typename list<RenameHistory>::iterator hb_it = historyBuffer[i].begin(); + typename std::list<RenameHistory>::iterator hb_it = + historyBuffer[i].begin(); while (!historyBuffer[i].empty()) { assert(hb_it != historyBuffer[i].end()); @@ -318,7 +318,7 @@ DefaultRename<Impl>::takeOverFrom() template <class Impl> void -DefaultRename<Impl>::squash(unsigned tid) +DefaultRename<Impl>::squash(const InstSeqNum &squash_seq_num, unsigned tid) { DPRINTF(Rename, "[tid:%u]: Squashing instructions.\n",tid); @@ -341,19 +341,55 @@ DefaultRename<Impl>::squash(unsigned tid) unsigned squashCount = 0; for (int i=0; i<fromDecode->size; i++) { - if (fromDecode->insts[i]->threadNumber == tid) { + if (fromDecode->insts[i]->threadNumber == tid && + fromDecode->insts[i]->seqNum > squash_seq_num) { fromDecode->insts[i]->setSquashed(); wroteToTimeBuffer = true; squashCount++; } + } + // Clear the instruction list and skid buffer in case they have any + // insts in them. Since we support multiple ISAs, we cant just: + // "insts[tid].clear();" or "skidBuffer[tid].clear()" since there is + // a possible delay slot inst for different architectures + // insts[tid].clear(); +#if THE_ISA == ALPHA_ISA insts[tid].clear(); +#else + DPRINTF(Rename, "[tid:%i] Squashing incoming decode instructions until " + "[sn:%i].\n",tid, squash_seq_num); + ListIt ilist_it = insts[tid].begin(); + while (ilist_it != insts[tid].end()) { + if ((*ilist_it)->seqNum > squash_seq_num) { + (*ilist_it)->setSquashed(); + DPRINTF(Rename, "Squashing incoming decode instruction, " + "[tid:%i] [sn:%i] PC %08p.\n", tid, (*ilist_it)->seqNum, (*ilist_it)->PC); + } + ilist_it++; + } +#endif // Clear the skid buffer in case it has any data in it. + // See comments above. + // skidBuffer[tid].clear(); +#if THE_ISA == ALPHA_ISA skidBuffer[tid].clear(); - - doSquash(tid); +#else + DPRINTF(Rename, "[tid:%i] Squashing incoming skidbuffer instructions " + "until [sn:%i].\n", tid, squash_seq_num); + ListIt slist_it = skidBuffer[tid].begin(); + while (slist_it != skidBuffer[tid].end()) { + if ((*slist_it)->seqNum > squash_seq_num) { + (*slist_it)->setSquashed(); + DPRINTF(Rename, "Squashing skidbuffer instruction, [tid:%i] [sn:%i]" + "PC %08p.\n", tid, (*slist_it)->seqNum, (*slist_it)->PC); + } + slist_it++; + } +#endif + doSquash(squash_seq_num, tid); } template <class Impl> @@ -370,7 +406,7 @@ DefaultRename<Impl>::tick() sortInsts(); - list<unsigned>::iterator threads = (*activeThreads).begin(); + std::list<unsigned>::iterator threads = (*activeThreads).begin(); // Check stall and squash signals. while (threads != (*activeThreads).end()) { @@ -572,7 +608,7 @@ DefaultRename<Impl>::renameInsts(unsigned tid) if (inst->isSquashed()) { DPRINTF(Rename, "[tid:%u]: instruction %i with PC %#x is " "squashed, skipping.\n", - tid, inst->seqNum, inst->threadNumber,inst->readPC()); + tid, inst->seqNum, inst->readPC()); ++renameSquashedInsts; @@ -707,9 +743,11 @@ DefaultRename<Impl>::sortInsts() { int insts_from_decode = fromDecode->size; #ifdef DEBUG +#if THE_ISA == ALPHA_ISA for (int i=0; i < numThreads; i++) assert(insts[i].empty()); #endif +#endif for (int i = 0; i < insts_from_decode; ++i) { DynInstPtr inst = fromDecode->insts[i]; insts[inst->threadNumber].push_back(inst); @@ -720,7 +758,7 @@ template<class Impl> bool DefaultRename<Impl>::skidsEmpty() { - list<unsigned>::iterator threads = (*activeThreads).begin(); + std::list<unsigned>::iterator threads = (*activeThreads).begin(); while (threads != (*activeThreads).end()) { if (!skidBuffer[*threads++].empty()) @@ -736,7 +774,7 @@ DefaultRename<Impl>::updateStatus() { bool any_unblocking = false; - list<unsigned>::iterator threads = (*activeThreads).begin(); + std::list<unsigned>::iterator threads = (*activeThreads).begin(); threads = (*activeThreads).begin(); @@ -824,11 +862,10 @@ DefaultRename<Impl>::unblock(unsigned tid) template <class Impl> void -DefaultRename<Impl>::doSquash(unsigned tid) +DefaultRename<Impl>::doSquash(const InstSeqNum &squashed_seq_num, unsigned tid) { - typename list<RenameHistory>::iterator hb_it = historyBuffer[tid].begin(); - - InstSeqNum squashed_seq_num = fromCommit->commitInfo[tid].doneSeqNum; + typename std::list<RenameHistory>::iterator hb_it = + historyBuffer[tid].begin(); // After a syscall squashes everything, the history buffer may be empty // but the ROB may still be squashing instructions. @@ -866,7 +903,8 @@ DefaultRename<Impl>::removeFromHistory(InstSeqNum inst_seq_num, unsigned tid) "history buffer %u (size=%i), until [sn:%lli].\n", tid, tid, historyBuffer[tid].size(), inst_seq_num); - typename list<RenameHistory>::iterator hb_it = historyBuffer[tid].end(); + typename std::list<RenameHistory>::iterator hb_it = + historyBuffer[tid].end(); --hb_it; @@ -963,8 +1001,9 @@ DefaultRename<Impl>::renameDestRegs(DynInstPtr &inst,unsigned tid) historyBuffer[tid].push_front(hb_entry); - DPRINTF(Rename, "[tid:%u]: Adding instruction to history buffer, " - "[sn:%lli].\n",tid, + DPRINTF(Rename, "[tid:%u]: Adding instruction to history buffer " + "(size=%i), [sn:%lli].\n",tid, + historyBuffer[tid].size(), (*historyBuffer[tid].begin()).instSeqNum); // Tell the instruction to rename the appropriate destination @@ -1143,7 +1182,13 @@ DefaultRename<Impl>::checkSignalsAndUpdate(unsigned tid) DPRINTF(Rename, "[tid:%u]: Squashing instructions due to squash from " "commit.\n", tid); - squash(tid); +#if THE_ISA == ALPHA_ISA + InstSeqNum squashed_seq_num = fromCommit->commitInfo[tid].doneSeqNum; +#else + InstSeqNum squashed_seq_num = fromCommit->commitInfo[tid].bdelayDoneSeqNum; +#endif + + squash(squashed_seq_num, tid); return true; } @@ -1258,7 +1303,7 @@ template <class Impl> void DefaultRename<Impl>::dumpHistory() { - typename list<RenameHistory>::iterator buf_it; + typename std::list<RenameHistory>::iterator buf_it; for (int i = 0; i < numThreads; i++) { diff --git a/src/cpu/o3/rename_map.hh b/src/cpu/o3/rename_map.hh index c4c90c99a..896c66f3e 100644 --- a/src/cpu/o3/rename_map.hh +++ b/src/cpu/o3/rename_map.hh @@ -40,8 +40,7 @@ #include <vector> #include "cpu/o3/free_list.hh" -//For RegIndex -#include "arch/isa_traits.hh" +#include "arch/types.hh" class SimpleRenameMap { diff --git a/src/cpu/o3/rob_impl.hh b/src/cpu/o3/rob_impl.hh index 1b9f666b8..fab114a74 100644 --- a/src/cpu/o3/rob_impl.hh +++ b/src/cpu/o3/rob_impl.hh @@ -32,11 +32,11 @@ #include "config/full_system.hh" #include "cpu/o3/rob.hh" -using namespace std; +#include <list> template <class Impl> ROB<Impl>::ROB(unsigned _numEntries, unsigned _squashWidth, - string _smtROBPolicy, unsigned _smtROBThreshold, + std::string _smtROBPolicy, unsigned _smtROBThreshold, unsigned _numThreads) : numEntries(_numEntries), squashWidth(_squashWidth), @@ -49,7 +49,7 @@ ROB<Impl>::ROB(unsigned _numEntries, unsigned _squashWidth, threadEntries[tid] = 0; } - string policy = _smtROBPolicy; + std::string policy = _smtROBPolicy; //Convert string to lowercase std::transform(policy.begin(), policy.end(), policy.begin(), @@ -118,7 +118,7 @@ ROB<Impl>::setCPU(O3CPU *cpu_ptr) template <class Impl> void -ROB<Impl>::setActiveThreads(list<unsigned> *at_ptr) +ROB<Impl>::setActiveThreads(std::list<unsigned> *at_ptr) { DPRINTF(ROB, "Setting active threads list pointer.\n"); activeThreads = at_ptr; @@ -157,8 +157,8 @@ ROB<Impl>::resetEntries() if (robPolicy != Dynamic || numThreads > 1) { int active_threads = (*activeThreads).size(); - list<unsigned>::iterator threads = (*activeThreads).begin(); - list<unsigned>::iterator list_end = (*activeThreads).end(); + std::list<unsigned>::iterator threads = (*activeThreads).begin(); + std::list<unsigned>::iterator list_end = (*activeThreads).end(); while (threads != list_end) { if (robPolicy == Partitioned) { @@ -318,7 +318,7 @@ bool ROB<Impl>::canCommit() { //@todo: set ActiveThreads through ROB or CPU - list<unsigned>::iterator threads = (*activeThreads).begin(); + std::list<unsigned>::iterator threads = (*activeThreads).begin(); while (threads != (*activeThreads).end()) { unsigned tid = *threads++; @@ -432,7 +432,7 @@ ROB<Impl>::updateHead() bool first_valid = true; // @todo: set ActiveThreads through ROB or CPU - list<unsigned>::iterator threads = (*activeThreads).begin(); + std::list<unsigned>::iterator threads = (*activeThreads).begin(); while (threads != (*activeThreads).end()) { unsigned thread_num = *threads++; @@ -472,7 +472,7 @@ ROB<Impl>::updateTail() tail = instList[0].end(); bool first_valid = true; - list<unsigned>::iterator threads = (*activeThreads).begin(); + std::list<unsigned>::iterator threads = (*activeThreads).begin(); while (threads != (*activeThreads).end()) { unsigned tid = *threads++; diff --git a/src/cpu/o3/scoreboard.hh b/src/cpu/o3/scoreboard.hh index f8e4df3b7..eefff1d8b 100644 --- a/src/cpu/o3/scoreboard.hh +++ b/src/cpu/o3/scoreboard.hh @@ -35,7 +35,6 @@ #include <iostream> #include <utility> #include <vector> -#include "arch/alpha/isa_traits.hh" #include "base/trace.hh" #include "base/traceflags.hh" #include "cpu/o3/comm.hh" diff --git a/src/cpu/o3/store_set.hh b/src/cpu/o3/store_set.hh index f5a44a1ac..f9f7637d0 100644 --- a/src/cpu/o3/store_set.hh +++ b/src/cpu/o3/store_set.hh @@ -36,8 +36,8 @@ #include <utility> #include <vector> -#include "arch/isa_traits.hh" #include "cpu/inst_seq.hh" +#include "sim/host.hh" struct ltseqnum { bool operator()(const InstSeqNum &lhs, const InstSeqNum &rhs) const diff --git a/src/cpu/o3/thread_context.hh b/src/cpu/o3/thread_context.hh index df8d1a6d8..9ca02b9f3 100755 --- a/src/cpu/o3/thread_context.hh +++ b/src/cpu/o3/thread_context.hh @@ -26,12 +26,12 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * Authors: Kevin Lim - * Korey Sewell */ #ifndef __CPU_O3_THREAD_CONTEXT_HH__ #define __CPU_O3_THREAD_CONTEXT_HH__ +#include "cpu/thread_context.hh" #include "cpu/o3/isa_specific.hh" class EndQuiesceEvent; diff --git a/src/cpu/o3/thread_context_impl.hh b/src/cpu/o3/thread_context_impl.hh index bf8cbf850..a4546e669 100755 --- a/src/cpu/o3/thread_context_impl.hh +++ b/src/cpu/o3/thread_context_impl.hh @@ -32,8 +32,6 @@ #include "cpu/o3/thread_context.hh" #include "cpu/quiesce_event.hh" -using namespace TheISA; - #if FULL_SYSTEM template <class Impl> VirtualPort * @@ -285,7 +283,7 @@ O3ThreadContext<Impl>::copyArchRegs(ThreadContext *tc) } // Copy the misc regs. - copyMiscRegs(tc, this); + TheISA::copyMiscRegs(tc, this); // Then finally set the PC and the next PC. cpu->setPC(tc->readPC(), tid); @@ -308,7 +306,7 @@ O3ThreadContext<Impl>::readIntReg(int reg_idx) } template <class Impl> -FloatReg +TheISA::FloatReg O3ThreadContext<Impl>::readFloatReg(int reg_idx, int width) { switch(width) { @@ -323,14 +321,14 @@ O3ThreadContext<Impl>::readFloatReg(int reg_idx, int width) } template <class Impl> -FloatReg +TheISA::FloatReg O3ThreadContext<Impl>::readFloatReg(int reg_idx) { return cpu->readArchFloatRegSingle(reg_idx, thread->readTid()); } template <class Impl> -FloatRegBits +TheISA::FloatRegBits O3ThreadContext<Impl>::readFloatRegBits(int reg_idx, int width) { DPRINTF(Fault, "Reading floatint register through the TC!\n"); @@ -338,7 +336,7 @@ O3ThreadContext<Impl>::readFloatRegBits(int reg_idx, int width) } template <class Impl> -FloatRegBits +TheISA::FloatRegBits O3ThreadContext<Impl>::readFloatRegBits(int reg_idx) { return cpu->readArchFloatRegInt(reg_idx, thread->readTid()); diff --git a/src/cpu/o3/thread_state.hh b/src/cpu/o3/thread_state.hh index 1c8105204..b6f2e14c0 100644 --- a/src/cpu/o3/thread_state.hh +++ b/src/cpu/o3/thread_state.hh @@ -31,8 +31,6 @@ #ifndef __CPU_O3_THREAD_STATE_HH__ #define __CPU_O3_THREAD_STATE_HH__ -#include "arch/faults.hh" -#include "arch/isa_traits.hh" #include "cpu/thread_context.hh" #include "cpu/thread_state.hh" diff --git a/src/cpu/o3/tournament_pred.hh b/src/cpu/o3/tournament_pred.hh index 92402adc6..66b4aaae2 100644 --- a/src/cpu/o3/tournament_pred.hh +++ b/src/cpu/o3/tournament_pred.hh @@ -31,9 +31,8 @@ #ifndef __CPU_O3_TOURNAMENT_PRED_HH__ #define __CPU_O3_TOURNAMENT_PRED_HH__ -// For Addr type. -#include "arch/isa_traits.hh" #include "cpu/o3/sat_counter.hh" +#include "sim/host.hh" #include <vector> /** diff --git a/src/cpu/ozone/cpu_impl.hh b/src/cpu/ozone/cpu_impl.hh index f58b81990..80f18434c 100644 --- a/src/cpu/ozone/cpu_impl.hh +++ b/src/cpu/ozone/cpu_impl.hh @@ -47,6 +47,7 @@ #include "arch/faults.hh" #include "arch/alpha/osfpal.hh" #include "arch/alpha/tlb.hh" +#include "arch/alpha/types.hh" #include "arch/vtophys.hh" #include "base/callback.hh" //#include "base/remote_gdb.hh" diff --git a/src/cpu/ozone/dyn_inst.hh b/src/cpu/ozone/dyn_inst.hh index 67691d416..75ac464ec 100644 --- a/src/cpu/ozone/dyn_inst.hh +++ b/src/cpu/ozone/dyn_inst.hh @@ -32,6 +32,7 @@ #define __CPU_OZONE_DYN_INST_HH__ #include "arch/isa_traits.hh" +#include "arch/types.hh" #include "config/full_system.hh" #include "cpu/base_dyn_inst.hh" #include "cpu/inst_seq.hh" diff --git a/src/cpu/ozone/dyn_inst_impl.hh b/src/cpu/ozone/dyn_inst_impl.hh index bad902c2a..ba0d70417 100644 --- a/src/cpu/ozone/dyn_inst_impl.hh +++ b/src/cpu/ozone/dyn_inst_impl.hh @@ -29,13 +29,10 @@ */ #include "arch/faults.hh" -#include "arch/isa_traits.hh" #include "config/full_system.hh" #include "cpu/ozone/dyn_inst.hh" #include "kern/kernel_stats.hh" -using namespace TheISA; - template <class Impl> OzoneDynInst<Impl>::OzoneDynInst(OzoneCPU *cpu) : BaseDynInst<Impl>(0, 0, 0, 0, cpu) diff --git a/src/cpu/ozone/ea_list.hh b/src/cpu/ozone/ea_list.hh index 64882632c..d9e9d701f 100644 --- a/src/cpu/ozone/ea_list.hh +++ b/src/cpu/ozone/ea_list.hh @@ -35,8 +35,8 @@ #include <list> #include <utility> -#include "arch/isa_traits.hh" #include "cpu/inst_seq.hh" +#include "sim/host.hh" /** * Simple class to hold onto a list of pairs, each pair having a memory diff --git a/src/cpu/ozone/front_end_impl.hh b/src/cpu/ozone/front_end_impl.hh index 9da937320..c9c5a869b 100644 --- a/src/cpu/ozone/front_end_impl.hh +++ b/src/cpu/ozone/front_end_impl.hh @@ -32,6 +32,7 @@ #include "arch/faults.hh" #include "arch/isa_traits.hh" +#include "arch/utility.hh" #include "base/statistics.hh" #include "cpu/thread_context.hh" #include "cpu/exetrace.hh" diff --git a/src/cpu/ozone/inorder_back_end_impl.hh b/src/cpu/ozone/inorder_back_end_impl.hh index cbb73364e..701fc0ee9 100644 --- a/src/cpu/ozone/inorder_back_end_impl.hh +++ b/src/cpu/ozone/inorder_back_end_impl.hh @@ -29,12 +29,10 @@ */ #include "arch/faults.hh" -#include "arch/isa_traits.hh" +#include "arch/types.hh" #include "cpu/ozone/inorder_back_end.hh" #include "cpu/ozone/thread_state.hh" -using namespace TheISA; - template <class Impl> InorderBackEnd<Impl>::InorderBackEnd(Params *params) : squashPending(false), diff --git a/src/cpu/ozone/lsq_unit.hh b/src/cpu/ozone/lsq_unit.hh index 1b5340e55..38c1c09a2 100644 --- a/src/cpu/ozone/lsq_unit.hh +++ b/src/cpu/ozone/lsq_unit.hh @@ -36,7 +36,7 @@ #include <algorithm> #include "arch/faults.hh" -#include "arch/isa_traits.hh" +#include "arch/types.hh" #include "config/full_system.hh" #include "base/hashmap.hh" #include "cpu/inst_seq.hh" diff --git a/src/cpu/ozone/lsq_unit_impl.hh b/src/cpu/ozone/lsq_unit_impl.hh index f8cb18634..ee0804036 100644 --- a/src/cpu/ozone/lsq_unit_impl.hh +++ b/src/cpu/ozone/lsq_unit_impl.hh @@ -28,7 +28,7 @@ * Authors: Kevin Lim */ -#include "arch/isa_traits.hh" +#include "arch/faults.hh" #include "base/str.hh" #include "cpu/ozone/lsq_unit.hh" diff --git a/src/cpu/ozone/lw_lsq.hh b/src/cpu/ozone/lw_lsq.hh index 2eb09d01a..9a21a9d01 100644 --- a/src/cpu/ozone/lw_lsq.hh +++ b/src/cpu/ozone/lw_lsq.hh @@ -37,7 +37,7 @@ #include <algorithm> #include "arch/faults.hh" -#include "arch/isa_traits.hh" +#include "arch/types.hh" #include "config/full_system.hh" #include "base/hashmap.hh" #include "cpu/inst_seq.hh" diff --git a/src/cpu/ozone/lw_lsq_impl.hh b/src/cpu/ozone/lw_lsq_impl.hh index 88e9c218f..7eef4b11f 100644 --- a/src/cpu/ozone/lw_lsq_impl.hh +++ b/src/cpu/ozone/lw_lsq_impl.hh @@ -30,7 +30,7 @@ #include "config/use_checker.hh" -#include "arch/isa_traits.hh" +#include "arch/faults.hh" #include "base/str.hh" #include "cpu/ozone/lw_lsq.hh" #include "cpu/checker/cpu.hh" diff --git a/src/cpu/ozone/null_predictor.hh b/src/cpu/ozone/null_predictor.hh index a98c89d69..0751338b7 100644 --- a/src/cpu/ozone/null_predictor.hh +++ b/src/cpu/ozone/null_predictor.hh @@ -31,8 +31,8 @@ #ifndef __CPU_OZONE_NULL_PREDICTOR_HH__ #define __CPU_OZONE_NULL_PREDICTOR_HH__ -#include "arch/isa_traits.hh" #include "cpu/inst_seq.hh" +#include "sim/host.hh" template <class Impl> class NullPredictor diff --git a/src/cpu/ozone/ozone_impl.hh b/src/cpu/ozone/ozone_impl.hh index 503675738..2271cd68a 100644 --- a/src/cpu/ozone/ozone_impl.hh +++ b/src/cpu/ozone/ozone_impl.hh @@ -31,7 +31,6 @@ #ifndef __CPU_OZONE_OZONE_IMPL_HH__ #define __CPU_OZONE_OZONE_IMPL_HH__ -#include "arch/alpha/isa_traits.hh" #include "cpu/o3/bpred_unit.hh" #include "cpu/ozone/front_end.hh" #include "cpu/ozone/inst_queue.hh" diff --git a/src/cpu/ozone/simple_impl.hh b/src/cpu/ozone/simple_impl.hh index 3199d8d8a..42002180b 100644 --- a/src/cpu/ozone/simple_impl.hh +++ b/src/cpu/ozone/simple_impl.hh @@ -31,7 +31,6 @@ #ifndef __CPU_OZONE_SIMPLE_IMPL_HH__ #define __CPU_OZONE_SIMPLE_IMPL_HH__ -#include "arch/isa_traits.hh" #include "cpu/o3/bpred_unit.hh" #include "cpu/ozone/cpu.hh" #include "cpu/ozone/front_end.hh" diff --git a/src/cpu/ozone/thread_state.hh b/src/cpu/ozone/thread_state.hh index ef4b1429d..8234cf938 100644 --- a/src/cpu/ozone/thread_state.hh +++ b/src/cpu/ozone/thread_state.hh @@ -32,7 +32,8 @@ #define __CPU_OZONE_THREAD_STATE_HH__ #include "arch/faults.hh" -#include "arch/isa_traits.hh" +#include "arch/types.hh" +#include "arch/regfile.hh" #include "cpu/thread_context.hh" #include "cpu/thread_state.hh" #include "sim/process.hh" diff --git a/src/cpu/simple/atomic.cc b/src/cpu/simple/atomic.cc index 12bfdeb9b..c396f5033 100644 --- a/src/cpu/simple/atomic.cc +++ b/src/cpu/simple/atomic.cc @@ -33,6 +33,7 @@ #include "cpu/simple/atomic.hh" #include "mem/packet_impl.hh" #include "sim/builder.hh" +#include "sim/system.hh" using namespace std; using namespace TheISA; @@ -158,18 +159,31 @@ AtomicSimpleCPU::~AtomicSimpleCPU() void AtomicSimpleCPU::serialize(ostream &os) { - SERIALIZE_ENUM(_status); - BaseSimpleCPU::serialize(os); + SimObject::State so_state = SimObject::getState(); + SERIALIZE_ENUM(so_state); nameOut(os, csprintf("%s.tickEvent", name())); tickEvent.serialize(os); + BaseSimpleCPU::serialize(os); } void AtomicSimpleCPU::unserialize(Checkpoint *cp, const string §ion) { - UNSERIALIZE_ENUM(_status); - BaseSimpleCPU::unserialize(cp, section); + SimObject::State so_state; + UNSERIALIZE_ENUM(so_state); tickEvent.unserialize(cp, csprintf("%s.tickEvent", section)); + BaseSimpleCPU::unserialize(cp, section); +} + +void +AtomicSimpleCPU::resume() +{ + assert(system->getMemoryMode() == System::Atomic); + changeState(SimObject::Running); + if (thread->status() == ThreadContext::Active) { + if (!tickEvent.scheduled()) + tickEvent.schedule(curTick); + } } void @@ -451,11 +465,11 @@ BEGIN_DECLARE_SIM_OBJECT_PARAMS(AtomicSimpleCPU) Param<Counter> max_loads_any_thread; Param<Counter> max_loads_all_threads; SimObjectParam<MemObject *> mem; + SimObjectParam<System *> system; #if FULL_SYSTEM SimObjectParam<AlphaITB *> itb; SimObjectParam<AlphaDTB *> dtb; - SimObjectParam<System *> system; Param<int> cpu_id; Param<Tick> profile; #else @@ -483,11 +497,11 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(AtomicSimpleCPU) INIT_PARAM(max_loads_all_threads, "terminate when all threads have reached this load count"), INIT_PARAM(mem, "memory"), + INIT_PARAM(system, "system object"), #if FULL_SYSTEM INIT_PARAM(itb, "Instruction TLB"), INIT_PARAM(dtb, "Data TLB"), - INIT_PARAM(system, "system object"), INIT_PARAM(cpu_id, "processor ID"), INIT_PARAM(profile, ""), #else @@ -520,11 +534,11 @@ CREATE_SIM_OBJECT(AtomicSimpleCPU) params->width = width; params->simulate_stalls = simulate_stalls; params->mem = mem; + params->system = system; #if FULL_SYSTEM params->itb = itb; params->dtb = dtb; - params->system = system; params->cpu_id = cpu_id; params->profile = profile; #else diff --git a/src/cpu/simple/atomic.hh b/src/cpu/simple/atomic.hh index 179b4a721..b602af558 100644 --- a/src/cpu/simple/atomic.hh +++ b/src/cpu/simple/atomic.hh @@ -126,6 +126,7 @@ class AtomicSimpleCPU : public BaseSimpleCPU virtual void serialize(std::ostream &os); virtual void unserialize(Checkpoint *cp, const std::string §ion); + virtual void resume(); void switchOut(); void takeOverFrom(BaseCPU *oldCPU); diff --git a/src/cpu/simple/base.cc b/src/cpu/simple/base.cc index a50541189..801c96c88 100644 --- a/src/cpu/simple/base.cc +++ b/src/cpu/simple/base.cc @@ -26,10 +26,10 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * Authors: Steve Reinhardt - * Korey Sewell */ #include "arch/utility.hh" +#include "arch/faults.hh" #include "base/cprintf.hh" #include "base/inifile.hh" #include "base/loader/symtab.hh" @@ -55,10 +55,10 @@ #include "sim/sim_events.hh" #include "sim/sim_object.hh" #include "sim/stats.hh" +#include "sim/system.hh" #if FULL_SYSTEM #include "base/remote_gdb.hh" -#include "sim/system.hh" #include "arch/tlb.hh" #include "arch/stacktrace.hh" #include "arch/vtophys.hh" @@ -178,8 +178,8 @@ void BaseSimpleCPU::serialize(ostream &os) { BaseCPU::serialize(os); - SERIALIZE_SCALAR(inst); - nameOut(os, csprintf("%s.xc", name())); +// SERIALIZE_SCALAR(inst); + nameOut(os, csprintf("%s.xc.0", name())); thread->serialize(os); } @@ -187,8 +187,8 @@ void BaseSimpleCPU::unserialize(Checkpoint *cp, const string §ion) { BaseCPU::unserialize(cp, section); - UNSERIALIZE_SCALAR(inst); - thread->unserialize(cp, csprintf("%s.xc", section)); +// UNSERIALIZE_SCALAR(inst); + thread->unserialize(cp, csprintf("%s.xc.0", section)); } void @@ -455,6 +455,7 @@ BaseSimpleCPU::advancePC(Fault fault) #else thread->setNextPC(thread->readNextNPC()); thread->setNextNPC(thread->readNextNPC() + sizeof(MachInst)); + assert(thread->readNextPC() != thread->readNextNPC()); #endif } diff --git a/src/cpu/simple/timing.cc b/src/cpu/simple/timing.cc index e55301c6b..5c1654f7e 100644 --- a/src/cpu/simple/timing.cc +++ b/src/cpu/simple/timing.cc @@ -33,6 +33,7 @@ #include "cpu/simple/timing.hh" #include "mem/packet_impl.hh" #include "sim/builder.hh" +#include "sim/system.hh" using namespace std; using namespace TheISA; @@ -84,14 +85,22 @@ TimingSimpleCPU::CpuPort::recvStatusChange(Status status) panic("TimingSimpleCPU doesn't expect recvStatusChange callback!"); } + +void +TimingSimpleCPU::CpuPort::TickEvent::schedule(Packet *_pkt, Tick t) +{ + pkt = _pkt; + Event::schedule(t); +} + TimingSimpleCPU::TimingSimpleCPU(Params *p) - : BaseSimpleCPU(p), icachePort(this), dcachePort(this) + : BaseSimpleCPU(p), icachePort(this, p->clock), dcachePort(this, p->clock) { _status = Idle; ifetch_pkt = dcache_pkt = NULL; drainEvent = NULL; fetchEvent = NULL; - state = SimObject::Timing; + changeState(SimObject::Running); } @@ -102,29 +111,31 @@ TimingSimpleCPU::~TimingSimpleCPU() void TimingSimpleCPU::serialize(ostream &os) { - SERIALIZE_ENUM(_status); + SimObject::State so_state = SimObject::getState(); + SERIALIZE_ENUM(so_state); BaseSimpleCPU::serialize(os); } void TimingSimpleCPU::unserialize(Checkpoint *cp, const string §ion) { - UNSERIALIZE_ENUM(_status); + SimObject::State so_state; + UNSERIALIZE_ENUM(so_state); BaseSimpleCPU::unserialize(cp, section); } -bool +unsigned int TimingSimpleCPU::drain(Event *drain_event) { // TimingSimpleCPU is ready to drain if it's not waiting for // an access to complete. if (status() == Idle || status() == Running || status() == SwitchedOut) { - changeState(SimObject::DrainedTiming); - return true; + changeState(SimObject::Drained); + return 0; } else { changeState(SimObject::Draining); drainEvent = drain_event; - return false; + return 1; } } @@ -134,7 +145,9 @@ TimingSimpleCPU::resume() if (_status != SwitchedOut && _status != Idle) { // Delete the old event if it existed. if (fetchEvent) { - assert(!fetchEvent->scheduled()); + if (fetchEvent->scheduled()) + fetchEvent->deschedule(); + delete fetchEvent; } @@ -142,12 +155,9 @@ TimingSimpleCPU::resume() new EventWrapper<TimingSimpleCPU, &TimingSimpleCPU::fetch>(this, false); fetchEvent->schedule(curTick); } -} -void -TimingSimpleCPU::setMemoryMode(State new_mode) -{ - assert(new_mode == SimObject::Timing); + assert(system->getMemoryMode() == System::Timing); + changeState(SimObject::Running); } void @@ -460,11 +470,26 @@ TimingSimpleCPU::completeIfetch(Packet *pkt) } } +void +TimingSimpleCPU::IcachePort::ITickEvent::process() +{ + cpu->completeIfetch(pkt); +} bool TimingSimpleCPU::IcachePort::recvTiming(Packet *pkt) { - cpu->completeIfetch(pkt); + // These next few lines could be replaced with something faster + // who knows what though + Tick time = pkt->req->getTime(); + while (time < curTick) + time += lat; + + if (time == curTick) + cpu->completeIfetch(pkt); + else + tickEvent.schedule(pkt, time); + return true; } @@ -514,18 +539,32 @@ void TimingSimpleCPU::completeDrain() { DPRINTF(Config, "Done draining\n"); - changeState(SimObject::DrainedTiming); + changeState(SimObject::Drained); drainEvent->process(); } bool TimingSimpleCPU::DcachePort::recvTiming(Packet *pkt) { - cpu->completeDataAccess(pkt); + Tick time = pkt->req->getTime(); + while (time < curTick) + time += lat; + + if (time == curTick) + cpu->completeDataAccess(pkt); + else + tickEvent.schedule(pkt, time); + return true; } void +TimingSimpleCPU::DcachePort::DTickEvent::process() +{ + cpu->completeDataAccess(pkt); +} + +void TimingSimpleCPU::DcachePort::recvRetry() { // we shouldn't get a retry unless we have a packet that we're @@ -551,11 +590,11 @@ BEGIN_DECLARE_SIM_OBJECT_PARAMS(TimingSimpleCPU) Param<Counter> max_loads_any_thread; Param<Counter> max_loads_all_threads; SimObjectParam<MemObject *> mem; + SimObjectParam<System *> system; #if FULL_SYSTEM SimObjectParam<AlphaITB *> itb; SimObjectParam<AlphaDTB *> dtb; - SimObjectParam<System *> system; Param<int> cpu_id; Param<Tick> profile; #else @@ -583,11 +622,11 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(TimingSimpleCPU) INIT_PARAM(max_loads_all_threads, "terminate when all threads have reached this load count"), INIT_PARAM(mem, "memory"), + INIT_PARAM(system, "system object"), #if FULL_SYSTEM INIT_PARAM(itb, "Instruction TLB"), INIT_PARAM(dtb, "Data TLB"), - INIT_PARAM(system, "system object"), INIT_PARAM(cpu_id, "processor ID"), INIT_PARAM(profile, ""), #else @@ -618,11 +657,11 @@ CREATE_SIM_OBJECT(TimingSimpleCPU) params->functionTrace = function_trace; params->functionTraceStart = function_trace_start; params->mem = mem; + params->system = system; #if FULL_SYSTEM params->itb = itb; params->dtb = dtb; - params->system = system; params->cpu_id = cpu_id; params->profile = profile; #else diff --git a/src/cpu/simple/timing.hh b/src/cpu/simple/timing.hh index 0a3f91e6c..d03fa4bc0 100644 --- a/src/cpu/simple/timing.hh +++ b/src/cpu/simple/timing.hh @@ -74,11 +74,12 @@ class TimingSimpleCPU : public BaseSimpleCPU { protected: TimingSimpleCPU *cpu; + Tick lat; public: - CpuPort(const std::string &_name, TimingSimpleCPU *_cpu) - : Port(_name), cpu(_cpu) + CpuPort(const std::string &_name, TimingSimpleCPU *_cpu, Tick _lat) + : Port(_name), cpu(_cpu), lat(_lat) { } protected: @@ -92,14 +93,26 @@ class TimingSimpleCPU : public BaseSimpleCPU virtual void getDeviceAddressRanges(AddrRangeList &resp, AddrRangeList &snoop) { resp.clear(); snoop.clear(); } + + struct TickEvent : public Event + { + Packet *pkt; + TimingSimpleCPU *cpu; + + TickEvent(TimingSimpleCPU *_cpu) + :Event(&mainEventQueue), cpu(_cpu) {} + const char *description() { return "Timing CPU clock event"; } + void schedule(Packet *_pkt, Tick t); + }; + }; class IcachePort : public CpuPort { public: - IcachePort(TimingSimpleCPU *_cpu) - : CpuPort(_cpu->name() + "-iport", _cpu) + IcachePort(TimingSimpleCPU *_cpu, Tick _lat) + : CpuPort(_cpu->name() + "-iport", _cpu, _lat), tickEvent(_cpu) { } protected: @@ -107,14 +120,26 @@ class TimingSimpleCPU : public BaseSimpleCPU virtual bool recvTiming(Packet *pkt); virtual void recvRetry(); + + struct ITickEvent : public TickEvent + { + + ITickEvent(TimingSimpleCPU *_cpu) + : TickEvent(_cpu) {} + void process(); + const char *description() { return "Timing CPU clock event"; } + }; + + ITickEvent tickEvent; + }; class DcachePort : public CpuPort { public: - DcachePort(TimingSimpleCPU *_cpu) - : CpuPort(_cpu->name() + "-dport", _cpu) + DcachePort(TimingSimpleCPU *_cpu, Tick _lat) + : CpuPort(_cpu->name() + "-dport", _cpu, _lat), tickEvent(_cpu) { } protected: @@ -122,6 +147,17 @@ class TimingSimpleCPU : public BaseSimpleCPU virtual bool recvTiming(Packet *pkt); virtual void recvRetry(); + + struct DTickEvent : public TickEvent + { + DTickEvent(TimingSimpleCPU *_cpu) + : TickEvent(_cpu) {} + void process(); + const char *description() { return "Timing CPU clock event"; } + }; + + DTickEvent tickEvent; + }; IcachePort icachePort; @@ -137,9 +173,8 @@ class TimingSimpleCPU : public BaseSimpleCPU virtual void serialize(std::ostream &os); virtual void unserialize(Checkpoint *cp, const std::string §ion); - virtual bool drain(Event *drain_event); + virtual unsigned int drain(Event *drain_event); virtual void resume(); - virtual void setMemoryMode(State new_mode); void switchOut(); void takeOverFrom(BaseCPU *oldCPU); diff --git a/src/cpu/simple_thread.cc b/src/cpu/simple_thread.cc index af1db2ff2..5f86cf2b7 100644 --- a/src/cpu/simple_thread.cc +++ b/src/cpu/simple_thread.cc @@ -196,6 +196,7 @@ SimpleThread::copyState(ThreadContext *oldContext) #if !FULL_SYSTEM funcExeInst = oldContext->readFuncExeInst(); #endif + inst = oldContext->getInst(); } void diff --git a/src/cpu/simple_thread.hh b/src/cpu/simple_thread.hh index d36853db4..242cfd0e1 100644 --- a/src/cpu/simple_thread.hh +++ b/src/cpu/simple_thread.hh @@ -449,8 +449,8 @@ class SimpleThread : public ThreadState } #endif - void changeRegFileContext(RegFile::ContextParam param, - RegFile::ContextVal val) + void changeRegFileContext(TheISA::RegContextParam param, + TheISA::RegContextVal val) { regs.changeContext(param, val); } diff --git a/src/cpu/static_inst.hh b/src/cpu/static_inst.hh index ea1a65148..578d14191 100644 --- a/src/cpu/static_inst.hh +++ b/src/cpu/static_inst.hh @@ -34,14 +34,16 @@ #include <bitset> #include <string> +#include "arch/isa_traits.hh" +#include "sim/faults.hh" #include "base/bitfield.hh" #include "base/hashmap.hh" #include "base/misc.hh" #include "base/refcnt.hh" #include "cpu/op_class.hh" #include "cpu/o3/dyn_inst.hh" +#include "sim/faults.hh" #include "sim/host.hh" -#include "arch/isa_traits.hh" // forward declarations struct AlphaSimpleImpl; @@ -214,6 +216,7 @@ class StaticInstBase : public RefCounted bool isIndirectCtrl() const { return flags[IsIndirectControl]; } bool isCondCtrl() const { return flags[IsCondControl]; } bool isUncondCtrl() const { return flags[IsUncondControl]; } + bool isCondDelaySlot() const { return flags[IsCondDelaySlot]; } bool isThreadSync() const { return flags[IsThreadSync]; } bool isSerializing() const { return flags[IsSerializing] || diff --git a/src/cpu/thread_context.hh b/src/cpu/thread_context.hh index e019e22bc..73046097d 100644 --- a/src/cpu/thread_context.hh +++ b/src/cpu/thread_context.hh @@ -31,6 +31,9 @@ #ifndef __CPU_THREAD_CONTEXT_HH__ #define __CPU_THREAD_CONTEXT_HH__ +#include "arch/types.hh" +#include "arch/regfile.hh" +#include "arch/syscallreturn.hh" #include "config/full_system.hh" #include "mem/request.hh" #include "sim/faults.hh" @@ -254,8 +257,8 @@ class ThreadContext virtual int exit() { return 1; }; #endif - virtual void changeRegFileContext(RegFile::ContextParam param, - RegFile::ContextVal val) = 0; + virtual void changeRegFileContext(TheISA::RegContextParam param, + TheISA::RegContextVal val) = 0; }; /** @@ -438,8 +441,8 @@ class ProxyThreadContext : public ThreadContext Counter readFuncExeInst() { return actualTC->readFuncExeInst(); } #endif - void changeRegFileContext(RegFile::ContextParam param, - RegFile::ContextVal val) + void changeRegFileContext(TheISA::RegContextParam param, + TheISA::RegContextVal val) { actualTC->changeRegFileContext(param, val); } diff --git a/src/cpu/thread_state.hh b/src/cpu/thread_state.hh index b03a2e2bb..6e985054f 100644 --- a/src/cpu/thread_state.hh +++ b/src/cpu/thread_state.hh @@ -31,7 +31,7 @@ #ifndef __CPU_THREAD_STATE_HH__ #define __CPU_THREAD_STATE_HH__ -#include "arch/isa_traits.hh" +#include "arch/types.hh" #include "cpu/thread_context.hh" #if !FULL_SYSTEM |