diff options
Diffstat (limited to 'src/cpu')
37 files changed, 2058 insertions, 94 deletions
diff --git a/src/cpu/base_dyn_inst.hh b/src/cpu/base_dyn_inst.hh index 9cc61f74c..40611abe6 100644 --- a/src/cpu/base_dyn_inst.hh +++ b/src/cpu/base_dyn_inst.hh @@ -215,6 +215,9 @@ class BaseDynInst : public FastAlloc, public RefCounted */ Addr nextPC; + /** Next non-speculative NPC. Target PC for Mips or Sparc. */ + Addr nextNPC; + /** Predicted next PC. */ Addr predPC; @@ -275,6 +278,11 @@ class BaseDynInst : public FastAlloc, public RefCounted */ Addr readNextPC() { return nextPC; } + /** Returns the next NPC. This could be the speculative next NPC if it is + * called prior to the actual branch target being calculated. + */ + Addr readNextNPC() { return nextNPC; } + /** Set the predicted target of this current instruction. */ void setPredTarg(Addr predicted_PC) { predPC = predicted_PC; } @@ -282,11 +290,20 @@ class BaseDynInst : public FastAlloc, public RefCounted Addr readPredTarg() { return predPC; } /** Returns whether the instruction was predicted taken or not. */ - bool predTaken() { return predPC != (PC + sizeof(MachInst)); } + bool predTaken() +#if THE_ISA == ALPHA_ISA + { return predPC != (PC + sizeof(MachInst)); } +#else + { return predPC != (nextPC + sizeof(MachInst)); } +#endif /** Returns whether the instruction mispredicted. */ - bool mispredicted() { return predPC != nextPC; } - + bool mispredicted() +#if THE_ISA == ALPHA_ISA + { return predPC != nextPC; } +#else + { return predPC != nextNPC; } +#endif // // Instruction types. Forward checks to StaticInst object. // @@ -308,6 +325,7 @@ class BaseDynInst : public FastAlloc, public RefCounted bool isIndirectCtrl() const { return staticInst->isIndirectCtrl(); } bool isCondCtrl() const { return staticInst->isCondCtrl(); } bool isUncondCtrl() const { return staticInst->isUncondCtrl(); } + bool isCondDelaySlot() const { return staticInst->isCondDelaySlot(); } bool isThreadSync() const { return staticInst->isThreadSync(); } bool isSerializing() const { return staticInst->isSerializing(); } bool isSerializeBefore() const @@ -545,6 +563,12 @@ class BaseDynInst : public FastAlloc, public RefCounted nextPC = val; } + /** Set the next NPC of this instruction (the target in Mips or Sparc).*/ + void setNextNPC(uint64_t val) + { + nextNPC = val; + } + /** Sets the ASID. */ void setASID(short addr_space_id) { asid = addr_space_id; } diff --git a/src/cpu/base_dyn_inst_impl.hh b/src/cpu/base_dyn_inst_impl.hh index 91424faad..a344990b4 100644 --- a/src/cpu/base_dyn_inst_impl.hh +++ b/src/cpu/base_dyn_inst_impl.hh @@ -74,6 +74,7 @@ BaseDynInst<Impl>::BaseDynInst(ExtMachInst machInst, Addr inst_PC, PC = inst_PC; nextPC = PC + sizeof(MachInst); + nextNPC = nextPC + sizeof(MachInst); predPC = pred_PC; initVars(); diff --git a/src/cpu/o3/SConscript b/src/cpu/o3/SConscript index e65d41411..44882e5ec 100755 --- a/src/cpu/o3/SConscript +++ b/src/cpu/o3/SConscript @@ -52,15 +52,14 @@ if env['TARGET_ISA'] == 'alpha': alpha/cpu_builder.cc ''') elif env['TARGET_ISA'] == 'mips': - sys.exit('O3 CPU does not support MIPS') - #sources += Split(''' - # mips/dyn_inst.cc - # mips/cpu.cc - # mips/thread_context.cc - # mips/cpu_builder.cc - # ''') + sources += Split(''' + mips/dyn_inst.cc + mips/cpu.cc + mips/thread_context.cc + mips/cpu_builder.cc + ''') elif env['TARGET_ISA'] == 'sparc': - sys.exit('O3 CPU does not support MIPS') + sys.exit('O3 CPU does not support Sparc') #sources += Split(''' # sparc/dyn_inst.cc # sparc/cpu.cc diff --git a/src/cpu/o3/alpha/thread_context.hh b/src/cpu/o3/alpha/thread_context.hh index ad52b0d2e..773f08ecc 100644 --- a/src/cpu/o3/alpha/thread_context.hh +++ b/src/cpu/o3/alpha/thread_context.hh @@ -26,7 +26,6 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * Authors: Kevin Lim - * Korey Sewell */ #include "cpu/o3/thread_context.hh" diff --git a/src/cpu/o3/bpred_unit_impl.hh b/src/cpu/o3/bpred_unit_impl.hh index 0da02145b..8dad42011 100644 --- a/src/cpu/o3/bpred_unit_impl.hh +++ b/src/cpu/o3/bpred_unit_impl.hh @@ -159,7 +159,7 @@ BPredUnit<Impl>::predict(DynInstPtr &inst, Addr &PC, unsigned tid) void *bp_history = NULL; if (inst->isUncondCtrl()) { - DPRINTF(Fetch, "BranchPred: [tid:%i] Unconditional control.\n", tid); + DPRINTF(Fetch, "BranchPred: [tid:%i]: Unconditional control.\n", tid); pred_taken = true; // Tell the BP there was an unconditional branch. BPUncond(bp_history); @@ -201,15 +201,20 @@ BPredUnit<Impl>::predict(DynInstPtr &inst, Addr &PC, unsigned tid) ++BTBLookups; if (inst->isCall()) { - RAS[tid].push(PC + sizeof(MachInst)); +#if THE_ISA == ALPHA_ISA + Addr ras_pc = PC + sizeof(MachInst); // Next PC +#else + Addr ras_pc = PC + (2 * sizeof(MachInst)); // Next Next PC +#endif + RAS[tid].push(ras_pc); // Record that it was a call so that the top RAS entry can // be popped off if the speculation is incorrect. predict_record.wasCall = true; - DPRINTF(Fetch, "BranchPred: [tid:%i] Instruction %#x was a call" + DPRINTF(Fetch, "BranchPred: [tid:%i]: Instruction %#x was a call" ", adding %#x to the RAS.\n", - tid, inst->readPC(), PC + sizeof(MachInst)); + tid, inst->readPC(), ras_pc); } if (BTB.valid(PC, tid)) { @@ -242,7 +247,7 @@ BPredUnit<Impl>::predict(DynInstPtr &inst, Addr &PC, unsigned tid) predHist[tid].push_front(predict_record); - DPRINTF(Fetch, "[tid:%i] predHist.size(): %i\n", tid, predHist[tid].size()); + DPRINTF(Fetch, "[tid:%i]: predHist.size(): %i\n", tid, predHist[tid].size()); return pred_taken; } @@ -251,8 +256,8 @@ template <class Impl> void BPredUnit<Impl>::update(const InstSeqNum &done_sn, unsigned tid) { - DPRINTF(Fetch, "BranchPred: [tid:%i]: Commiting branches until sequence" - "number %lli.\n", tid, done_sn); + DPRINTF(Fetch, "BranchPred: [tid:%i]: Commiting branches until " + "[sn:%lli].\n", tid, done_sn); while (!predHist[tid].empty() && predHist[tid].back().seqNum <= done_sn) { diff --git a/src/cpu/o3/comm.hh b/src/cpu/o3/comm.hh index bf1bd08e8..e311910cf 100644 --- a/src/cpu/o3/comm.hh +++ b/src/cpu/o3/comm.hh @@ -88,6 +88,7 @@ struct DefaultIEWDefaultCommit { bool squash[Impl::MaxThreads]; bool branchMispredict[Impl::MaxThreads]; bool branchTaken[Impl::MaxThreads]; + bool condDelaySlotBranch[Impl::MaxThreads]; uint64_t mispredPC[Impl::MaxThreads]; uint64_t nextPC[Impl::MaxThreads]; InstSeqNum squashedSeqNum[Impl::MaxThreads]; @@ -113,6 +114,7 @@ struct TimeBufStruct { uint64_t branchAddr; InstSeqNum doneSeqNum; + InstSeqNum bdelayDoneSeqNum; // @todo: Might want to package this kind of branch stuff into a single // struct as it is used pretty frequently. @@ -165,6 +167,9 @@ struct TimeBufStruct { // retired or squashed sequence number. InstSeqNum doneSeqNum; + InstSeqNum bdelayDoneSeqNum; + bool squashDelaySlot; + //Just in case we want to do a commit/squash on a cycle //(necessary for multiple ROBs?) bool commitInsts; diff --git a/src/cpu/o3/commit.hh b/src/cpu/o3/commit.hh index 956b6ec3e..cb83012f7 100644 --- a/src/cpu/o3/commit.hh +++ b/src/cpu/o3/commit.hh @@ -286,13 +286,11 @@ class DefaultCommit /** Sets the next PC of a specific thread. */ void setNextPC(uint64_t val, unsigned tid) { nextPC[tid] = val; } -#if THE_ISA != ALPHA_ISA /** Reads the next NPC of a specific thread. */ - uint64_t readNextPC(unsigned tid) { return nextNPC[tid]; } + uint64_t readNextNPC(unsigned tid) { return nextNPC[tid]; } /** Sets the next NPC of a specific thread. */ - void setNextPC(uint64_t val, unsigned tid) { nextNPC[tid] = val; } -#endif + void setNextNPC(uint64_t val, unsigned tid) { nextNPC[tid] = val; } private: /** Time buffer interface. */ @@ -397,10 +395,8 @@ class DefaultCommit /** The next PC of each thread. */ Addr nextPC[Impl::MaxThreads]; -#if THE_ISA != ALPHA_ISA /** The next NPC of each thread. */ Addr nextNPC[Impl::MaxThreads]; -#endif /** The sequence number of the youngest valid instruction in the ROB. */ InstSeqNum youngestSeqNum[Impl::MaxThreads]; diff --git a/src/cpu/o3/commit_impl.hh b/src/cpu/o3/commit_impl.hh index c667d633a..8a8035e73 100644 --- a/src/cpu/o3/commit_impl.hh +++ b/src/cpu/o3/commit_impl.hh @@ -120,7 +120,7 @@ DefaultCommit<Impl>::DefaultCommit(Params *params) changedROBNumEntries[i] = false; trapSquash[i] = false; tcSquash[i] = false; - PC[i] = nextPC[i] = 0; + PC[i] = nextPC[i] = nextNPC[i] = 0; } } @@ -723,14 +723,48 @@ DefaultCommit<Impl>::commit() // then use one older sequence number. InstSeqNum squashed_inst = fromIEW->squashedSeqNum[tid]; - if (fromIEW->includeSquashInst[tid] == true) - squashed_inst--; +#if THE_ISA != ALPHA_ISA + InstSeqNum bdelay_done_seq_num; + bool squash_bdelay_slot; + + if (fromIEW->branchMispredict[tid]) { + if (fromIEW->branchTaken[tid] && + fromIEW->condDelaySlotBranch[tid]) { + DPRINTF(Commit, "[tid:%i]: Cond. delay slot branch" + "mispredicted as taken. Squashing after previous " + "inst, [sn:%i]\n", + tid, squashed_inst); + bdelay_done_seq_num = squashed_inst; + squash_bdelay_slot = true; + } else { + DPRINTF(Commit, "[tid:%i]: Branch Mispredict. Squashing " + "after delay slot [sn:%i]\n", tid, squashed_inst+1); + bdelay_done_seq_num = squashed_inst + 1; + squash_bdelay_slot = false; + } + } else { + bdelay_done_seq_num = squashed_inst; + } +#endif + if (fromIEW->includeSquashInst[tid] == true) { + squashed_inst--; +#if THE_ISA != ALPHA_ISA + bdelay_done_seq_num--; +#endif + } // All younger instructions will be squashed. Set the sequence // number as the youngest instruction in the ROB. youngestSeqNum[tid] = squashed_inst; +#if THE_ISA == ALPHA_ISA rob->squash(squashed_inst, tid); + toIEW->commitInfo[tid].squashDelaySlot = true; +#else + rob->squash(bdelay_done_seq_num, tid); + toIEW->commitInfo[tid].squashDelaySlot = squash_bdelay_slot; + toIEW->commitInfo[tid].bdelayDoneSeqNum = bdelay_done_seq_num; +#endif changedROBNumEntries[tid] = true; toIEW->commitInfo[tid].doneSeqNum = squashed_inst; @@ -840,6 +874,7 @@ DefaultCommit<Impl>::commitInsts() } else { PC[tid] = head_inst->readPC(); nextPC[tid] = head_inst->readNextPC(); + nextNPC[tid] = head_inst->readNextNPC(); // Increment the total number of non-speculative instructions // executed. @@ -868,7 +903,13 @@ DefaultCommit<Impl>::commitInsts() } PC[tid] = nextPC[tid]; +#if THE_ISA == ALPHA_ISA nextPC[tid] = nextPC[tid] + sizeof(TheISA::MachInst); +#else + nextPC[tid] = nextNPC[tid]; + nextNPC[tid] = nextNPC[tid] + sizeof(TheISA::MachInst); +#endif + #if FULL_SYSTEM int count = 0; Addr oldpc; @@ -1069,6 +1110,8 @@ template <class Impl> void DefaultCommit<Impl>::getInsts() { + DPRINTF(Commit, "Getting instructions from Rename stage.\n"); + // Read any renamed instructions and place them into the ROB. int insts_to_process = min((int)renameWidth, fromRename->size); diff --git a/src/cpu/o3/cpu.cc b/src/cpu/o3/cpu.cc index c43cc2cf8..af032132e 100644 --- a/src/cpu/o3/cpu.cc +++ b/src/cpu/o3/cpu.cc @@ -667,11 +667,12 @@ FullO3CPU<Impl>::removeThread(unsigned tid) } // Squash Throughout Pipeline - fetch.squash(0,tid); + InstSeqNum squash_seq_num = commit.rob->readHeadInst(tid)->seqNum; + fetch.squash(0, squash_seq_num, true, tid); decode.squash(tid); - rename.squash(tid); + rename.squash(squash_seq_num, tid); iew.squash(tid); - commit.rob->squash(commit.rob->readHeadInst(tid)->seqNum, tid); + commit.rob->squash(squash_seq_num, tid); assert(iew.ldstQueue.getCount(tid) == 0); @@ -1101,7 +1102,6 @@ FullO3CPU<Impl>::setNextPC(uint64_t val,unsigned tid) commit.setNextPC(val, tid); } -#if THE_ISA != ALPHA_ISA template <class Impl> uint64_t FullO3CPU<Impl>::readNextNPC(unsigned tid) @@ -1111,11 +1111,10 @@ FullO3CPU<Impl>::readNextNPC(unsigned tid) template <class Impl> void -FullO3CPU<Impl>::setNextNNPC(uint64_t val,unsigned tid) +FullO3CPU<Impl>::setNextNPC(uint64_t val,unsigned tid) { commit.setNextNPC(val, tid); } -#endif template <class Impl> typename FullO3CPU<Impl>::ListIt @@ -1165,7 +1164,9 @@ FullO3CPU<Impl>::removeFrontInst(DynInstPtr &inst) template <class Impl> void -FullO3CPU<Impl>::removeInstsNotInROB(unsigned tid) +FullO3CPU<Impl>::removeInstsNotInROB(unsigned tid, + bool squash_delay_slot, + const InstSeqNum &delay_slot_seq_num) { DPRINTF(O3CPU, "Thread %i: Deleting instructions from instruction" " list.\n", tid); @@ -1196,6 +1197,12 @@ FullO3CPU<Impl>::removeInstsNotInROB(unsigned tid) while (inst_it != end_it) { assert(!instList.empty()); +#if THE_ISA != ALPHA_ISA + if(!squash_delay_slot && + delay_slot_seq_num >= (*inst_it)->seqNum) { + break; + } +#endif squashInstIt(inst_it, tid); inst_it--; diff --git a/src/cpu/o3/cpu.hh b/src/cpu/o3/cpu.hh index 83cb966e3..8c4f663ef 100644 --- a/src/cpu/o3/cpu.hh +++ b/src/cpu/o3/cpu.hh @@ -449,8 +449,10 @@ class FullO3CPU : public BaseO3CPU */ void removeFrontInst(DynInstPtr &inst); - /** Remove all instructions that are not currently in the ROB. */ - void removeInstsNotInROB(unsigned tid); + /** Remove all instructions that are not currently in the ROB. + * There's also an option to not squash delay slot instructions.*/ + void removeInstsNotInROB(unsigned tid, bool squash_delay_slot, + const InstSeqNum &delay_slot_seq_num); /** Remove all instructions younger than the given sequence number. */ void removeInstsUntil(const InstSeqNum &seq_num,unsigned tid); diff --git a/src/cpu/o3/decode.hh b/src/cpu/o3/decode.hh index 7f5ecbc26..4a845e670 100644 --- a/src/cpu/o3/decode.hh +++ b/src/cpu/o3/decode.hh @@ -276,6 +276,19 @@ class DefaultDecode /** Maximum size of the skid buffer. */ unsigned skidBufferMax; + /** SeqNum of Squashing Branch Delay Instruction (used for MIPS)*/ + Addr bdelayDoneSeqNum[Impl::MaxThreads]; + + /** Instruction used for squashing branch (used for MIPS)*/ + DynInstPtr squashInst[Impl::MaxThreads]; + + /** Tells when their is a pending delay slot inst. to send + * to rename. If there is, then wait squash after the next + * instruction (used for MIPS). + */ + bool squashAfterDelaySlot[Impl::MaxThreads]; + + /** Stat for total number of idle cycles. */ Stats::Scalar<> decodeIdleCycles; /** Stat for total number of blocked cycles. */ diff --git a/src/cpu/o3/decode_impl.hh b/src/cpu/o3/decode_impl.hh index 8b851c032..0bc6a109f 100644 --- a/src/cpu/o3/decode_impl.hh +++ b/src/cpu/o3/decode_impl.hh @@ -50,6 +50,8 @@ DefaultDecode<Impl>::DefaultDecode(Params *params) stalls[i].rename = false; stalls[i].iew = false; stalls[i].commit = false; + + squashAfterDelaySlot[i] = false; } // @todo: Make into a parameter @@ -278,13 +280,25 @@ DefaultDecode<Impl>::squash(DynInstPtr &inst, unsigned tid) // Send back mispredict information. toFetch->decodeInfo[tid].branchMispredict = true; - toFetch->decodeInfo[tid].doneSeqNum = inst->seqNum; toFetch->decodeInfo[tid].predIncorrect = true; + toFetch->decodeInfo[tid].doneSeqNum = inst->seqNum; toFetch->decodeInfo[tid].squash = true; toFetch->decodeInfo[tid].nextPC = inst->branchTarget(); +#if THE_ISA == ALPHA_ISA toFetch->decodeInfo[tid].branchTaken = inst->readNextPC() != (inst->readPC() + sizeof(TheISA::MachInst)); + InstSeqNum squash_seq_num = inst->seqNum; +#else + toFetch->decodeInfo[tid].branchTaken = inst->readNextNPC() != + (inst->readNextPC() + sizeof(TheISA::MachInst)); + + toFetch->decodeInfo[tid].bdelayDoneSeqNum = bdelayDoneSeqNum[tid]; + squashAfterDelaySlot[tid] = false; + + InstSeqNum squash_seq_num = bdelayDoneSeqNum[tid]; +#endif + // Might have to tell fetch to unblock. if (decodeStatus[tid] == Blocked || decodeStatus[tid] == Unblocking) { @@ -296,7 +310,7 @@ DefaultDecode<Impl>::squash(DynInstPtr &inst, unsigned tid) for (int i=0; i<fromFetch->size; i++) { if (fromFetch->insts[i]->threadNumber == tid && - fromFetch->insts[i]->seqNum > inst->seqNum) { + fromFetch->insts[i]->seqNum > squash_seq_num) { fromFetch->insts[i]->setSquashed(); } } @@ -304,15 +318,35 @@ DefaultDecode<Impl>::squash(DynInstPtr &inst, unsigned tid) // Clear the instruction list and skid buffer in case they have any // insts in them. while (!insts[tid].empty()) { + +#if THE_ISA != ALPHA_ISA + if (insts[tid].front()->seqNum <= squash_seq_num) { + DPRINTF(Decode, "[tid:%i]: Cannot remove incoming decode " + "instructions before delay slot [sn:%i]. %i insts" + "left in decode.\n", tid, squash_seq_num, + insts[tid].size()); + break; + } +#endif insts[tid].pop(); } while (!skidBuffer[tid].empty()) { + +#if THE_ISA != ALPHA_ISA + if (skidBuffer[tid].front()->seqNum <= squash_seq_num) { + DPRINTF(Decode, "[tid:%i]: Cannot remove skidBuffer " + "instructions before delay slot [sn:%i]. %i insts" + "left in decode.\n", tid, squash_seq_num, + insts[tid].size()); + break; + } +#endif skidBuffer[tid].pop(); } // Squash instructions up until this one - cpu->removeInstsUntil(inst->seqNum, tid); + cpu->removeInstsUntil(squash_seq_num, tid); } template<class Impl> @@ -611,7 +645,7 @@ DefaultDecode<Impl>::decode(bool &status_change, unsigned tid) // will allow, as long as it is not currently blocked. if (decodeStatus[tid] == Running || decodeStatus[tid] == Idle) { - DPRINTF(Decode, "[tid:%u] Not blocked, so attempting to run " + DPRINTF(Decode, "[tid:%u]: Not blocked, so attempting to run " "stage.\n",tid); decodeInsts(tid); @@ -710,6 +744,9 @@ DefaultDecode<Impl>::decodeInsts(unsigned tid) // Ensure that if it was predicted as a branch, it really is a // branch. if (inst->predTaken() && !inst->isControl()) { + DPRINTF(Decode, "PredPC : %#x != NextPC: %#x\n",inst->predPC, + inst->nextPC + 4); + panic("Instruction predicted as a branch!"); ++decodeControlMispred; @@ -730,12 +767,43 @@ DefaultDecode<Impl>::decodeInsts(unsigned tid) // Might want to set some sort of boolean and just do // a check at the end +#if THE_ISA == ALPHA_ISA squash(inst, inst->threadNumber); inst->setPredTarg(inst->branchTarget()); - break; +#else + // If mispredicted as taken, then ignore delay slot + // instruction... else keep delay slot and squash + // after it is sent to rename + if (inst->predTaken() && inst->isCondDelaySlot()) { + DPRINTF(Decode, "[tid:%i]: Conditional delay slot inst." + "[sn:%i] PC %#x mispredicted as taken.\n", tid, + inst->seqNum, inst->PC); + bdelayDoneSeqNum[tid] = inst->seqNum; + squash(inst, inst->threadNumber); + inst->setPredTarg(inst->branchTarget()); + break; + } else { + DPRINTF(Decode, "[tid:%i]: Misprediction detected at " + "[sn:%i] PC %#x, will squash after delay slot " + "inst. is sent to Rename\n", + tid, inst->seqNum, inst->PC); + bdelayDoneSeqNum[tid] = inst->seqNum + 1; + squashAfterDelaySlot[tid] = true; + squashInst[tid] = inst; + continue; + } +#endif } } + + if (squashAfterDelaySlot[tid]) { + assert(!inst->isSquashed()); + squash(squashInst[tid], squashInst[tid]->threadNumber); + squashInst[tid]->setPredTarg(squashInst[tid]->branchTarget()); + assert(!inst->isSquashed()); + break; + } } // If we didn't process all instructions, then we will need to block diff --git a/src/cpu/o3/dyn_inst.hh b/src/cpu/o3/dyn_inst.hh index a2cdf2dba..5f7caf79f 100644 --- a/src/cpu/o3/dyn_inst.hh +++ b/src/cpu/o3/dyn_inst.hh @@ -34,12 +34,15 @@ #include "arch/isa_specific.hh" #if THE_ISA == ALPHA_ISA -template <class Impl> -class AlphaDynInst; - -struct AlphaSimpleImpl; - -typedef AlphaDynInst<AlphaSimpleImpl> O3DynInst; + template <class Impl> class AlphaDynInst; + struct AlphaSimpleImpl; + typedef AlphaDynInst<AlphaSimpleImpl> O3DynInst; +#elif THE_ISA == MIPS_ISA + template <class Impl> class MipsDynInst; + struct MipsSimpleImpl; + typedef MipsDynInst<MipsSimpleImpl> O3DynInst; +#else + #error "O3DynInst not defined for this ISA" #endif #endif // __CPU_O3_DYN_INST_HH__ diff --git a/src/cpu/o3/fetch.hh b/src/cpu/o3/fetch.hh index 931919af8..1a2ca32a4 100644 --- a/src/cpu/o3/fetch.hh +++ b/src/cpu/o3/fetch.hh @@ -106,6 +106,7 @@ class DefaultFetch virtual void recvRetry(); }; + public: /** Overall fetch status. Used to determine if the CPU can * deschedule itsef due to a lack of activity. @@ -218,9 +219,10 @@ class DefaultFetch * @param next_PC Next PC variable passed in by reference. It is * expected to be set to the current PC; it will be updated with what * the next PC will be. + * @param next_NPC Used for ISAs which use delay slots. * @return Whether or not a branch was predicted as taken. */ - bool lookupAndUpdateNextPC(DynInstPtr &inst, Addr &next_PC); + bool lookupAndUpdateNextPC(DynInstPtr &inst, Addr &next_PC, Addr &next_NPC); /** * Fetches the cache line that contains fetch_PC. Returns any @@ -255,7 +257,8 @@ class DefaultFetch * remove any instructions that are not in the ROB. The source of this * squash should be the commit stage. */ - void squash(const Addr &new_PC, unsigned tid); + void squash(const Addr &new_PC, const InstSeqNum &seq_num, + bool squash_delay_slot, unsigned tid); /** Ticks the fetch stage, processing all inputs signals and fetching * as many instructions as possible. @@ -340,14 +343,12 @@ class DefaultFetch /** Per-thread next PC. */ Addr nextPC[Impl::MaxThreads]; -#if THE_ISA != ALPHA_ISA /** Per-thread next Next PC. * This is not a real register but is used for * architectures that use a branch-delay slot. * (such as MIPS or Sparc) */ Addr nextNPC[Impl::MaxThreads]; -#endif /** Memory request used to access cache. */ RequestPtr memReq[Impl::MaxThreads]; @@ -360,6 +361,19 @@ class DefaultFetch /** Tracks how many instructions has been fetched this cycle. */ int numInst; + /** Tracks delay slot information for threads in ISAs which use + * delay slots; + */ + struct DelaySlotInfo { + InstSeqNum delaySlotSeqNum; + InstSeqNum branchSeqNum; + int numInsts; + Addr targetAddr; + bool targetReady; + }; + + DelaySlotInfo delaySlotInfo[Impl::MaxThreads]; + /** Source of possible stalls. */ struct Stalls { bool decode; diff --git a/src/cpu/o3/fetch_impl.hh b/src/cpu/o3/fetch_impl.hh index 4184e1867..54e35433b 100644 --- a/src/cpu/o3/fetch_impl.hh +++ b/src/cpu/o3/fetch_impl.hh @@ -165,10 +165,15 @@ DefaultFetch<Impl>::DefaultFetch(Params *params) cacheDataPC[tid] = 0; cacheDataValid[tid] = false; - stalls[tid].decode = 0; - stalls[tid].rename = 0; - stalls[tid].iew = 0; - stalls[tid].commit = 0; + delaySlotInfo[tid].branchSeqNum = -1; + delaySlotInfo[tid].numInsts = 0; + delaySlotInfo[tid].targetAddr = 0; + delaySlotInfo[tid].targetReady = false; + + stalls[tid].decode = false; + stalls[tid].rename = false; + stalls[tid].iew = false; + stalls[tid].commit = false; } // Get the size of an instruction. @@ -288,6 +293,9 @@ DefaultFetch<Impl>::setCPU(O3CPU *cpu_ptr) } #endif + // Schedule fetch to get the correct PC from the CPU + // scheduleFetchStartupEvent(1); + // Fetch needs to start fetching instructions at the very beginning, // so it must start up in active state. switchToActive(); @@ -426,6 +434,10 @@ DefaultFetch<Impl>::takeOverFrom() nextPC[i] = cpu->readNextPC(i); #if THE_ISA != ALPHA_ISA nextNPC[i] = cpu->readNextNPC(i); + delaySlotInfo[i].branchSeqNum = -1; + delaySlotInfo[i].numInsts = 0; + delaySlotInfo[i].targetAddr = 0; + delaySlotInfo[i].targetReady = false; #endif fetchStatus[i] = Running; } @@ -474,7 +486,8 @@ DefaultFetch<Impl>::switchToInactive() template <class Impl> bool -DefaultFetch<Impl>::lookupAndUpdateNextPC(DynInstPtr &inst, Addr &next_PC) +DefaultFetch<Impl>::lookupAndUpdateNextPC(DynInstPtr &inst, Addr &next_PC, + Addr &next_NPC) { // Do branch prediction check here. // A bit of a misnomer...next_PC is actually the current PC until @@ -482,12 +495,54 @@ DefaultFetch<Impl>::lookupAndUpdateNextPC(DynInstPtr &inst, Addr &next_PC) bool predict_taken; if (!inst->isControl()) { +#if THE_ISA == ALPHA_ISA next_PC = next_PC + instSize; inst->setPredTarg(next_PC); +#else + Addr cur_PC = next_PC; + next_PC = cur_PC + instSize; //next_NPC; + next_NPC = cur_PC + (2 * instSize);//next_NPC + instSize; + inst->setPredTarg(next_NPC); +#endif return false; } - predict_taken = branchPred.predict(inst, next_PC, inst->threadNumber); + int tid = inst->threadNumber; +#if THE_ISA == ALPHA_ISA + predict_taken = branchPred.predict(inst, next_PC, tid); +#else + Addr pred_PC = next_PC; + predict_taken = branchPred.predict(inst, pred_PC, tid); + + if (predict_taken) { + DPRINTF(Fetch, "[tid:%i]: Branch predicted to be true.\n", tid); + } else { + DPRINTF(Fetch, "[tid:%i]: Branch predicted to be false.\n", tid); + } + + if (predict_taken) { + next_PC = next_NPC; + next_NPC = pred_PC; + + // Update delay slot info + ++delaySlotInfo[tid].numInsts; + delaySlotInfo[tid].targetAddr = pred_PC; + DPRINTF(Fetch, "[tid:%i]: %i delay slot inst(s) to process.\n", tid, + delaySlotInfo[tid].numInsts); + } else { // !predict_taken + if (inst->isCondDelaySlot()) { + next_PC = pred_PC; + // The delay slot is skipped here if there is on + // prediction + } else { + next_PC = next_NPC; + // No need to declare a delay slot here since + // there is no for the pred. target to jump + } + + next_NPC = next_NPC + instSize; + } +#endif ++fetchedBranches; @@ -606,6 +661,7 @@ DefaultFetch<Impl>::doSquash(const Addr &new_PC, unsigned tid) PC[tid] = new_PC; nextPC[tid] = new_PC + instSize; + nextNPC[tid] = new_PC + (2 * instSize); // Clear the icache miss if it's outstanding. if (fetchStatus[tid] == IcacheWaitResponse) { @@ -639,6 +695,14 @@ DefaultFetch<Impl>::squashFromDecode(const Addr &new_PC, doSquash(new_PC, tid); +#if THE_ISA != ALPHA_ISA + if (seq_num <= delaySlotInfo[tid].branchSeqNum) { + delaySlotInfo[tid].numInsts = 0; + delaySlotInfo[tid].targetAddr = 0; + delaySlotInfo[tid].targetReady = false; + } +#endif + // Tell the CPU to remove any instructions that are in flight between // fetch and decode. cpu->removeInstsUntil(seq_num, tid); @@ -712,14 +776,26 @@ DefaultFetch<Impl>::updateFetchStatus() template <class Impl> void -DefaultFetch<Impl>::squash(const Addr &new_PC, unsigned tid) +DefaultFetch<Impl>::squash(const Addr &new_PC, const InstSeqNum &seq_num, + bool squash_delay_slot, unsigned tid) { DPRINTF(Fetch, "[tid:%u]: Squash from commit.\n",tid); doSquash(new_PC, tid); +#if THE_ISA == ALPHA_ISA // Tell the CPU to remove any instructions that are not in the ROB. - cpu->removeInstsNotInROB(tid); + cpu->removeInstsNotInROB(tid, true, 0); +#else + if (seq_num <= delaySlotInfo[tid].branchSeqNum) { + delaySlotInfo[tid].numInsts = 0; + delaySlotInfo[tid].targetAddr = 0; + delaySlotInfo[tid].targetReady = false; + } + + // Tell the CPU to remove any instructions that are not in the ROB. + cpu->removeInstsNotInROB(tid, squash_delay_slot, seq_num); +#endif } template <class Impl> @@ -828,8 +904,16 @@ DefaultFetch<Impl>::checkSignalsAndUpdate(unsigned tid) DPRINTF(Fetch, "[tid:%u]: Squashing instructions due to squash " "from commit.\n",tid); +#if THE_ISA == ALPHA_ISA + InstSeqNum doneSeqNum = fromCommit->commitInfo[tid].doneSeqNum; +#else + InstSeqNum doneSeqNum = fromCommit->commitInfo[tid].bdelayDoneSeqNum; +#endif // In any case, squash. - squash(fromCommit->commitInfo[tid].nextPC,tid); + squash(fromCommit->commitInfo[tid].nextPC, + doneSeqNum, + fromCommit->commitInfo[tid].squashDelaySlot, + tid); // Also check if there's a mispredict that happened. if (fromCommit->commitInfo[tid].branchMispredict) { @@ -876,9 +960,15 @@ DefaultFetch<Impl>::checkSignalsAndUpdate(unsigned tid) } if (fetchStatus[tid] != Squashing) { + +#if THE_ISA == ALPHA_ISA + InstSeqNum doneSeqNum = fromDecode->decodeInfo[tid].doneSeqNum; +#else + InstSeqNum doneSeqNum = fromDecode->decodeInfo[tid].bdelayDoneSeqNum; +#endif // Squash unless we're already squashing squashFromDecode(fromDecode->decodeInfo[tid].nextPC, - fromDecode->decodeInfo[tid].doneSeqNum, + doneSeqNum, tid); return true; @@ -984,6 +1074,7 @@ DefaultFetch<Impl>::fetch(bool &status_change) } Addr next_PC = fetch_PC; + Addr next_NPC = next_PC + instSize; InstSeqNum inst_seq; MachInst inst; ExtMachInst ext_inst; @@ -1002,10 +1093,13 @@ DefaultFetch<Impl>::fetch(bool &status_change) // ended this fetch block. bool predicted_branch = false; + // Need to keep track of whether or not a delay slot + // instruction has been fetched + for (; offset < cacheBlkSize && numInst < fetchWidth && - !predicted_branch; + (!predicted_branch || delaySlotInfo[tid].numInsts > 0); ++numInst) { // Get a sequence number. @@ -1042,7 +1136,8 @@ DefaultFetch<Impl>::fetch(bool &status_change) instruction->staticInst, instruction->readPC(),tid); - predicted_branch = lookupAndUpdateNextPC(instruction, next_PC); + predicted_branch = lookupAndUpdateNextPC(instruction, next_PC, + next_NPC); // Add instruction to the CPU's list of instructions. instruction->setInstListIt(cpu->addInst(instruction)); @@ -1068,7 +1163,41 @@ DefaultFetch<Impl>::fetch(bool &status_change) break; } - offset+= instSize; + offset += instSize; + +#if THE_ISA != ALPHA_ISA + if (predicted_branch) { + delaySlotInfo[tid].branchSeqNum = inst_seq; + + DPRINTF(Fetch, "[tid:%i]: Delay slot branch set to [sn:%i]\n", + tid, inst_seq); + continue; + } else if (delaySlotInfo[tid].numInsts > 0) { + --delaySlotInfo[tid].numInsts; + + // It's OK to set PC to target of branch + if (delaySlotInfo[tid].numInsts == 0) { + delaySlotInfo[tid].targetReady = true; + + // Break the looping condition + predicted_branch = true; + } + + DPRINTF(Fetch, "[tid:%i]: %i delay slot inst(s) left to" + " process.\n", tid, delaySlotInfo[tid].numInsts); + } +#endif + } + + if (offset >= cacheBlkSize) { + DPRINTF(Fetch, "[tid:%i]: Done fetching, reached the end of cache " + "block.\n", tid); + } else if (numInst >= fetchWidth) { + DPRINTF(Fetch, "[tid:%i]: Done fetching, reached fetch bandwidth " + "for this cycle.\n", tid); + } else if (predicted_branch && delaySlotInfo[tid].numInsts <= 0) { + DPRINTF(Fetch, "[tid:%i]: Done fetching, predicted branch " + "instruction encountered.\n", tid); } } @@ -1085,12 +1214,19 @@ DefaultFetch<Impl>::fetch(bool &status_change) PC[tid] = next_PC; nextPC[tid] = next_PC + instSize; #else - PC[tid] = next_PC; - nextPC[tid] = next_PC + instSize; - nextPC[tid] = next_PC + instSize; - - thread->setNextPC(thread->readNextNPC()); - thread->setNextNPC(thread->readNextNPC() + sizeof(MachInst)); + if (delaySlotInfo[tid].targetReady && + delaySlotInfo[tid].numInsts == 0) { + // Set PC to target + PC[tid] = delaySlotInfo[tid].targetAddr; //next_PC + nextPC[tid] = next_PC + instSize; //next_NPC + nextNPC[tid] = next_PC + (2 * instSize); + + delaySlotInfo[tid].targetReady = false; + } else { + PC[tid] = next_PC; + nextPC[tid] = next_NPC; + nextNPC[tid] = next_NPC + instSize; + } #endif } else { // We shouldn't be in an icache miss and also have a fault (an ITB diff --git a/src/cpu/o3/iew.hh b/src/cpu/o3/iew.hh index 6c6be4787..76fa008ee 100644 --- a/src/cpu/o3/iew.hh +++ b/src/cpu/o3/iew.hh @@ -405,6 +405,9 @@ class DefaultIEW /** Records if there is a fetch redirect on this cycle for each thread. */ bool fetchRedirect[Impl::MaxThreads]; + /** Keeps track of the last valid branch delay slot instss for threads */ + InstSeqNum bdelayDoneSeqNum[Impl::MaxThreads]; + /** Used to track if all instructions have been dispatched this cycle. * If they have not, then blocking must have occurred, and the instructions * would already be added to the skid buffer. diff --git a/src/cpu/o3/iew_impl.hh b/src/cpu/o3/iew_impl.hh index 684ae2295..c4ec93b3e 100644 --- a/src/cpu/o3/iew_impl.hh +++ b/src/cpu/o3/iew_impl.hh @@ -73,6 +73,7 @@ DefaultIEW<Impl>::DefaultIEW(Params *params) dispatchStatus[i] = Running; stalls[i].commit = false; fetchRedirect[i] = false; + bdelayDoneSeqNum[i] = 0; } wbMax = wbWidth * params->wbDepth; @@ -428,13 +429,31 @@ DefaultIEW<Impl>::squash(unsigned tid) instQueue.squash(tid); // Tell the LDSTQ to start squashing. +#if THE_ISA == ALPHA_ISA ldstQueue.squash(fromCommit->commitInfo[tid].doneSeqNum, tid); - +#else + ldstQueue.squash(fromCommit->commitInfo[tid].bdelayDoneSeqNum, tid); +#endif updatedQueues = true; // Clear the skid buffer in case it has any data in it. - while (!skidBuffer[tid].empty()) { + DPRINTF(IEW, "[tid:%i]: Removing skidbuffer instructions until [sn:%i].\n", + tid, fromCommit->commitInfo[tid].bdelayDoneSeqNum); + while (!skidBuffer[tid].empty()) { +#if THE_ISA != ALPHA_ISA + if (skidBuffer[tid].front()->seqNum <= + fromCommit->commitInfo[tid].bdelayDoneSeqNum) { + DPRINTF(IEW, "[tid:%i]: Cannot remove skidbuffer instructions " + "that occur before delay slot [sn:%i].\n", + fromCommit->commitInfo[tid].bdelayDoneSeqNum, + tid); + break; + } else { + DPRINTF(IEW, "[tid:%i]: Removing instruction [sn:%i] from " + "skidBuffer.\n", tid, skidBuffer[tid].front()->seqNum); + } +#endif if (skidBuffer[tid].front()->isLoad() || skidBuffer[tid].front()->isStore() ) { toRename->iewInfo[tid].dispatchedToLSQ++; @@ -445,6 +464,8 @@ DefaultIEW<Impl>::squash(unsigned tid) skidBuffer[tid].pop(); } + bdelayDoneSeqNum[tid] = fromCommit->commitInfo[tid].bdelayDoneSeqNum; + emptyRenameInsts(tid); } @@ -458,10 +479,26 @@ DefaultIEW<Impl>::squashDueToBranch(DynInstPtr &inst, unsigned tid) toCommit->squash[tid] = true; toCommit->squashedSeqNum[tid] = inst->seqNum; toCommit->mispredPC[tid] = inst->readPC(); - toCommit->nextPC[tid] = inst->readNextPC(); toCommit->branchMispredict[tid] = true; + +#if THE_ISA == ALPHA_ISA toCommit->branchTaken[tid] = inst->readNextPC() != (inst->readPC() + sizeof(TheISA::MachInst)); + toCommit->nextPC[tid] = inst->readNextPC(); +#else + bool branch_taken = inst->readNextNPC() != + (inst->readNextPC() + sizeof(TheISA::MachInst)); + + toCommit->branchTaken[tid] = branch_taken; + + toCommit->condDelaySlotBranch[tid] = inst->isCondDelaySlot(); + + if (inst->isCondDelaySlot() && branch_taken) { + toCommit->nextPC[tid] = inst->readNextPC(); + } else { + toCommit->nextPC[tid] = inst->readNextNPC(); + } +#endif toCommit->includeSquashInst[tid] = false; @@ -825,9 +862,11 @@ DefaultIEW<Impl>::sortInsts() { int insts_from_rename = fromRename->size; #ifdef DEBUG +#if THE_ISA == ALPHA_ISA for (int i = 0; i < numThreads; i++) assert(insts[i].empty()); #endif +#endif for (int i = 0; i < insts_from_rename; ++i) { insts[fromRename->insts[i]->threadNumber].push(fromRename->insts[i]); } @@ -837,7 +876,23 @@ template <class Impl> void DefaultIEW<Impl>::emptyRenameInsts(unsigned tid) { + DPRINTF(IEW, "[tid:%i]: Removing incoming rename instructions until " + "[sn:%i].\n", tid, bdelayDoneSeqNum[tid]); + while (!insts[tid].empty()) { + +#if THE_ISA != ALPHA_ISA + if (insts[tid].front()->seqNum <= bdelayDoneSeqNum[tid]) { + DPRINTF(IEW, "[tid:%i]: Done removing, cannot remove instruction" + " that occurs at or before delay slot [sn:%i].\n", + tid, bdelayDoneSeqNum[tid]); + break; + } else { + DPRINTF(IEW, "[tid:%i]: Removing incoming rename instruction " + "[sn:%i].\n", tid, insts[tid].front()->seqNum); + } +#endif + if (insts[tid].front()->isLoad() || insts[tid].front()->isStore() ) { toRename->iewInfo[tid].dispatchedToLSQ++; @@ -1120,7 +1175,7 @@ DefaultIEW<Impl>::dispatchInsts(unsigned tid) } if (!insts_to_dispatch.empty()) { - DPRINTF(IEW,"[tid:%i]: Issue: Bandwidth Full. Blocking.\n"); + DPRINTF(IEW,"[tid:%i]: Issue: Bandwidth Full. Blocking.\n", tid); block(tid); toRename->iewUnblock[tid] = false; } @@ -1263,9 +1318,13 @@ DefaultIEW<Impl>::executeInsts() fetchRedirect[tid] = true; DPRINTF(IEW, "Execute: Branch mispredict detected.\n"); +#if THE_ISA == ALPHA_ISA DPRINTF(IEW, "Execute: Redirecting fetch to PC: %#x.\n", inst->nextPC); - +#else + DPRINTF(IEW, "Execute: Redirecting fetch to PC: %#x.\n", + inst->nextNPC); +#endif // If incorrect, then signal the ROB that it must be squashed. squashDueToBranch(inst, tid); diff --git a/src/cpu/o3/inst_queue.hh b/src/cpu/o3/inst_queue.hh index 4c69ca384..d745faf7b 100644 --- a/src/cpu/o3/inst_queue.hh +++ b/src/cpu/o3/inst_queue.hh @@ -26,7 +26,6 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * Authors: Kevin Lim - * Korey Sewell */ #ifndef __CPU_O3_INST_QUEUE_HH__ diff --git a/src/cpu/o3/inst_queue_impl.hh b/src/cpu/o3/inst_queue_impl.hh index 36e0842be..8faae3ebf 100644 --- a/src/cpu/o3/inst_queue_impl.hh +++ b/src/cpu/o3/inst_queue_impl.hh @@ -993,7 +993,11 @@ InstructionQueue<Impl>::squash(unsigned tid) // Read instruction sequence number of last instruction out of the // time buffer. +#if THE_ISA == ALPHA_ISA squashedSeqNum[tid] = fromCommit->commitInfo[tid].doneSeqNum; +#else + squashedSeqNum[tid] = fromCommit->commitInfo[tid].bdelayDoneSeqNum; +#endif // Call doSquash if there are insts in the IQ if (count[tid] > 0) { diff --git a/src/cpu/o3/isa_specific.hh b/src/cpu/o3/isa_specific.hh index f8a9dd8cc..4937589e3 100755 --- a/src/cpu/o3/isa_specific.hh +++ b/src/cpu/o3/isa_specific.hh @@ -35,6 +35,11 @@ #include "cpu/o3/alpha/impl.hh" #include "cpu/o3/alpha/params.hh" #include "cpu/o3/alpha/dyn_inst.hh" +#elif THE_ISA == MIPS_ISA + #include "cpu/o3/mips/cpu.hh" + #include "cpu/o3/mips/impl.hh" + #include "cpu/o3/mips/params.hh" + #include "cpu/o3/mips/dyn_inst.hh" #else - #error "O3CPU doesnt support this ISA" + #error "ISA-specific header files O3CPU not defined ISA" #endif diff --git a/src/cpu/o3/mips/cpu.cc b/src/cpu/o3/mips/cpu.cc new file mode 100755 index 000000000..420f460b2 --- /dev/null +++ b/src/cpu/o3/mips/cpu.cc @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Kevin Lim + * Korey Sewell + */ + +#include "cpu/o3/mips/impl.hh" +#include "cpu/o3/mips/cpu_impl.hh" +#include "cpu/o3/mips/dyn_inst.hh" + +// Force instantiation of MipsO3CPU for all the implemntations that are +// needed. Consider merging this and mips_dyn_inst.cc, and maybe all +// classes that depend on a certain impl, into one file (mips_impl.cc?). +template class MipsO3CPU<MipsSimpleImpl>; diff --git a/src/cpu/o3/mips/cpu.hh b/src/cpu/o3/mips/cpu.hh new file mode 100755 index 000000000..1813e8656 --- /dev/null +++ b/src/cpu/o3/mips/cpu.hh @@ -0,0 +1,153 @@ +/* + * Copyright (c) 2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Kevin Lim + * Korey Sewell + */ + +#ifndef __CPU_O3_MIPS_CPU_HH__ +#define __CPU_O3_MIPS_CPU_HH__ + +#include "arch/isa_traits.hh" +#include "cpu/thread_context.hh" +#include "cpu/o3/cpu.hh" +#include "sim/byteswap.hh" + +class EndQuiesceEvent; +namespace Kernel { + class Statistics; +}; + +class TranslatingPort; + +/** + * MipsO3CPU class. Derives from the FullO3CPU class, and + * implements all ISA and implementation specific functions of the + * CPU. This is the CPU class that is used for the SimObjects, and is + * what is given to the DynInsts. Most of its state exists in the + * FullO3CPU; the state is has is mainly for ISA specific + * functionality. + */ +template <class Impl> +class MipsO3CPU : public FullO3CPU<Impl> +{ + protected: + typedef TheISA::IntReg IntReg; + typedef TheISA::FloatReg FloatReg; + typedef TheISA::FloatRegBits FloatRegBits; + typedef TheISA::MiscReg MiscReg; + typedef TheISA::RegFile RegFile; + typedef TheISA::MiscRegFile MiscRegFile; + + public: + typedef O3ThreadState<Impl> ImplState; + typedef O3ThreadState<Impl> Thread; + typedef typename Impl::Params Params; + + /** Constructs an MipsO3CPU with the given parameters. */ + MipsO3CPU(Params *params); + + /** Registers statistics. */ + void regStats(); + + /** Translates instruction requestion in syscall emulation mode. */ + Fault translateInstReq(RequestPtr &req, Thread *thread) + { + return thread->getProcessPtr()->pTable->translate(req); + } + + /** Translates data read request in syscall emulation mode. */ + Fault translateDataReadReq(RequestPtr &req, Thread *thread) + { + return thread->getProcessPtr()->pTable->translate(req); + } + + /** Translates data write request in syscall emulation mode. */ + Fault translateDataWriteReq(RequestPtr &req, Thread *thread) + { + return thread->getProcessPtr()->pTable->translate(req); + } + + /** Reads a miscellaneous register. */ + MiscReg readMiscReg(int misc_reg, unsigned tid); + + /** Reads a misc. register, including any side effects the read + * might have as defined by the architecture. + */ + MiscReg readMiscRegWithEffect(int misc_reg, Fault &fault, unsigned tid); + + /** Sets a miscellaneous register. */ + Fault setMiscReg(int misc_reg, const MiscReg &val, unsigned tid); + + /** Sets a misc. register, including any side effects the write + * might have as defined by the architecture. + */ + Fault setMiscRegWithEffect(int misc_reg, const MiscReg &val, unsigned tid); + + /** Initiates a squash of all in-flight instructions for a given + * thread. The source of the squash is an external update of + * state through the TC. + */ + void squashFromTC(unsigned tid); + + /** Traps to handle given fault. */ + void trap(Fault fault, unsigned tid); + + /** Executes a syscall. + * @todo: Determine if this needs to be virtual. + */ + void syscall(int64_t callnum, int tid); + /** Gets a syscall argument. */ + IntReg getSyscallArg(int i, int tid); + + /** Used to shift args for indirect syscall. */ + void setSyscallArg(int i, IntReg val, int tid); + + /** Sets the return value of a syscall. */ + void setSyscallReturn(SyscallReturn return_value, int tid); + + /** CPU read function, forwards read to LSQ. */ + template <class T> + Fault read(RequestPtr &req, T &data, int load_idx) + { + return this->iew.ldstQueue.read(req, data, load_idx); + } + + /** CPU write function, forwards write to LSQ. */ + template <class T> + Fault write(RequestPtr &req, T &data, int store_idx) + { + return this->iew.ldstQueue.write(req, data, store_idx); + } + + Addr lockAddr; + + /** Temporary fix for the lock flag, works in the UP case. */ + bool lockFlag; +}; + +#endif // __CPU_O3_MIPS_CPU_HH__ diff --git a/src/cpu/o3/mips/cpu_builder.cc b/src/cpu/o3/mips/cpu_builder.cc new file mode 100644 index 000000000..f1c3b33a5 --- /dev/null +++ b/src/cpu/o3/mips/cpu_builder.cc @@ -0,0 +1,394 @@ +/* + * Copyright (c) 2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Kevin Lim + * Korey Sewell + */ + +#include <string> + +#include "cpu/base.hh" +#include "cpu/o3/mips/cpu.hh" +#include "cpu/o3/mips/impl.hh" +#include "cpu/o3/mips/params.hh" +#include "cpu/o3/fu_pool.hh" +#include "sim/builder.hh" + +class DerivO3CPU : public MipsO3CPU<MipsSimpleImpl> +{ + public: + DerivO3CPU(MipsSimpleParams *p) + : MipsO3CPU<MipsSimpleImpl>(p) + { } +}; + +BEGIN_DECLARE_SIM_OBJECT_PARAMS(DerivO3CPU) + +Param<int> clock; +Param<int> numThreads; +Param<int> activity; + +SimObjectVectorParam<Process *> workload; + +SimObjectParam<MemObject *> mem; + +SimObjectParam<BaseCPU *> checker; + +Param<Counter> max_insts_any_thread; +Param<Counter> max_insts_all_threads; +Param<Counter> max_loads_any_thread; +Param<Counter> max_loads_all_threads; + +Param<unsigned> cachePorts; + +Param<unsigned> decodeToFetchDelay; +Param<unsigned> renameToFetchDelay; +Param<unsigned> iewToFetchDelay; +Param<unsigned> commitToFetchDelay; +Param<unsigned> fetchWidth; + +Param<unsigned> renameToDecodeDelay; +Param<unsigned> iewToDecodeDelay; +Param<unsigned> commitToDecodeDelay; +Param<unsigned> fetchToDecodeDelay; +Param<unsigned> decodeWidth; + +Param<unsigned> iewToRenameDelay; +Param<unsigned> commitToRenameDelay; +Param<unsigned> decodeToRenameDelay; +Param<unsigned> renameWidth; + +Param<unsigned> commitToIEWDelay; +Param<unsigned> renameToIEWDelay; +Param<unsigned> issueToExecuteDelay; +Param<unsigned> dispatchWidth; +Param<unsigned> issueWidth; +Param<unsigned> wbWidth; +Param<unsigned> wbDepth; +SimObjectParam<FUPool *> fuPool; + +Param<unsigned> iewToCommitDelay; +Param<unsigned> renameToROBDelay; +Param<unsigned> commitWidth; +Param<unsigned> squashWidth; +Param<Tick> trapLatency; + +Param<unsigned> backComSize; +Param<unsigned> forwardComSize; + +Param<std::string> predType; +Param<unsigned> localPredictorSize; +Param<unsigned> localCtrBits; +Param<unsigned> localHistoryTableSize; +Param<unsigned> localHistoryBits; +Param<unsigned> globalPredictorSize; +Param<unsigned> globalCtrBits; +Param<unsigned> globalHistoryBits; +Param<unsigned> choicePredictorSize; +Param<unsigned> choiceCtrBits; + +Param<unsigned> BTBEntries; +Param<unsigned> BTBTagSize; + +Param<unsigned> RASSize; + +Param<unsigned> LQEntries; +Param<unsigned> SQEntries; +Param<unsigned> LFSTSize; +Param<unsigned> SSITSize; + +Param<unsigned> numPhysIntRegs; +Param<unsigned> numPhysFloatRegs; +Param<unsigned> numIQEntries; +Param<unsigned> numROBEntries; + +Param<unsigned> smtNumFetchingThreads; +Param<std::string> smtFetchPolicy; +Param<std::string> smtLSQPolicy; +Param<unsigned> smtLSQThreshold; +Param<std::string> smtIQPolicy; +Param<unsigned> smtIQThreshold; +Param<std::string> smtROBPolicy; +Param<unsigned> smtROBThreshold; +Param<std::string> smtCommitPolicy; + +Param<unsigned> instShiftAmt; + +Param<bool> defer_registration; + +Param<bool> function_trace; +Param<Tick> function_trace_start; + +END_DECLARE_SIM_OBJECT_PARAMS(DerivO3CPU) + +BEGIN_INIT_SIM_OBJECT_PARAMS(DerivO3CPU) + + INIT_PARAM(clock, "clock speed"), + INIT_PARAM(numThreads, "number of HW thread contexts"), + INIT_PARAM_DFLT(activity, "Initial activity count", 0), + + INIT_PARAM(workload, "Processes to run"), + + INIT_PARAM(mem, "Memory"), + + INIT_PARAM_DFLT(checker, "Checker CPU", NULL), + + INIT_PARAM_DFLT(max_insts_any_thread, + "Terminate when any thread reaches this inst count", + 0), + INIT_PARAM_DFLT(max_insts_all_threads, + "Terminate when all threads have reached" + "this inst count", + 0), + INIT_PARAM_DFLT(max_loads_any_thread, + "Terminate when any thread reaches this load count", + 0), + INIT_PARAM_DFLT(max_loads_all_threads, + "Terminate when all threads have reached this load" + "count", + 0), + + INIT_PARAM_DFLT(cachePorts, "Cache Ports", 200), + + INIT_PARAM(decodeToFetchDelay, "Decode to fetch delay"), + INIT_PARAM(renameToFetchDelay, "Rename to fetch delay"), + INIT_PARAM(iewToFetchDelay, "Issue/Execute/Writeback to fetch" + "delay"), + INIT_PARAM(commitToFetchDelay, "Commit to fetch delay"), + INIT_PARAM(fetchWidth, "Fetch width"), + INIT_PARAM(renameToDecodeDelay, "Rename to decode delay"), + INIT_PARAM(iewToDecodeDelay, "Issue/Execute/Writeback to decode" + "delay"), + INIT_PARAM(commitToDecodeDelay, "Commit to decode delay"), + INIT_PARAM(fetchToDecodeDelay, "Fetch to decode delay"), + INIT_PARAM(decodeWidth, "Decode width"), + + INIT_PARAM(iewToRenameDelay, "Issue/Execute/Writeback to rename" + "delay"), + INIT_PARAM(commitToRenameDelay, "Commit to rename delay"), + INIT_PARAM(decodeToRenameDelay, "Decode to rename delay"), + INIT_PARAM(renameWidth, "Rename width"), + + INIT_PARAM(commitToIEWDelay, "Commit to " + "Issue/Execute/Writeback delay"), + INIT_PARAM(renameToIEWDelay, "Rename to " + "Issue/Execute/Writeback delay"), + INIT_PARAM(issueToExecuteDelay, "Issue to execute delay (internal" + "to the IEW stage)"), + INIT_PARAM(dispatchWidth, "Dispatch width"), + INIT_PARAM(issueWidth, "Issue width"), + INIT_PARAM(wbWidth, "Writeback width"), + INIT_PARAM(wbDepth, "Writeback depth (number of cycles it can buffer)"), + INIT_PARAM_DFLT(fuPool, "Functional unit pool", NULL), + + INIT_PARAM(iewToCommitDelay, "Issue/Execute/Writeback to commit " + "delay"), + INIT_PARAM(renameToROBDelay, "Rename to reorder buffer delay"), + INIT_PARAM(commitWidth, "Commit width"), + INIT_PARAM(squashWidth, "Squash width"), + INIT_PARAM_DFLT(trapLatency, "Number of cycles before the trap is handled", 6), + + INIT_PARAM(backComSize, "Time buffer size for backwards communication"), + INIT_PARAM(forwardComSize, "Time buffer size for forward communication"), + + INIT_PARAM(predType, "Type of branch predictor ('local', 'tournament')"), + INIT_PARAM(localPredictorSize, "Size of local predictor"), + INIT_PARAM(localCtrBits, "Bits per counter"), + INIT_PARAM(localHistoryTableSize, "Size of local history table"), + INIT_PARAM(localHistoryBits, "Bits for the local history"), + INIT_PARAM(globalPredictorSize, "Size of global predictor"), + INIT_PARAM(globalCtrBits, "Bits per counter"), + INIT_PARAM(globalHistoryBits, "Bits of history"), + INIT_PARAM(choicePredictorSize, "Size of choice predictor"), + INIT_PARAM(choiceCtrBits, "Bits of choice counters"), + + INIT_PARAM(BTBEntries, "Number of BTB entries"), + INIT_PARAM(BTBTagSize, "Size of the BTB tags, in bits"), + + INIT_PARAM(RASSize, "RAS size"), + + INIT_PARAM(LQEntries, "Number of load queue entries"), + INIT_PARAM(SQEntries, "Number of store queue entries"), + INIT_PARAM(LFSTSize, "Last fetched store table size"), + INIT_PARAM(SSITSize, "Store set ID table size"), + + INIT_PARAM(numPhysIntRegs, "Number of physical integer registers"), + INIT_PARAM(numPhysFloatRegs, "Number of physical floating point " + "registers"), + INIT_PARAM(numIQEntries, "Number of instruction queue entries"), + INIT_PARAM(numROBEntries, "Number of reorder buffer entries"), + + INIT_PARAM_DFLT(smtNumFetchingThreads, "SMT Number of Fetching Threads", 1), + INIT_PARAM_DFLT(smtFetchPolicy, "SMT Fetch Policy", "SingleThread"), + INIT_PARAM_DFLT(smtLSQPolicy, "SMT LSQ Sharing Policy", "Partitioned"), + INIT_PARAM_DFLT(smtLSQThreshold,"SMT LSQ Threshold", 100), + INIT_PARAM_DFLT(smtIQPolicy, "SMT IQ Policy", "Partitioned"), + INIT_PARAM_DFLT(smtIQThreshold, "SMT IQ Threshold", 100), + INIT_PARAM_DFLT(smtROBPolicy, "SMT ROB Sharing Policy", "Partitioned"), + INIT_PARAM_DFLT(smtROBThreshold,"SMT ROB Threshold", 100), + INIT_PARAM_DFLT(smtCommitPolicy,"SMT Commit Fetch Policy", "RoundRobin"), + + INIT_PARAM(instShiftAmt, "Number of bits to shift instructions by"), + INIT_PARAM(defer_registration, "defer system registration (for sampling)"), + + INIT_PARAM(function_trace, "Enable function trace"), + INIT_PARAM(function_trace_start, "Cycle to start function trace") + +END_INIT_SIM_OBJECT_PARAMS(DerivO3CPU) + +CREATE_SIM_OBJECT(DerivO3CPU) +{ + DerivO3CPU *cpu; + + // In non-full-system mode, we infer the number of threads from + // the workload if it's not explicitly specified. + int actual_num_threads = + (numThreads.isValid() && numThreads >= workload.size()) ? + numThreads : workload.size(); + + if (workload.size() == 0) { + fatal("Must specify at least one workload!"); + } + + MipsSimpleParams *params = new MipsSimpleParams; + + params->clock = clock; + + params->name = getInstanceName(); + params->numberOfThreads = actual_num_threads; + params->activity = activity; + + params->workload = workload; + + params->mem = mem; + + params->checker = checker; + + params->max_insts_any_thread = max_insts_any_thread; + params->max_insts_all_threads = max_insts_all_threads; + params->max_loads_any_thread = max_loads_any_thread; + params->max_loads_all_threads = max_loads_all_threads; + + // + // Caches + // + params->cachePorts = cachePorts; + + params->decodeToFetchDelay = decodeToFetchDelay; + params->renameToFetchDelay = renameToFetchDelay; + params->iewToFetchDelay = iewToFetchDelay; + params->commitToFetchDelay = commitToFetchDelay; + params->fetchWidth = fetchWidth; + + params->renameToDecodeDelay = renameToDecodeDelay; + params->iewToDecodeDelay = iewToDecodeDelay; + params->commitToDecodeDelay = commitToDecodeDelay; + params->fetchToDecodeDelay = fetchToDecodeDelay; + params->decodeWidth = decodeWidth; + + params->iewToRenameDelay = iewToRenameDelay; + params->commitToRenameDelay = commitToRenameDelay; + params->decodeToRenameDelay = decodeToRenameDelay; + params->renameWidth = renameWidth; + + params->commitToIEWDelay = commitToIEWDelay; + params->renameToIEWDelay = renameToIEWDelay; + params->issueToExecuteDelay = issueToExecuteDelay; + params->dispatchWidth = dispatchWidth; + params->issueWidth = issueWidth; + params->wbWidth = wbWidth; + params->wbDepth = wbDepth; + params->fuPool = fuPool; + + params->iewToCommitDelay = iewToCommitDelay; + params->renameToROBDelay = renameToROBDelay; + params->commitWidth = commitWidth; + params->squashWidth = squashWidth; + params->trapLatency = trapLatency; + + params->backComSize = backComSize; + params->forwardComSize = forwardComSize; + + params->predType = predType; + params->localPredictorSize = localPredictorSize; + params->localCtrBits = localCtrBits; + params->localHistoryTableSize = localHistoryTableSize; + params->localHistoryBits = localHistoryBits; + params->globalPredictorSize = globalPredictorSize; + params->globalCtrBits = globalCtrBits; + params->globalHistoryBits = globalHistoryBits; + params->choicePredictorSize = choicePredictorSize; + params->choiceCtrBits = choiceCtrBits; + + params->BTBEntries = BTBEntries; + params->BTBTagSize = BTBTagSize; + + params->RASSize = RASSize; + + params->LQEntries = LQEntries; + params->SQEntries = SQEntries; + + params->SSITSize = SSITSize; + params->LFSTSize = LFSTSize; + + params->numPhysIntRegs = numPhysIntRegs; + params->numPhysFloatRegs = numPhysFloatRegs; + params->numIQEntries = numIQEntries; + params->numROBEntries = numROBEntries; + + params->smtNumFetchingThreads = smtNumFetchingThreads; + + // Default smtFetchPolicy to "RoundRobin", if necessary. + std::string round_robin_policy = "RoundRobin"; + std::string single_thread = "SingleThread"; + + if (actual_num_threads > 1 && single_thread.compare(smtFetchPolicy) == 0) + params->smtFetchPolicy = round_robin_policy; + else + params->smtFetchPolicy = smtFetchPolicy; + + params->smtIQPolicy = smtIQPolicy; + params->smtLSQPolicy = smtLSQPolicy; + params->smtLSQThreshold = smtLSQThreshold; + params->smtROBPolicy = smtROBPolicy; + params->smtROBThreshold = smtROBThreshold; + params->smtCommitPolicy = smtCommitPolicy; + + params->instShiftAmt = 2; + + params->deferRegistration = defer_registration; + + params->functionTrace = function_trace; + params->functionTraceStart = function_trace_start; + + cpu = new DerivO3CPU(params); + + return cpu; +} + +REGISTER_SIM_OBJECT("DerivO3CPU", DerivO3CPU) + diff --git a/src/cpu/o3/mips/cpu_impl.hh b/src/cpu/o3/mips/cpu_impl.hh new file mode 100644 index 000000000..6b9f3ae10 --- /dev/null +++ b/src/cpu/o3/mips/cpu_impl.hh @@ -0,0 +1,253 @@ +/* + * Copyright (c) 2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Kevin Lim + * Korey Sewell + */ + +#include "config/use_checker.hh" + +#include "arch/mips/faults.hh" +#include "base/cprintf.hh" +#include "base/statistics.hh" +#include "base/timebuf.hh" +#include "cpu/checker/thread_context.hh" +#include "sim/sim_events.hh" +#include "sim/stats.hh" + +#include "cpu/o3/mips/cpu.hh" +#include "cpu/o3/mips/params.hh" +#include "cpu/o3/mips/thread_context.hh" +#include "cpu/o3/comm.hh" +#include "cpu/o3/thread_state.hh" + +using namespace TheISA; + +template <class Impl> +MipsO3CPU<Impl>::MipsO3CPU(Params *params) + : FullO3CPU<Impl>(params) +{ + DPRINTF(O3CPU, "Creating MipsO3CPU object.\n"); + + // Setup any thread state. + this->thread.resize(this->numThreads); + + for (int i = 0; i < this->numThreads; ++i) { + if (i < params->workload.size()) { + DPRINTF(O3CPU, "Workload[%i] process is %#x", + i, this->thread[i]); + this->thread[i] = new Thread(this, i, params->workload[i], + i, params->mem); + + this->thread[i]->setStatus(ThreadContext::Suspended); + + + /* Use this port to for syscall emulation writes to memory. */ + Port *mem_port; + TranslatingPort *trans_port; + trans_port = new TranslatingPort(csprintf("%s-%d-funcport", + name(), i), + params->workload[i]->pTable, + false); + mem_port = params->mem->getPort("functional"); + mem_port->setPeer(trans_port); + trans_port->setPeer(mem_port); + this->thread[i]->setMemPort(trans_port); + + //usedTids[i] = true; + //threadMap[i] = i; + } else { + //Allocate Empty thread so M5 can use later + //when scheduling threads to CPU + Process* dummy_proc = NULL; + + this->thread[i] = new Thread(this, i, dummy_proc, i, params->mem); + //usedTids[i] = false; + } + + ThreadContext *tc; + + // Setup the TC that will serve as the interface to the threads/CPU. + MipsTC<Impl> *mips_tc = + new MipsTC<Impl>; + + tc = mips_tc; + + // If we're using a checker, then the TC should be the + // CheckerThreadContext. +#if USE_CHECKER + if (params->checker) { + tc = new CheckerThreadContext<MipsTC<Impl> >( + mips_tc, this->checker); + } +#endif + + mips_tc->cpu = this; + mips_tc->thread = this->thread[i]; + + // Give the thread the TC. + this->thread[i]->tc = tc; + + // Add the TC to the CPU's list of TC's. + this->threadContexts.push_back(tc); + } + + for (int i=0; i < this->numThreads; i++) { + this->thread[i]->setFuncExeInst(0); + } + + // Sets CPU pointers. These must be set at this level because the CPU + // pointers are defined to be the highest level of CPU class. + this->fetch.setCPU(this); + this->decode.setCPU(this); + this->rename.setCPU(this); + this->iew.setCPU(this); + this->commit.setCPU(this); + + this->rob.setCPU(this); + this->regFile.setCPU(this); + + lockAddr = 0; + lockFlag = false; +} + +template <class Impl> +void +MipsO3CPU<Impl>::regStats() +{ + // Register stats for everything that has stats. + this->fullCPURegStats(); + this->fetch.regStats(); + this->decode.regStats(); + this->rename.regStats(); + this->iew.regStats(); + this->commit.regStats(); +} + + +template <class Impl> +MiscReg +MipsO3CPU<Impl>::readMiscReg(int misc_reg, unsigned tid) +{ + return this->regFile.readMiscReg(misc_reg, tid); +} + +template <class Impl> +MiscReg +MipsO3CPU<Impl>::readMiscRegWithEffect(int misc_reg, Fault &fault, + unsigned tid) +{ + return this->regFile.readMiscRegWithEffect(misc_reg, fault, tid); +} + +template <class Impl> +Fault +MipsO3CPU<Impl>::setMiscReg(int misc_reg, const MiscReg &val, unsigned tid) +{ + return this->regFile.setMiscReg(misc_reg, val, tid); +} + +template <class Impl> +Fault +MipsO3CPU<Impl>::setMiscRegWithEffect(int misc_reg, const MiscReg &val, + unsigned tid) +{ + return this->regFile.setMiscRegWithEffect(misc_reg, val, tid); +} + +template <class Impl> +void +MipsO3CPU<Impl>::squashFromTC(unsigned tid) +{ + this->thread[tid]->inSyscall = true; + this->commit.generateTCEvent(tid); +} + +template <class Impl> +void +MipsO3CPU<Impl>::trap(Fault fault, unsigned tid) +{ + // Pass the thread's TC into the invoke method. + fault->invoke(this->threadContexts[tid]); +} + +#if !FULL_SYSTEM + +template <class Impl> +void +MipsO3CPU<Impl>::syscall(int64_t callnum, int tid) +{ + DPRINTF(O3CPU, "[tid:%i] Executing syscall().\n\n", tid); + + DPRINTF(Activity,"Activity: syscall() called.\n"); + + // Temporarily increase this by one to account for the syscall + // instruction. + ++(this->thread[tid]->funcExeInst); + + // Execute the actual syscall. + this->thread[tid]->syscall(callnum); + + // Decrease funcExeInst by one as the normal commit will handle + // incrementing it. + --(this->thread[tid]->funcExeInst); + + DPRINTF(O3CPU, "[tid:%i] Register 2 is %i ", tid, this->readIntReg(2)); +} + +template <class Impl> +TheISA::IntReg +MipsO3CPU<Impl>::getSyscallArg(int i, int tid) +{ + return this->readArchIntReg(MipsISA::ArgumentReg0 + i, tid); +} + +template <class Impl> +void +MipsO3CPU<Impl>::setSyscallArg(int i, IntReg val, int tid) +{ + this->setArchIntReg(MipsISA::ArgumentReg0 + i, val, tid); +} + +template <class Impl> +void +MipsO3CPU<Impl>::setSyscallReturn(SyscallReturn return_value, int tid) +{ + // check for error condition. Mips syscall convention is to + // indicate success/failure in reg a3 (r19) and put the + // return value itself in the standard return value reg (v0). + if (return_value.successful()) { + // no error + this->setArchIntReg(SyscallSuccessReg, 0, tid); + this->setArchIntReg(ReturnValueReg, return_value.value(), tid); + } else { + // got an error, return details + this->setArchIntReg(SyscallSuccessReg, (IntReg) -1, tid); + this->setArchIntReg(ReturnValueReg, -return_value.value(), tid); + } +} +#endif diff --git a/src/cpu/o3/mips/dyn_inst.cc b/src/cpu/o3/mips/dyn_inst.cc new file mode 100755 index 000000000..216aa7d2c --- /dev/null +++ b/src/cpu/o3/mips/dyn_inst.cc @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Kevin Lim + * Korey Sewell + */ + +#include "cpu/o3/mips/dyn_inst_impl.hh" +#include "cpu/o3/mips/impl.hh" + +// Force instantiation of MipsDynInst for all the implementations that +// are needed. +template class MipsDynInst<MipsSimpleImpl>; diff --git a/src/cpu/o3/mips/dyn_inst.hh b/src/cpu/o3/mips/dyn_inst.hh new file mode 100755 index 000000000..06bdfcec4 --- /dev/null +++ b/src/cpu/o3/mips/dyn_inst.hh @@ -0,0 +1,282 @@ +/* + * Copyright (c) 2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Kevin Lim + * Korey Sewell + */ + +#ifndef __CPU_O3_MIPS_DYN_INST_HH__ +#define __CPU_O3_MIPS_DYN_INST_HH__ + +#include "arch/isa_traits.hh" +#include "cpu/base_dyn_inst.hh" +#include "cpu/inst_seq.hh" +#include "cpu/o3/mips/cpu.hh" +#include "cpu/o3/mips/impl.hh" + +class Packet; + +/** + * Mostly implementation & ISA specific MipsDynInst. As with most + * other classes in the new CPU model, it is templated on the Impl to + * allow for passing in of all types, such as the CPU type and the ISA + * type. The MipsDynInst serves as the primary interface to the CPU + * for instructions that are executing. + */ +template <class Impl> +class MipsDynInst : public BaseDynInst<Impl> +{ + public: + /** Typedef for the CPU. */ + typedef typename Impl::O3CPU O3CPU; + + /** Binary machine instruction type. */ + typedef TheISA::MachInst MachInst; + /** Extended machine instruction type. */ + typedef TheISA::ExtMachInst ExtMachInst; + /** Logical register index type. */ + typedef TheISA::RegIndex RegIndex; + /** Integer register index type. */ + typedef TheISA::IntReg IntReg; + typedef TheISA::FloatReg FloatReg; + typedef TheISA::FloatRegBits FloatRegBits; + /** Misc register index type. */ + typedef TheISA::MiscReg MiscReg; + + enum { + MaxInstSrcRegs = TheISA::MaxInstSrcRegs, //< Max source regs + MaxInstDestRegs = TheISA::MaxInstDestRegs, //< Max dest regs + }; + + public: + /** BaseDynInst constructor given a binary instruction. */ + MipsDynInst(ExtMachInst inst, Addr PC, Addr Pred_PC, InstSeqNum seq_num, + O3CPU *cpu); + + /** BaseDynInst constructor given a static inst pointer. */ + MipsDynInst(StaticInstPtr &_staticInst); + + /** Executes the instruction.*/ + Fault execute(); + + /** Initiates the access. Only valid for memory operations. */ + Fault initiateAcc(); + + /** Completes the access. Only valid for memory operations. */ + Fault completeAcc(Packet *pkt); + + private: + /** Initializes variables. */ + void initVars(); + + public: + /** Reads a miscellaneous register. */ + MiscReg readMiscReg(int misc_reg) + { + return this->cpu->readMiscReg(misc_reg, this->threadNumber); + } + + /** Reads a misc. register, including any side-effects the read + * might have as defined by the architecture. + */ + MiscReg readMiscRegWithEffect(int misc_reg, Fault &fault) + { + return this->cpu->readMiscRegWithEffect(misc_reg, fault, + this->threadNumber); + } + + /** Sets a misc. register. */ + Fault setMiscReg(int misc_reg, const MiscReg &val) + { + this->instResult.integer = val; + return this->cpu->setMiscReg(misc_reg, val, this->threadNumber); + } + + /** Sets a misc. register, including any side-effects the write + * might have as defined by the architecture. + */ + Fault setMiscRegWithEffect(int misc_reg, const MiscReg &val) + { + return this->cpu->setMiscRegWithEffect(misc_reg, val, + this->threadNumber); + } + + /** Calls a syscall. */ + void syscall(int64_t callnum); + + private: + /** Physical register index of the destination registers of this + * instruction. + */ + PhysRegIndex _destRegIdx[MaxInstDestRegs]; + + /** Physical register index of the source registers of this + * instruction. + */ + PhysRegIndex _srcRegIdx[MaxInstSrcRegs]; + + /** Physical register index of the previous producers of the + * architected destinations. + */ + PhysRegIndex _prevDestRegIdx[MaxInstDestRegs]; + + public: + + // The register accessor methods provide the index of the + // instruction's operand (e.g., 0 or 1), not the architectural + // register index, to simplify the implementation of register + // renaming. We find the architectural register index by indexing + // into the instruction's own operand index table. Note that a + // raw pointer to the StaticInst is provided instead of a + // ref-counted StaticInstPtr to redice overhead. This is fine as + // long as these methods don't copy the pointer into any long-term + // storage (which is pretty hard to imagine they would have reason + // to do). + + uint64_t readIntReg(const StaticInst *si, int idx) + { + return this->cpu->readIntReg(_srcRegIdx[idx]); + } + + FloatReg readFloatReg(const StaticInst *si, int idx, int width) + { + return this->cpu->readFloatReg(_srcRegIdx[idx], width); + } + + FloatReg readFloatReg(const StaticInst *si, int idx) + { + return this->cpu->readFloatReg(_srcRegIdx[idx]); + } + + FloatRegBits readFloatRegBits(const StaticInst *si, int idx, int width) + { + return this->cpu->readFloatRegBits(_srcRegIdx[idx], width); + } + + FloatRegBits readFloatRegBits(const StaticInst *si, int idx) + { + return this->cpu->readFloatRegBits(_srcRegIdx[idx]); + } + + /** @todo: Make results into arrays so they can handle multiple dest + * registers. + */ + void setIntReg(const StaticInst *si, int idx, uint64_t val) + { + this->cpu->setIntReg(_destRegIdx[idx], val); + BaseDynInst<Impl>::setIntReg(si, idx, val); + } + + void setFloatReg(const StaticInst *si, int idx, FloatReg val, int width) + { + this->cpu->setFloatReg(_destRegIdx[idx], val, width); + BaseDynInst<Impl>::setFloatReg(si, idx, val, width); + } + + void setFloatReg(const StaticInst *si, int idx, FloatReg val) + { + this->cpu->setFloatReg(_destRegIdx[idx], val); + BaseDynInst<Impl>::setFloatReg(si, idx, val); + } + + void setFloatRegBits(const StaticInst *si, int idx, + FloatRegBits val, int width) + { + this->cpu->setFloatRegBits(_destRegIdx[idx], val, width); + BaseDynInst<Impl>::setFloatRegBits(si, idx, val); + } + + void setFloatRegBits(const StaticInst *si, int idx, FloatRegBits val) + { + this->cpu->setFloatRegBits(_destRegIdx[idx], val); + BaseDynInst<Impl>::setFloatRegBits(si, idx, val); + } + + /** Returns the physical register index of the i'th destination + * register. + */ + PhysRegIndex renamedDestRegIdx(int idx) const + { + return _destRegIdx[idx]; + } + + /** Returns the physical register index of the i'th source register. */ + PhysRegIndex renamedSrcRegIdx(int idx) const + { + return _srcRegIdx[idx]; + } + + /** Returns the physical register index of the previous physical register + * that remapped to the same logical register index. + */ + PhysRegIndex prevDestRegIdx(int idx) const + { + return _prevDestRegIdx[idx]; + } + + /** Renames a destination register to a physical register. Also records + * the previous physical register that the logical register mapped to. + */ + void renameDestReg(int idx, + PhysRegIndex renamed_dest, + PhysRegIndex previous_rename) + { + _destRegIdx[idx] = renamed_dest; + _prevDestRegIdx[idx] = previous_rename; + } + + /** Renames a source logical register to the physical register which + * has/will produce that logical register's result. + * @todo: add in whether or not the source register is ready. + */ + void renameSrcReg(int idx, PhysRegIndex renamed_src) + { + _srcRegIdx[idx] = renamed_src; + } + + public: + /** Calculates EA part of a memory instruction. Currently unused, + * though it may be useful in the future if we want to split + * memory operations into EA calculation and memory access parts. + */ + Fault calcEA() + { + return this->staticInst->eaCompInst()->execute(this, this->traceData); + } + + /** Does the memory access part of a memory instruction. Currently unused, + * though it may be useful in the future if we want to split + * memory operations into EA calculation and memory access parts. + */ + Fault memAccess() + { + return this->staticInst->memAccInst()->execute(this, this->traceData); + } +}; + +#endif // __CPU_O3_MIPS_DYN_INST_HH__ + diff --git a/src/cpu/o3/mips/dyn_inst_impl.hh b/src/cpu/o3/mips/dyn_inst_impl.hh new file mode 100755 index 000000000..57dec1ccf --- /dev/null +++ b/src/cpu/o3/mips/dyn_inst_impl.hh @@ -0,0 +1,116 @@ +/* + * Copyright (c) 2004-2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Kevin Lim + */ + +#include "cpu/o3/mips/dyn_inst.hh" + +template <class Impl> +MipsDynInst<Impl>::MipsDynInst(ExtMachInst inst, Addr PC, Addr Pred_PC, + InstSeqNum seq_num, O3CPU *cpu) + : BaseDynInst<Impl>(inst, PC, Pred_PC, seq_num, cpu) +{ + initVars(); +} + +template <class Impl> +MipsDynInst<Impl>::MipsDynInst(StaticInstPtr &_staticInst) + : BaseDynInst<Impl>(_staticInst) +{ + initVars(); +} + +template <class Impl> +void +MipsDynInst<Impl>::initVars() +{ + // Make sure to have the renamed register entries set to the same + // as the normal register entries. It will allow the IQ to work + // without any modifications. + for (int i = 0; i < this->staticInst->numDestRegs(); i++) { + _destRegIdx[i] = this->staticInst->destRegIdx(i); + } + + for (int i = 0; i < this->staticInst->numSrcRegs(); i++) { + _srcRegIdx[i] = this->staticInst->srcRegIdx(i); + this->_readySrcRegIdx[i] = 0; + } +} + +template <class Impl> +Fault +MipsDynInst<Impl>::execute() +{ + // @todo: Pretty convoluted way to avoid squashing from happening + // when using the TC during an instruction's execution + // (specifically for instructions that have side-effects that use + // the TC). Fix this. + bool in_syscall = this->thread->inSyscall; + this->thread->inSyscall = true; + + this->fault = this->staticInst->execute(this, this->traceData); + + this->thread->inSyscall = in_syscall; + + return this->fault; +} + +template <class Impl> +Fault +MipsDynInst<Impl>::initiateAcc() +{ + // @todo: Pretty convoluted way to avoid squashing from happening + // when using the TC during an instruction's execution + // (specifically for instructions that have side-effects that use + // the TC). Fix this. + bool in_syscall = this->thread->inSyscall; + this->thread->inSyscall = true; + + this->fault = this->staticInst->initiateAcc(this, this->traceData); + + this->thread->inSyscall = in_syscall; + + return this->fault; +} + +template <class Impl> +Fault +MipsDynInst<Impl>::completeAcc(Packet *pkt) +{ + this->fault = this->staticInst->completeAcc(pkt, this, this->traceData); + + return this->fault; +} + +template <class Impl> +void +MipsDynInst<Impl>::syscall(int64_t callnum) +{ + this->cpu->syscall(callnum, this->threadNumber); +} + diff --git a/src/cpu/o3/mips/impl.hh b/src/cpu/o3/mips/impl.hh new file mode 100644 index 000000000..ac7181a19 --- /dev/null +++ b/src/cpu/o3/mips/impl.hh @@ -0,0 +1,93 @@ +/* + * Copyright (c) 2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Kevin Lim + * Korey Sewell + */ + +#ifndef __CPU_O3_MIPS_IMPL_HH__ +#define __CPU_O3_MIPS_IMPL_HH__ + +#include "arch/mips/isa_traits.hh" + +#include "cpu/o3/mips/params.hh" +#include "cpu/o3/cpu_policy.hh" + + +// Forward declarations. +template <class Impl> +class MipsDynInst; + +template <class Impl> +class MipsO3CPU; + +/** Implementation specific struct that defines several key types to the + * CPU, the stages within the CPU, the time buffers, and the DynInst. + * The struct defines the ISA, the CPU policy, the specific DynInst, the + * specific O3CPU, and all of the structs from the time buffers to do + * communication. + * This is one of the key things that must be defined for each hardware + * specific CPU implementation. + */ +struct MipsSimpleImpl +{ + /** The type of MachInst. */ + typedef TheISA::MachInst MachInst; + + /** The CPU policy to be used, which defines all of the CPU stages. */ + typedef SimpleCPUPolicy<MipsSimpleImpl> CPUPol; + + /** The DynInst type to be used. */ + typedef MipsDynInst<MipsSimpleImpl> DynInst; + + /** The refcounted DynInst pointer to be used. In most cases this is + * what should be used, and not DynInst *. + */ + typedef RefCountingPtr<DynInst> DynInstPtr; + + /** The O3CPU type to be used. */ + typedef MipsO3CPU<MipsSimpleImpl> O3CPU; + + /** Same typedef, but for CPUType. BaseDynInst may not always use + * an O3 CPU, so it's clearer to call it CPUType instead in that + * case. + */ + typedef O3CPU CPUType; + + /** The Params to be passed to each stage. */ + typedef MipsSimpleParams Params; + + enum { + MaxWidth = 8, + MaxThreads = 4 + }; +}; + +/** The O3Impl to be used. */ +typedef MipsSimpleImpl O3CPUImpl; + +#endif // __CPU_O3_MIPS_IMPL_HH__ diff --git a/src/cpu/o3/mips/params.hh b/src/cpu/o3/mips/params.hh new file mode 100644 index 000000000..d1ac62e21 --- /dev/null +++ b/src/cpu/o3/mips/params.hh @@ -0,0 +1,63 @@ +/* + * Copyright (c) 2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Kevin Lim + * Korey Sewell + */ + +#ifndef __CPU_O3_MIPS_PARAMS_HH__ +#define __CPU_O3_MIPS_PARAMS_HH__ + +#include "cpu/o3/cpu.hh" +#include "cpu/o3/params.hh" + +//Forward declarations +//class MipsDTB; +//class MipsITB; +class MemObject; +class Process; +class System; + +/** + * This file defines the parameters that will be used for the MipsO3CPU. + * This must be defined externally so that the Impl can have a params class + * defined that it can pass to all of the individual stages. + */ + +class MipsSimpleParams : public O3Params +{ + public: + MipsSimpleParams() {} + +#if FULL_SYSTEM + //Full System Paramater Objects place here + MipsITB *itb; + MipsDTB *dtb; +#endif +}; + +#endif // __CPU_O3_MIPS_PARAMS_HH__ diff --git a/src/cpu/o3/mips/thread_context.cc b/src/cpu/o3/mips/thread_context.cc new file mode 100755 index 000000000..0061a2a63 --- /dev/null +++ b/src/cpu/o3/mips/thread_context.cc @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Kevin Lim + * Korey Sewell + */ + +#include "cpu/o3/thread_context.hh" +#include "cpu/o3/thread_context_impl.hh" + +template class O3ThreadContext<MipsSimpleImpl>; + diff --git a/src/cpu/o3/mips/thread_context.hh b/src/cpu/o3/mips/thread_context.hh new file mode 100644 index 000000000..5b03426f1 --- /dev/null +++ b/src/cpu/o3/mips/thread_context.hh @@ -0,0 +1,67 @@ +/* + * Copyright (c) 2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Kevin Lim + * Korey Sewell + */ + +#include "cpu/o3/thread_context.hh" + +template <class Impl> +class MipsTC : public O3ThreadContext<Impl> +{ + public: + virtual uint64_t readNextNPC() + { + return this->cpu->readNextNPC(this->thread->readTid()); + } + + virtual void setNextNPC(uint64_t val) + { + this->cpu->setNextNPC(val, this->thread->readTid()); + } + + virtual void changeRegFileContext(TheISA::RegFile::ContextParam param, + TheISA::RegFile::ContextVal val) + { panic("Not supported on Mips!"); } + + /** This function exits the thread context in the CPU and returns + * 1 if the CPU has no more active threads (meaning it's OK to exit); + * Used in syscall-emulation mode when a thread executes the 'exit' + * syscall. + */ + virtual int exit() + { + this->deallocate(); + + // If there are still threads executing in the system + if (this->cpu->numActiveThreads()) + return 0; // don't exit simulation + else + return 1; // exit simulation + } +}; diff --git a/src/cpu/o3/rename.hh b/src/cpu/o3/rename.hh index 034087feb..ba26a01dd 100644 --- a/src/cpu/o3/rename.hh +++ b/src/cpu/o3/rename.hh @@ -76,6 +76,7 @@ class DefaultRename // using a list instead of a queue. (Most other stages use a // queue) typedef std::list<DynInstPtr> InstQueue; + typedef typename std::list<DynInstPtr>::iterator ListIt; public: /** Overall rename status. Used to determine if the CPU can @@ -170,7 +171,7 @@ class DefaultRename void takeOverFrom(); /** Squashes all instructions in a thread. */ - void squash(unsigned tid); + void squash(const InstSeqNum &squash_seq_num, unsigned tid); /** Ticks rename, which processes all input signals and attempts to rename * as many instructions as possible. @@ -222,7 +223,7 @@ class DefaultRename bool unblock(unsigned tid); /** Executes actual squash, removing squashed instructions. */ - void doSquash(unsigned tid); + void doSquash(const InstSeqNum &squash_seq_num, unsigned tid); /** Removes a committed instruction's rename history. */ void removeFromHistory(InstSeqNum inst_seq_num, unsigned tid); diff --git a/src/cpu/o3/rename_impl.hh b/src/cpu/o3/rename_impl.hh index 805a72808..ee251de00 100644 --- a/src/cpu/o3/rename_impl.hh +++ b/src/cpu/o3/rename_impl.hh @@ -26,6 +26,7 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * Authors: Kevin Lim + * Korey Sewell */ #include <list> @@ -318,7 +319,7 @@ DefaultRename<Impl>::takeOverFrom() template <class Impl> void -DefaultRename<Impl>::squash(unsigned tid) +DefaultRename<Impl>::squash(const InstSeqNum &squash_seq_num, unsigned tid) { DPRINTF(Rename, "[tid:%u]: Squashing instructions.\n",tid); @@ -341,19 +342,55 @@ DefaultRename<Impl>::squash(unsigned tid) unsigned squashCount = 0; for (int i=0; i<fromDecode->size; i++) { - if (fromDecode->insts[i]->threadNumber == tid) { + if (fromDecode->insts[i]->threadNumber == tid && + fromDecode->insts[i]->seqNum > squash_seq_num) { fromDecode->insts[i]->setSquashed(); wroteToTimeBuffer = true; squashCount++; } + } + // Clear the instruction list and skid buffer in case they have any + // insts in them. Since we support multiple ISAs, we cant just: + // "insts[tid].clear();" or "skidBuffer[tid].clear()" since there is + // a possible delay slot inst for different architectures + // insts[tid].clear(); +#if THE_ISA == ALPHA_ISA insts[tid].clear(); +#else + DPRINTF(Rename, "[tid:%i] Squashing incoming decode instructions until " + "[sn:%i].\n",tid, squash_seq_num); + ListIt ilist_it = insts[tid].begin(); + while (ilist_it != insts[tid].end()) { + if ((*ilist_it)->seqNum > squash_seq_num) { + (*ilist_it)->setSquashed(); + DPRINTF(Rename, "Squashing incoming decode instruction, " + "[tid:%i] [sn:%i] PC %08p.\n", tid, (*ilist_it)->seqNum, (*ilist_it)->PC); + } + ilist_it++; + } +#endif // Clear the skid buffer in case it has any data in it. + // See comments above. + // skidBuffer[tid].clear(); +#if THE_ISA == ALPHA_ISA skidBuffer[tid].clear(); - - doSquash(tid); +#else + DPRINTF(Rename, "[tid:%i] Squashing incoming skidbuffer instructions " + "until [sn:%i].\n", tid, squash_seq_num); + ListIt slist_it = skidBuffer[tid].begin(); + while (slist_it != skidBuffer[tid].end()) { + if ((*slist_it)->seqNum > squash_seq_num) { + (*slist_it)->setSquashed(); + DPRINTF(Rename, "Squashing skidbuffer instruction, [tid:%i] [sn:%i]" + "PC %08p.\n", tid, (*slist_it)->seqNum, (*slist_it)->PC); + } + slist_it++; + } +#endif + doSquash(squash_seq_num, tid); } template <class Impl> @@ -572,7 +609,7 @@ DefaultRename<Impl>::renameInsts(unsigned tid) if (inst->isSquashed()) { DPRINTF(Rename, "[tid:%u]: instruction %i with PC %#x is " "squashed, skipping.\n", - tid, inst->seqNum, inst->threadNumber,inst->readPC()); + tid, inst->seqNum, inst->readPC()); ++renameSquashedInsts; @@ -707,9 +744,11 @@ DefaultRename<Impl>::sortInsts() { int insts_from_decode = fromDecode->size; #ifdef DEBUG +#if THE_ISA == ALPHA_ISA for (int i=0; i < numThreads; i++) assert(insts[i].empty()); #endif +#endif for (int i = 0; i < insts_from_decode; ++i) { DynInstPtr inst = fromDecode->insts[i]; insts[inst->threadNumber].push_back(inst); @@ -824,12 +863,10 @@ DefaultRename<Impl>::unblock(unsigned tid) template <class Impl> void -DefaultRename<Impl>::doSquash(unsigned tid) +DefaultRename<Impl>::doSquash(const InstSeqNum &squashed_seq_num, unsigned tid) { typename list<RenameHistory>::iterator hb_it = historyBuffer[tid].begin(); - InstSeqNum squashed_seq_num = fromCommit->commitInfo[tid].doneSeqNum; - // After a syscall squashes everything, the history buffer may be empty // but the ROB may still be squashing instructions. if (historyBuffer[tid].empty()) { @@ -963,8 +1000,9 @@ DefaultRename<Impl>::renameDestRegs(DynInstPtr &inst,unsigned tid) historyBuffer[tid].push_front(hb_entry); - DPRINTF(Rename, "[tid:%u]: Adding instruction to history buffer, " - "[sn:%lli].\n",tid, + DPRINTF(Rename, "[tid:%u]: Adding instruction to history buffer " + "(size=%i), [sn:%lli].\n",tid, + historyBuffer[tid].size(), (*historyBuffer[tid].begin()).instSeqNum); // Tell the instruction to rename the appropriate destination @@ -1143,7 +1181,13 @@ DefaultRename<Impl>::checkSignalsAndUpdate(unsigned tid) DPRINTF(Rename, "[tid:%u]: Squashing instructions due to squash from " "commit.\n", tid); - squash(tid); +#if THE_ISA == ALPHA_ISA + InstSeqNum squashed_seq_num = fromCommit->commitInfo[tid].doneSeqNum; +#else + InstSeqNum squashed_seq_num = fromCommit->commitInfo[tid].bdelayDoneSeqNum; +#endif + + squash(squashed_seq_num, tid); return true; } diff --git a/src/cpu/o3/scoreboard.hh b/src/cpu/o3/scoreboard.hh index f8e4df3b7..544084c40 100644 --- a/src/cpu/o3/scoreboard.hh +++ b/src/cpu/o3/scoreboard.hh @@ -35,7 +35,7 @@ #include <iostream> #include <utility> #include <vector> -#include "arch/alpha/isa_traits.hh" +#include "arch/isa_traits.hh" #include "base/trace.hh" #include "base/traceflags.hh" #include "cpu/o3/comm.hh" diff --git a/src/cpu/o3/thread_context.hh b/src/cpu/o3/thread_context.hh index df8d1a6d8..9ca02b9f3 100755 --- a/src/cpu/o3/thread_context.hh +++ b/src/cpu/o3/thread_context.hh @@ -26,12 +26,12 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * Authors: Kevin Lim - * Korey Sewell */ #ifndef __CPU_O3_THREAD_CONTEXT_HH__ #define __CPU_O3_THREAD_CONTEXT_HH__ +#include "cpu/thread_context.hh" #include "cpu/o3/isa_specific.hh" class EndQuiesceEvent; diff --git a/src/cpu/simple/base.cc b/src/cpu/simple/base.cc index 240696c2b..622455745 100644 --- a/src/cpu/simple/base.cc +++ b/src/cpu/simple/base.cc @@ -26,7 +26,6 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * Authors: Steve Reinhardt - * Korey Sewell */ #include "arch/utility.hh" @@ -455,6 +454,7 @@ BaseSimpleCPU::advancePC(Fault fault) #else thread->setNextPC(thread->readNextNPC()); thread->setNextNPC(thread->readNextNPC() + sizeof(MachInst)); + assert(thread->readNextPC() != thread->readNextNPC()); #endif } diff --git a/src/cpu/static_inst.hh b/src/cpu/static_inst.hh index ea1a65148..0d356bb4c 100644 --- a/src/cpu/static_inst.hh +++ b/src/cpu/static_inst.hh @@ -214,6 +214,7 @@ class StaticInstBase : public RefCounted bool isIndirectCtrl() const { return flags[IsIndirectControl]; } bool isCondCtrl() const { return flags[IsCondControl]; } bool isUncondCtrl() const { return flags[IsUncondControl]; } + bool isCondDelaySlot() const { return flags[IsCondDelaySlot]; } bool isThreadSync() const { return flags[IsThreadSync]; } bool isSerializing() const { return flags[IsSerializing] || |