From c7f1cf1d58cf50118c18b1afc4c938eafba81492 Mon Sep 17 00:00:00 2001 From: Gabe Black Date: Fri, 13 Apr 2007 13:59:31 +0000 Subject: Remove most of the special handling for delay slots since they have to be squashed anyway on a mispredict. This is because the NNPC value they saw when executing was incorrect. --HG-- extra : convert_revision : b42c4eb28b4fbba66c65cbd0a5033bf886c1532d --- src/cpu/base_dyn_inst.hh | 2 ++ src/cpu/o3/comm.hh | 5 --- src/cpu/o3/commit_impl.hh | 75 ------------------------------------------- src/cpu/o3/cpu.cc | 12 ++----- src/cpu/o3/cpu.hh | 3 +- src/cpu/o3/decode_impl.hh | 65 +------------------------------------ src/cpu/o3/fetch.hh | 3 +- src/cpu/o3/fetch_impl.hh | 33 +++---------------- src/cpu/o3/iew.hh | 3 -- src/cpu/o3/iew_impl.hh | 74 ++++-------------------------------------- src/cpu/o3/inst_queue_impl.hh | 4 --- src/cpu/o3/rename_impl.hh | 49 ++-------------------------- 12 files changed, 20 insertions(+), 308 deletions(-) (limited to 'src/cpu') diff --git a/src/cpu/base_dyn_inst.hh b/src/cpu/base_dyn_inst.hh index eed05c2f1..b02038b3e 100644 --- a/src/cpu/base_dyn_inst.hh +++ b/src/cpu/base_dyn_inst.hh @@ -709,7 +709,9 @@ class BaseDynInst : public FastAlloc, public RefCounted /** Set the next NPC of this instruction (the target in Mips or Sparc).*/ void setNextNPC(uint64_t val) { +#if ISA_HAS_DELAY_SLOT nextNPC = val; +#endif } /** Sets the ASID. */ diff --git a/src/cpu/o3/comm.hh b/src/cpu/o3/comm.hh index d96919007..8d7bb95f4 100644 --- a/src/cpu/o3/comm.hh +++ b/src/cpu/o3/comm.hh @@ -87,7 +87,6 @@ struct DefaultIEWDefaultCommit { bool squash[Impl::MaxThreads]; bool branchMispredict[Impl::MaxThreads]; bool branchTaken[Impl::MaxThreads]; - bool squashDelaySlot[Impl::MaxThreads]; uint64_t mispredPC[Impl::MaxThreads]; uint64_t nextPC[Impl::MaxThreads]; uint64_t nextNPC[Impl::MaxThreads]; @@ -114,7 +113,6 @@ struct TimeBufStruct { uint64_t branchAddr; InstSeqNum doneSeqNum; - InstSeqNum bdelayDoneSeqNum; // @todo: Might want to package this kind of branch stuff into a single // struct as it is used pretty frequently. @@ -169,9 +167,6 @@ struct TimeBufStruct { // retired or squashed sequence number. InstSeqNum doneSeqNum; - InstSeqNum bdelayDoneSeqNum; - bool squashDelaySlot; - //Just in case we want to do a commit/squash on a cycle //(necessary for multiple ROBs?) bool commitInsts; diff --git a/src/cpu/o3/commit_impl.hh b/src/cpu/o3/commit_impl.hh index 65625065d..9dd5ed291 100644 --- a/src/cpu/o3/commit_impl.hh +++ b/src/cpu/o3/commit_impl.hh @@ -741,38 +741,15 @@ DefaultCommit::commit() // then use one older sequence number. InstSeqNum squashed_inst = fromIEW->squashedSeqNum[tid]; -#if ISA_HAS_DELAY_SLOT - InstSeqNum bdelay_done_seq_num = squashed_inst; - bool squash_bdelay_slot = fromIEW->squashDelaySlot[tid]; - bool branchMispredict = fromIEW->branchMispredict[tid]; - - // Squashing/not squashing the branch delay slot only makes - // sense when you're squashing from a branch, ie from a branch - // mispredict. - if (branchMispredict && !squash_bdelay_slot) { - bdelay_done_seq_num++; - } -#endif - if (fromIEW->includeSquashInst[tid] == true) { squashed_inst--; -#if ISA_HAS_DELAY_SLOT - bdelay_done_seq_num--; -#endif } // All younger instructions will be squashed. Set the sequence // number as the youngest instruction in the ROB. youngestSeqNum[tid] = squashed_inst; -#if ISA_HAS_DELAY_SLOT - rob->squash(bdelay_done_seq_num, tid); - toIEW->commitInfo[tid].squashDelaySlot = squash_bdelay_slot; - toIEW->commitInfo[tid].bdelayDoneSeqNum = bdelay_done_seq_num; -#else rob->squash(squashed_inst, tid); - toIEW->commitInfo[tid].squashDelaySlot = true; -#endif changedROBNumEntries[tid] = true; toIEW->commitInfo[tid].doneSeqNum = squashed_inst; @@ -809,10 +786,6 @@ DefaultCommit::commit() // Try to commit any instructions. commitInsts(); - } else { -#if ISA_HAS_DELAY_SLOT - skidInsert(); -#endif } //Check for any activity @@ -1164,37 +1137,13 @@ DefaultCommit::getInsts() { DPRINTF(Commit, "Getting instructions from Rename stage.\n"); -#if ISA_HAS_DELAY_SLOT - // Read any renamed instructions and place them into the ROB. - int insts_to_process = std::min((int)renameWidth, - (int)(fromRename->size + skidBuffer.size())); - int rename_idx = 0; - - DPRINTF(Commit, "%i insts available to process. Rename Insts:%i " - "SkidBuffer Insts:%i\n", insts_to_process, fromRename->size, - skidBuffer.size()); -#else // Read any renamed instructions and place them into the ROB. int insts_to_process = std::min((int)renameWidth, fromRename->size); -#endif - for (int inst_num = 0; inst_num < insts_to_process; ++inst_num) { DynInstPtr inst; -#if ISA_HAS_DELAY_SLOT - // Get insts from skidBuffer or from Rename - if (skidBuffer.size() > 0) { - DPRINTF(Commit, "Grabbing skidbuffer inst.\n"); - inst = skidBuffer.front(); - skidBuffer.pop(); - } else { - DPRINTF(Commit, "Grabbing rename inst.\n"); - inst = fromRename->insts[rename_idx++]; - } -#else inst = fromRename->insts[inst_num]; -#endif int tid = inst->threadNumber; if (!inst->isSquashed() && @@ -1216,30 +1165,6 @@ DefaultCommit::getInsts() inst->readPC(), inst->seqNum, tid); } } - -#if ISA_HAS_DELAY_SLOT - if (rename_idx < fromRename->size) { - DPRINTF(Commit,"Placing Rename Insts into skidBuffer.\n"); - - for (; - rename_idx < fromRename->size; - rename_idx++) { - DynInstPtr inst = fromRename->insts[rename_idx]; - - if (!inst->isSquashed()) { - DPRINTF(Commit, "Inserting PC %#x [sn:%i] [tid:%i] into ", - "skidBuffer.\n", inst->readPC(), inst->seqNum, - inst->threadNumber); - skidBuffer.push(inst); - } else { - DPRINTF(Commit, "Instruction PC %#x [sn:%i] [tid:%i] was " - "squashed, skipping.\n", - inst->readPC(), inst->seqNum, inst->threadNumber); - } - } - } -#endif - } template diff --git a/src/cpu/o3/cpu.cc b/src/cpu/o3/cpu.cc index 2e6a43f9c..b2b4645d2 100644 --- a/src/cpu/o3/cpu.cc +++ b/src/cpu/o3/cpu.cc @@ -696,7 +696,7 @@ FullO3CPU::removeThread(unsigned tid) // Squash Throughout Pipeline InstSeqNum squash_seq_num = commit.rob->readHeadInst(tid)->seqNum; - fetch.squash(0, sizeof(TheISA::MachInst), squash_seq_num, true, tid); + fetch.squash(0, sizeof(TheISA::MachInst), squash_seq_num, tid); decode.squash(tid); rename.squash(squash_seq_num, tid); iew.squash(tid); @@ -1226,9 +1226,7 @@ FullO3CPU::removeFrontInst(DynInstPtr &inst) template void -FullO3CPU::removeInstsNotInROB(unsigned tid, - bool squash_delay_slot, - const InstSeqNum &delay_slot_seq_num) +FullO3CPU::removeInstsNotInROB(unsigned tid) { DPRINTF(O3CPU, "Thread %i: Deleting instructions from instruction" " list.\n", tid); @@ -1259,12 +1257,6 @@ FullO3CPU::removeInstsNotInROB(unsigned tid, while (inst_it != end_it) { assert(!instList.empty()); -#if ISA_HAS_DELAY_SLOT - if(!squash_delay_slot && - delay_slot_seq_num >= (*inst_it)->seqNum) { - break; - } -#endif squashInstIt(inst_it, tid); inst_it--; diff --git a/src/cpu/o3/cpu.hh b/src/cpu/o3/cpu.hh index e71d05c8e..4b247e6e3 100644 --- a/src/cpu/o3/cpu.hh +++ b/src/cpu/o3/cpu.hh @@ -468,8 +468,7 @@ class FullO3CPU : public BaseO3CPU /** Remove all instructions that are not currently in the ROB. * There's also an option to not squash delay slot instructions.*/ - void removeInstsNotInROB(unsigned tid, bool squash_delay_slot, - const InstSeqNum &delay_slot_seq_num); + void removeInstsNotInROB(unsigned tid); /** Remove all instructions younger than the given sequence number. */ void removeInstsUntil(const InstSeqNum &seq_num,unsigned tid); diff --git a/src/cpu/o3/decode_impl.hh b/src/cpu/o3/decode_impl.hh index 314864f94..c9d0a1885 100644 --- a/src/cpu/o3/decode_impl.hh +++ b/src/cpu/o3/decode_impl.hh @@ -49,8 +49,6 @@ DefaultDecode::DefaultDecode(O3CPU *_cpu, Params *params) stalls[i].rename = false; stalls[i].iew = false; stalls[i].commit = false; - - squashAfterDelaySlot[i] = false; } // @todo: Make into a parameter @@ -278,17 +276,12 @@ DefaultDecode::squash(DynInstPtr &inst, unsigned tid) #if ISA_HAS_DELAY_SLOT toFetch->decodeInfo[tid].branchTaken = inst->readNextNPC() != (inst->readNextPC() + sizeof(TheISA::MachInst)); - - toFetch->decodeInfo[tid].bdelayDoneSeqNum = bdelayDoneSeqNum[tid]; - squashAfterDelaySlot[tid] = false; - - InstSeqNum squash_seq_num = bdelayDoneSeqNum[tid]; #else toFetch->decodeInfo[tid].branchTaken = inst->readNextPC() != (inst->readPC() + sizeof(TheISA::MachInst)); +#endif InstSeqNum squash_seq_num = inst->seqNum; -#endif // Might have to tell fetch to unblock. if (decodeStatus[tid] == Blocked || @@ -309,30 +302,10 @@ DefaultDecode::squash(DynInstPtr &inst, unsigned tid) // Clear the instruction list and skid buffer in case they have any // insts in them. while (!insts[tid].empty()) { - -#if ISA_HAS_DELAY_SLOT - if (insts[tid].front()->seqNum <= squash_seq_num) { - DPRINTF(Decode, "[tid:%i]: Cannot remove incoming decode " - "instructions before delay slot [sn:%i]. %i insts" - "left in decode.\n", tid, squash_seq_num, - insts[tid].size()); - break; - } -#endif insts[tid].pop(); } while (!skidBuffer[tid].empty()) { - -#if ISA_HAS_DELAY_SLOT - if (skidBuffer[tid].front()->seqNum <= squash_seq_num) { - DPRINTF(Decode, "[tid:%i]: Cannot remove skidBuffer " - "instructions before delay slot [sn:%i]. %i insts" - "left in decode.\n", tid, squash_seq_num, - insts[tid].size()); - break; - } -#endif skidBuffer[tid].pop(); } @@ -760,48 +733,12 @@ DefaultDecode::decodeInsts(unsigned tid) // Might want to set some sort of boolean and just do // a check at the end -#if !ISA_HAS_DELAY_SLOT squash(inst, inst->threadNumber); Addr target = inst->branchTarget(); inst->setPredTarg(target, target + sizeof(TheISA::MachInst)); break; -#else - // If mispredicted as taken, then ignore delay slot - // instruction... else keep delay slot and squash - // after it is sent to rename - if (inst->readPredTaken() && inst->isCondDelaySlot()) { - DPRINTF(Decode, "[tid:%i]: Conditional delay slot inst." - "[sn:%i] PC %#x mispredicted as taken.\n", tid, - inst->seqNum, inst->PC); - bdelayDoneSeqNum[tid] = inst->seqNum; - squash(inst, inst->threadNumber); - Addr target = inst->branchTarget(); - inst->setPredTarg(target, - target + sizeof(TheISA::MachInst)); - break; - } else { - DPRINTF(Decode, "[tid:%i]: Misprediction detected at " - "[sn:%i] PC %#x, will squash after delay slot " - "inst. is sent to Rename\n", - tid, inst->seqNum, inst->PC); - bdelayDoneSeqNum[tid] = inst->seqNum + 1; - squashAfterDelaySlot[tid] = true; - squashInst[tid] = inst; - continue; - } -#endif } } - - if (squashAfterDelaySlot[tid]) { - assert(!inst->isSquashed()); - squash(squashInst[tid], squashInst[tid]->threadNumber); - Addr target = squashInst[tid]->branchTarget(); - squashInst[tid]->setPredTarg(target, - target + sizeof(TheISA::MachInst)); - assert(!inst->isSquashed()); - break; - } } // If we didn't process all instructions, then we will need to block diff --git a/src/cpu/o3/fetch.hh b/src/cpu/o3/fetch.hh index 241935416..bb0057e7c 100644 --- a/src/cpu/o3/fetch.hh +++ b/src/cpu/o3/fetch.hh @@ -263,8 +263,7 @@ class DefaultFetch * squash should be the commit stage. */ void squash(const Addr &new_PC, const Addr &new_NPC, - const InstSeqNum &seq_num, - bool squash_delay_slot, unsigned tid); + const InstSeqNum &seq_num, unsigned tid); /** Ticks the fetch stage, processing all inputs signals and fetching * as many instructions as possible. diff --git a/src/cpu/o3/fetch_impl.hh b/src/cpu/o3/fetch_impl.hh index e16f97558..25498c7f3 100644 --- a/src/cpu/o3/fetch_impl.hh +++ b/src/cpu/o3/fetch_impl.hh @@ -774,20 +774,14 @@ DefaultFetch::updateFetchStatus() template void DefaultFetch::squash(const Addr &new_PC, const Addr &new_NPC, - const InstSeqNum &seq_num, - bool squash_delay_slot, unsigned tid) + const InstSeqNum &seq_num, unsigned tid) { DPRINTF(Fetch, "[tid:%u]: Squash from commit.\n",tid); doSquash(new_PC, new_NPC, tid); -#if ISA_HAS_DELAY_SLOT - // Tell the CPU to remove any instructions that are not in the ROB. - cpu->removeInstsNotInROB(tid, squash_delay_slot, seq_num); -#else // Tell the CPU to remove any instructions that are not in the ROB. - cpu->removeInstsNotInROB(tid, true, 0); -#endif + cpu->removeInstsNotInROB(tid); } template @@ -896,17 +890,10 @@ DefaultFetch::checkSignalsAndUpdate(unsigned tid) DPRINTF(Fetch, "[tid:%u]: Squashing instructions due to squash " "from commit.\n",tid); - -#if ISA_HAS_DELAY_SLOT - InstSeqNum doneSeqNum = fromCommit->commitInfo[tid].bdelayDoneSeqNum; -#else - InstSeqNum doneSeqNum = fromCommit->commitInfo[tid].doneSeqNum; -#endif // In any case, squash. squash(fromCommit->commitInfo[tid].nextPC, fromCommit->commitInfo[tid].nextNPC, - doneSeqNum, - fromCommit->commitInfo[tid].squashDelaySlot, + fromCommit->commitInfo[tid].doneSeqNum, tid); // Also check if there's a mispredict that happened. @@ -955,18 +942,13 @@ DefaultFetch::checkSignalsAndUpdate(unsigned tid) if (fetchStatus[tid] != Squashing) { -#if ISA_HAS_DELAY_SLOT - InstSeqNum doneSeqNum = fromDecode->decodeInfo[tid].bdelayDoneSeqNum; -#else - InstSeqNum doneSeqNum = fromDecode->decodeInfo[tid].doneSeqNum; -#endif DPRINTF(Fetch, "Squashing from decode with PC = %#x, NPC = %#x\n", fromDecode->decodeInfo[tid].nextPC, fromDecode->decodeInfo[tid].nextNPC); // Squash unless we're already squashing squashFromDecode(fromDecode->decodeInfo[tid].nextPC, fromDecode->decodeInfo[tid].nextNPC, - doneSeqNum, + fromDecode->decodeInfo[tid].doneSeqNum, tid); return true; @@ -1157,9 +1139,6 @@ DefaultFetch::fetch(bool &status_change) instruction->readPC()); ///FIXME This needs to be more robust in dealing with delay slots -#if !ISA_HAS_DELAY_SLOT -// predicted_branch |= -#endif lookupAndUpdateNextPC(instruction, next_PC, next_NPC); predicted_branch |= (next_PC != fetch_NPC); @@ -1213,11 +1192,7 @@ DefaultFetch::fetch(bool &status_change) PC[tid] = next_PC; nextPC[tid] = next_NPC; nextNPC[tid] = next_NPC + instSize; -#if ISA_HAS_DELAY_SLOT - DPRINTF(Fetch, "[tid:%i]: Setting PC to %08p.\n", tid, PC[tid]); -#else DPRINTF(Fetch, "[tid:%i]: Setting PC to %08p.\n", tid, next_PC); -#endif } else { // We shouldn't be in an icache miss and also have a fault (an ITB // miss) diff --git a/src/cpu/o3/iew.hh b/src/cpu/o3/iew.hh index ce2991cfb..eef5a15d2 100644 --- a/src/cpu/o3/iew.hh +++ b/src/cpu/o3/iew.hh @@ -402,9 +402,6 @@ class DefaultIEW /** Records if there is a fetch redirect on this cycle for each thread. */ bool fetchRedirect[Impl::MaxThreads]; - /** Keeps track of the last valid branch delay slot instss for threads */ - InstSeqNum bdelayDoneSeqNum[Impl::MaxThreads]; - /** Used to track if all instructions have been dispatched this cycle. * If they have not, then blocking must have occurred, and the instructions * would already be added to the skid buffer. diff --git a/src/cpu/o3/iew_impl.hh b/src/cpu/o3/iew_impl.hh index 62e656e93..050785818 100644 --- a/src/cpu/o3/iew_impl.hh +++ b/src/cpu/o3/iew_impl.hh @@ -69,7 +69,6 @@ DefaultIEW::DefaultIEW(O3CPU *_cpu, Params *params) dispatchStatus[i] = Running; stalls[i].commit = false; fetchRedirect[i] = false; - bdelayDoneSeqNum[i] = 0; } wbMax = wbWidth * params->wbDepth; @@ -410,31 +409,14 @@ DefaultIEW::squash(unsigned tid) instQueue.squash(tid); // Tell the LDSTQ to start squashing. -#if ISA_HAS_DELAY_SLOT - ldstQueue.squash(fromCommit->commitInfo[tid].bdelayDoneSeqNum, tid); -#else ldstQueue.squash(fromCommit->commitInfo[tid].doneSeqNum, tid); -#endif updatedQueues = true; // Clear the skid buffer in case it has any data in it. DPRINTF(IEW, "[tid:%i]: Removing skidbuffer instructions until [sn:%i].\n", - tid, fromCommit->commitInfo[tid].bdelayDoneSeqNum); + tid, fromCommit->commitInfo[tid].doneSeqNum); while (!skidBuffer[tid].empty()) { -#if ISA_HAS_DELAY_SLOT - if (skidBuffer[tid].front()->seqNum <= - fromCommit->commitInfo[tid].bdelayDoneSeqNum) { - DPRINTF(IEW, "[tid:%i]: Cannot remove skidbuffer instructions " - "that occur before delay slot [sn:%i].\n", - fromCommit->commitInfo[tid].bdelayDoneSeqNum, - tid); - break; - } else { - DPRINTF(IEW, "[tid:%i]: Removing instruction [sn:%i] from " - "skidBuffer.\n", tid, skidBuffer[tid].front()->seqNum); - } -#endif if (skidBuffer[tid].front()->isLoad() || skidBuffer[tid].front()->isStore() ) { toRename->iewInfo[tid].dispatchedToLSQ++; @@ -445,8 +427,6 @@ DefaultIEW::squash(unsigned tid) skidBuffer[tid].pop(); } - bdelayDoneSeqNum[tid] = fromCommit->commitInfo[tid].bdelayDoneSeqNum; - emptyRenameInsts(tid); } @@ -462,38 +442,18 @@ DefaultIEW::squashDueToBranch(DynInstPtr &inst, unsigned tid) toCommit->mispredPC[tid] = inst->readPC(); toCommit->branchMispredict[tid] = true; - int instSize = sizeof(TheISA::MachInst); #if ISA_HAS_DELAY_SLOT - bool branch_taken = + int instSize = sizeof(TheISA::MachInst); + toCommit->branchTaken[tid] = !(inst->readNextPC() + instSize == inst->readNextNPC() && (inst->readNextPC() == inst->readPC() + instSize || inst->readNextPC() == inst->readPC() + 2 * instSize)); - DPRINTF(Sparc, "Branch taken = %s [sn:%i]\n", - branch_taken ? "true": "false", inst->seqNum); - - toCommit->branchTaken[tid] = branch_taken; - - bool squashDelaySlot = true; -// (inst->readNextPC() != inst->readPC() + sizeof(TheISA::MachInst)); - DPRINTF(Sparc, "Squash delay slot = %s [sn:%i]\n", - squashDelaySlot ? "true": "false", inst->seqNum); - toCommit->squashDelaySlot[tid] = squashDelaySlot; - //If we're squashing the delay slot, we need to pick back up at NextPC. - //Otherwise, NextPC isn't being squashed, so we should pick back up at - //NextNPC. - if (squashDelaySlot) { - toCommit->nextPC[tid] = inst->readNextPC(); - toCommit->nextNPC[tid] = inst->readNextNPC(); - } else { - toCommit->nextPC[tid] = inst->readNextNPC(); - toCommit->nextNPC[tid] = inst->readNextNPC() + instSize; - } #else toCommit->branchTaken[tid] = inst->readNextPC() != (inst->readPC() + sizeof(TheISA::MachInst)); - toCommit->nextPC[tid] = inst->readNextPC(); - toCommit->nextNPC[tid] = inst->readNextPC() + instSize; #endif + toCommit->nextPC[tid] = inst->readNextPC(); + toCommit->nextNPC[tid] = inst->readNextNPC(); toCommit->includeSquashInst[tid] = false; @@ -510,11 +470,7 @@ DefaultIEW::squashDueToMemOrder(DynInstPtr &inst, unsigned tid) toCommit->squash[tid] = true; toCommit->squashedSeqNum[tid] = inst->seqNum; toCommit->nextPC[tid] = inst->readNextPC(); -#if ISA_HAS_DELAY_SLOT toCommit->nextNPC[tid] = inst->readNextNPC(); -#else - toCommit->nextNPC[tid] = inst->readNextPC() + sizeof(TheISA::MachInst); -#endif toCommit->branchMispredict[tid] = false; toCommit->includeSquashInst[tid] = false; @@ -532,11 +488,7 @@ DefaultIEW::squashDueToMemBlocked(DynInstPtr &inst, unsigned tid) toCommit->squash[tid] = true; toCommit->squashedSeqNum[tid] = inst->seqNum; toCommit->nextPC[tid] = inst->readPC(); -#if ISA_HAS_DELAY_SLOT toCommit->nextNPC[tid] = inst->readNextPC(); -#else - toCommit->nextNPC[tid] = inst->readPC() + sizeof(TheISA::MachInst); -#endif toCommit->branchMispredict[tid] = false; // Must include the broadcasted SN in the squash. @@ -880,10 +832,8 @@ DefaultIEW::sortInsts() { int insts_from_rename = fromRename->size; #ifdef DEBUG -#if !ISA_HAS_DELAY_SLOT for (int i = 0; i < numThreads; i++) assert(insts[i].empty()); -#endif #endif for (int i = 0; i < insts_from_rename; ++i) { insts[fromRename->insts[i]->threadNumber].push(fromRename->insts[i]); @@ -894,21 +844,9 @@ template void DefaultIEW::emptyRenameInsts(unsigned tid) { - DPRINTF(IEW, "[tid:%i]: Removing incoming rename instructions until " - "[sn:%i].\n", tid, bdelayDoneSeqNum[tid]); + DPRINTF(IEW, "[tid:%i]: Removing incoming rename instructions\n", tid); while (!insts[tid].empty()) { -#if ISA_HAS_DELAY_SLOT - if (insts[tid].front()->seqNum <= bdelayDoneSeqNum[tid]) { - DPRINTF(IEW, "[tid:%i]: Done removing, cannot remove instruction" - " that occurs at or before delay slot [sn:%i].\n", - tid, bdelayDoneSeqNum[tid]); - break; - } else { - DPRINTF(IEW, "[tid:%i]: Removing incoming rename instruction " - "[sn:%i].\n", tid, insts[tid].front()->seqNum); - } -#endif if (insts[tid].front()->isLoad() || insts[tid].front()->isStore() ) { diff --git a/src/cpu/o3/inst_queue_impl.hh b/src/cpu/o3/inst_queue_impl.hh index 10c3287f2..bdf5f07aa 100644 --- a/src/cpu/o3/inst_queue_impl.hh +++ b/src/cpu/o3/inst_queue_impl.hh @@ -1005,11 +1005,7 @@ InstructionQueue::squash(unsigned tid) // Read instruction sequence number of last instruction out of the // time buffer. -#if ISA_HAS_DELAY_SLOT - squashedSeqNum[tid] = fromCommit->commitInfo[tid].bdelayDoneSeqNum; -#else squashedSeqNum[tid] = fromCommit->commitInfo[tid].doneSeqNum; -#endif // Call doSquash if there are insts in the IQ if (count[tid] > 0) { diff --git a/src/cpu/o3/rename_impl.hh b/src/cpu/o3/rename_impl.hh index 431705e19..6e7180b1e 100644 --- a/src/cpu/o3/rename_impl.hh +++ b/src/cpu/o3/rename_impl.hh @@ -356,47 +356,12 @@ DefaultRename::squash(const InstSeqNum &squash_seq_num, unsigned tid) } // Clear the instruction list and skid buffer in case they have any - // insts in them. Since we support multiple ISAs, we cant just: - // "insts[tid].clear();" or "skidBuffer[tid].clear()" since there is - // a possible delay slot inst for different architectures - // insts[tid].clear(); -#if ISA_HAS_DELAY_SLOT - DPRINTF(Rename, "[tid:%i] Squashing incoming decode instructions until " - "[sn:%i].\n",tid, squash_seq_num); - ListIt ilist_it = insts[tid].begin(); - while (ilist_it != insts[tid].end()) { - if ((*ilist_it)->seqNum > squash_seq_num) { - (*ilist_it)->setSquashed(); - DPRINTF(Rename, "Squashing incoming decode instruction, " - "[tid:%i] [sn:%i] PC %08p.\n", tid, (*ilist_it)->seqNum, (*ilist_it)->PC); - } - ilist_it++; - } -#else + // insts in them. insts[tid].clear(); -#endif // Clear the skid buffer in case it has any data in it. - // See comments above. - // skidBuffer[tid].clear(); -#if ISA_HAS_DELAY_SLOT - DPRINTF(Rename, "[tid:%i] Squashing incoming skidbuffer instructions " - "until [sn:%i].\n", tid, squash_seq_num); - ListIt slist_it = skidBuffer[tid].begin(); - while (slist_it != skidBuffer[tid].end()) { - if ((*slist_it)->seqNum > squash_seq_num) { - (*slist_it)->setSquashed(); - DPRINTF(Rename, "Squashing skidbuffer instruction, [tid:%i] [sn:%i]" - "PC %08p.\n", tid, (*slist_it)->seqNum, (*slist_it)->PC); - } - slist_it++; - } - resumeUnblocking = (skidBuffer[tid].size() != 0); - DPRINTF(Rename, "Resume unblocking set to %s\n", - resumeUnblocking ? "true" : "false"); -#else skidBuffer[tid].clear(); -#endif + doSquash(squash_seq_num, tid); } @@ -776,10 +741,8 @@ DefaultRename::sortInsts() { int insts_from_decode = fromDecode->size; #ifdef DEBUG -#if !ISA_HAS_DELAY_SLOT for (int i=0; i < numThreads; i++) assert(insts[i].empty()); -#endif #endif for (int i = 0; i < insts_from_decode; ++i) { DynInstPtr inst = fromDecode->insts[i]; @@ -1248,13 +1211,7 @@ DefaultRename::checkSignalsAndUpdate(unsigned tid) DPRINTF(Rename, "[tid:%u]: Squashing instructions due to squash from " "commit.\n", tid); -#if ISA_HAS_DELAY_SLOT - InstSeqNum squashed_seq_num = fromCommit->commitInfo[tid].bdelayDoneSeqNum; -#else - InstSeqNum squashed_seq_num = fromCommit->commitInfo[tid].doneSeqNum; -#endif - - squash(squashed_seq_num, tid); + squash(fromCommit->commitInfo[tid].doneSeqNum, tid); return true; } -- cgit v1.2.3 From c3081d9c1c36e1a08c173048783d191fa19463de Mon Sep 17 00:00:00 2001 From: Gabe Black Date: Sat, 14 Apr 2007 17:13:18 +0000 Subject: Add support for microcode and pull out the special branch delay slot handling. Branch delay slots need to be squash on a mispredict as well because the nnpc they saw was incorrect. --HG-- extra : convert_revision : 8b9c603616bcad254417a7a3fa3edfb4c8728719 --- src/cpu/base_dyn_inst.hh | 57 +++++++++++++++++++++--- src/cpu/base_dyn_inst_impl.hh | 55 +++++++++++++++++++++-- src/cpu/o3/comm.hh | 21 +++++---- src/cpu/o3/commit.hh | 34 ++++++++++++--- src/cpu/o3/commit_impl.hh | 11 ++--- src/cpu/o3/cpu.cc | 30 ++++++++++++- src/cpu/o3/cpu.hh | 22 +++++++--- src/cpu/o3/decode_impl.hh | 4 +- src/cpu/o3/fetch.hh | 17 ++++---- src/cpu/o3/fetch_impl.hh | 91 +++++++++++++++++++++++++-------------- src/cpu/o3/iew_impl.hh | 1 + src/cpu/o3/rename_impl.hh | 6 ++- src/cpu/o3/sparc/dyn_inst.hh | 10 ++++- src/cpu/o3/sparc/dyn_inst_impl.hh | 17 +++++++- 14 files changed, 291 insertions(+), 85 deletions(-) (limited to 'src/cpu') diff --git a/src/cpu/base_dyn_inst.hh b/src/cpu/base_dyn_inst.hh index b02038b3e..1311e5cf2 100644 --- a/src/cpu/base_dyn_inst.hh +++ b/src/cpu/base_dyn_inst.hh @@ -209,6 +209,9 @@ class BaseDynInst : public FastAlloc, public RefCounted /** PC of this instruction. */ Addr PC; + /** Micro PC of this instruction. */ + Addr microPC; + protected: /** Next non-speculative PC. It is not filled in at fetch, but rather * once the target of the branch is truly known (either decode or @@ -219,12 +222,18 @@ class BaseDynInst : public FastAlloc, public RefCounted /** Next non-speculative NPC. Target PC for Mips or Sparc. */ Addr nextNPC; + /** Next non-speculative micro PC. */ + Addr nextMicroPC; + /** Predicted next PC. */ Addr predPC; /** Predicted next NPC. */ Addr predNPC; + /** Predicted next microPC */ + Addr predMicroPC; + /** If this is a branch that was predicted taken */ bool predTaken; @@ -340,6 +349,17 @@ class BaseDynInst : public FastAlloc, public RefCounted { _flatDestRegIdx[idx] = flattened_dest; } + /** BaseDynInst constructor given a binary instruction. + * @param staticInst A StaticInstPtr to the underlying instruction. + * @param PC The PC of the instruction. + * @param pred_PC The predicted next PC. + * @param pred_NPC The predicted next NPC. + * @param seq_num The sequence number of the instruction. + * @param cpu Pointer to the instruction's CPU. + */ + BaseDynInst(StaticInstPtr staticInst, Addr PC, Addr NPC, Addr microPC, + Addr pred_PC, Addr pred_NPC, Addr pred_MicroPC, + InstSeqNum seq_num, ImplCPU *cpu); /** BaseDynInst constructor given a binary instruction. * @param inst The binary instruction. @@ -349,8 +369,8 @@ class BaseDynInst : public FastAlloc, public RefCounted * @param seq_num The sequence number of the instruction. * @param cpu Pointer to the instruction's CPU. */ - BaseDynInst(TheISA::ExtMachInst inst, Addr PC, Addr NPC, - Addr pred_PC, Addr pred_NPC, + BaseDynInst(TheISA::ExtMachInst inst, Addr PC, Addr NPC, Addr microPC, + Addr pred_PC, Addr pred_NPC, Addr pred_MicroPC, InstSeqNum seq_num, ImplCPU *cpu); /** BaseDynInst constructor given a StaticInst pointer. @@ -402,11 +422,18 @@ class BaseDynInst : public FastAlloc, public RefCounted #endif } + Addr readNextMicroPC() + { + return nextMicroPC; + } + /** Set the predicted target of this current instruction. */ - void setPredTarg(Addr predicted_PC, Addr predicted_NPC) + void setPredTarg(Addr predicted_PC, Addr predicted_NPC, + Addr predicted_MicroPC) { predPC = predicted_PC; predNPC = predicted_NPC; + predMicroPC = predicted_MicroPC; } /** Returns the predicted PC immediately after the branch. */ @@ -415,6 +442,9 @@ class BaseDynInst : public FastAlloc, public RefCounted /** Returns the predicted PC two instructions after the branch */ Addr readPredNPC() { return predNPC; } + /** Returns the predicted micro PC after the branch */ + Addr readPredMicroPC() { return predMicroPC; } + /** Returns whether the instruction was predicted taken or not. */ bool readPredTaken() { @@ -430,7 +460,8 @@ class BaseDynInst : public FastAlloc, public RefCounted bool mispredicted() { return readPredPC() != readNextPC() || - readPredNPC() != readNextNPC(); + readPredNPC() != readNextNPC() || + readPredMicroPC() != readNextMicroPC(); } // @@ -467,6 +498,12 @@ class BaseDynInst : public FastAlloc, public RefCounted bool isQuiesce() const { return staticInst->isQuiesce(); } bool isIprAccess() const { return staticInst->isIprAccess(); } bool isUnverifiable() const { return staticInst->isUnverifiable(); } + bool isMacroOp() const { return staticInst->isMacroOp(); } + bool isMicroOp() const { return staticInst->isMicroOp(); } + bool isDelayedCommit() const { return staticInst->isDelayedCommit(); } + bool isLastMicroOp() const { return staticInst->isLastMicroOp(); } + bool isFirstMicroOp() const { return staticInst->isFirstMicroOp(); } + bool isMicroBranch() const { return staticInst->isMicroBranch(); } /** Temporarily sets this instruction as a serialize before instruction. */ void setSerializeBefore() { status.set(SerializeBefore); } @@ -700,20 +737,28 @@ class BaseDynInst : public FastAlloc, public RefCounted /** Read the PC of this instruction. */ const Addr readPC() const { return PC; } + /**Read the micro PC of this instruction. */ + const Addr readMicroPC() const { return microPC; } + /** Set the next PC of this instruction (its actual target). */ - void setNextPC(uint64_t val) + void setNextPC(Addr val) { nextPC = val; } /** Set the next NPC of this instruction (the target in Mips or Sparc).*/ - void setNextNPC(uint64_t val) + void setNextNPC(Addr val) { #if ISA_HAS_DELAY_SLOT nextNPC = val; #endif } + void setNextMicroPC(Addr val) + { + nextMicroPC = val; + } + /** Sets the ASID. */ void setASID(short addr_space_id) { asid = addr_space_id; } diff --git a/src/cpu/base_dyn_inst_impl.hh b/src/cpu/base_dyn_inst_impl.hh index a1c866336..acf8af9cf 100644 --- a/src/cpu/base_dyn_inst_impl.hh +++ b/src/cpu/base_dyn_inst_impl.hh @@ -62,19 +62,66 @@ my_hash_t thishash; #endif template -BaseDynInst::BaseDynInst(TheISA::ExtMachInst machInst, +BaseDynInst::BaseDynInst(StaticInstPtr _staticInst, Addr inst_PC, Addr inst_NPC, + Addr inst_MicroPC, Addr pred_PC, Addr pred_NPC, + Addr pred_MicroPC, InstSeqNum seq_num, ImplCPU *cpu) - : staticInst(machInst), traceData(NULL), cpu(cpu) + : staticInst(_staticInst), traceData(NULL), cpu(cpu) { seqNum = seq_num; + bool nextIsMicro = + staticInst->isMicroOp() && !staticInst->isLastMicroOp(); + PC = inst_PC; - nextPC = inst_NPC; - nextNPC = nextPC + sizeof(TheISA::MachInst); + microPC = inst_MicroPC; + if (nextIsMicro) { + nextPC = inst_PC; + nextNPC = inst_NPC; + nextMicroPC = microPC + 1; + } else { + nextPC = inst_NPC; + nextNPC = nextPC + sizeof(TheISA::MachInst); + nextMicroPC = 0; + } + predPC = pred_PC; + predNPC = pred_NPC; + predMicroPC = pred_MicroPC; + predTaken = false; + + initVars(); +} + +template +BaseDynInst::BaseDynInst(TheISA::ExtMachInst inst, + Addr inst_PC, Addr inst_NPC, + Addr inst_MicroPC, + Addr pred_PC, Addr pred_NPC, + Addr pred_MicroPC, + InstSeqNum seq_num, ImplCPU *cpu) + : staticInst(inst), traceData(NULL), cpu(cpu) +{ + seqNum = seq_num; + + bool nextIsMicro = + staticInst->isMicroOp() && !staticInst->isLastMicroOp(); + + PC = inst_PC; + microPC = inst_MicroPC; + if (nextIsMicro) { + nextPC = inst_PC; + nextNPC = inst_NPC; + nextMicroPC = microPC + 1; + } else { + nextPC = inst_NPC; + nextNPC = nextPC + sizeof(TheISA::MachInst); + nextMicroPC = 0; + } predPC = pred_PC; predNPC = pred_NPC; + predMicroPC = pred_MicroPC; predTaken = false; initVars(); diff --git a/src/cpu/o3/comm.hh b/src/cpu/o3/comm.hh index 8d7bb95f4..fb772060b 100644 --- a/src/cpu/o3/comm.hh +++ b/src/cpu/o3/comm.hh @@ -87,9 +87,10 @@ struct DefaultIEWDefaultCommit { bool squash[Impl::MaxThreads]; bool branchMispredict[Impl::MaxThreads]; bool branchTaken[Impl::MaxThreads]; - uint64_t mispredPC[Impl::MaxThreads]; - uint64_t nextPC[Impl::MaxThreads]; - uint64_t nextNPC[Impl::MaxThreads]; + Addr mispredPC[Impl::MaxThreads]; + Addr nextPC[Impl::MaxThreads]; + Addr nextNPC[Impl::MaxThreads]; + Addr nextMicroPC[Impl::MaxThreads]; InstSeqNum squashedSeqNum[Impl::MaxThreads]; bool includeSquashInst[Impl::MaxThreads]; @@ -118,9 +119,10 @@ struct TimeBufStruct { // struct as it is used pretty frequently. bool branchMispredict; bool branchTaken; - uint64_t mispredPC; - uint64_t nextPC; - uint64_t nextNPC; + Addr mispredPC; + Addr nextPC; + Addr nextNPC; + Addr nextMicroPC; unsigned branchCount; }; @@ -158,9 +160,10 @@ struct TimeBufStruct { bool branchMispredict; bool branchTaken; - uint64_t mispredPC; - uint64_t nextPC; - uint64_t nextNPC; + Addr mispredPC; + Addr nextPC; + Addr nextNPC; + Addr nextMicroPC; // Represents the instruction that has either been retired or // squashed. Similar to having a single bus that broadcasts the diff --git a/src/cpu/o3/commit.hh b/src/cpu/o3/commit.hh index fba618c14..27bdd20c5 100644 --- a/src/cpu/o3/commit.hh +++ b/src/cpu/o3/commit.hh @@ -279,25 +279,37 @@ class DefaultCommit /** Returns the PC of the head instruction of the ROB. * @todo: Probably remove this function as it returns only thread 0. */ - uint64_t readPC() { return PC[0]; } + Addr readPC() { return PC[0]; } /** Returns the PC of a specific thread. */ - uint64_t readPC(unsigned tid) { return PC[tid]; } + Addr readPC(unsigned tid) { return PC[tid]; } /** Sets the PC of a specific thread. */ - void setPC(uint64_t val, unsigned tid) { PC[tid] = val; } + void setPC(Addr val, unsigned tid) { PC[tid] = val; } + + /** Reads the micro PC of a specific thread. */ + Addr readMicroPC(unsigned tid) { return microPC[tid]; } + + /** Sets the micro PC of a specific thread */ + void setMicroPC(Addr val, unsigned tid) { microPC[tid] = val; } /** Reads the next PC of a specific thread. */ - uint64_t readNextPC(unsigned tid) { return nextPC[tid]; } + Addr readNextPC(unsigned tid) { return nextPC[tid]; } /** Sets the next PC of a specific thread. */ - void setNextPC(uint64_t val, unsigned tid) { nextPC[tid] = val; } + void setNextPC(Addr val, unsigned tid) { nextPC[tid] = val; } /** Reads the next NPC of a specific thread. */ - uint64_t readNextNPC(unsigned tid) { return nextNPC[tid]; } + Addr readNextNPC(unsigned tid) { return nextNPC[tid]; } /** Sets the next NPC of a specific thread. */ - void setNextNPC(uint64_t val, unsigned tid) { nextNPC[tid] = val; } + void setNextNPC(Addr val, unsigned tid) { nextNPC[tid] = val; } + + /** Reads the micro PC of a specific thread. */ + Addr readNextMicroPC(unsigned tid) { return nextMicroPC[tid]; } + + /** Sets the micro PC of a specific thread */ + void setNextMicroPC(Addr val, unsigned tid) { nextMicroPC[tid] = val; } private: /** Time buffer interface. */ @@ -402,12 +414,20 @@ class DefaultCommit */ Addr PC[Impl::MaxThreads]; + /** The commit micro PC of each thread. Refers to the instruction that + * is currently being processed/committed. + */ + Addr microPC[Impl::MaxThreads]; + /** The next PC of each thread. */ Addr nextPC[Impl::MaxThreads]; /** The next NPC of each thread. */ Addr nextNPC[Impl::MaxThreads]; + /** The next micro PC of each thread. */ + Addr nextMicroPC[Impl::MaxThreads]; + /** The sequence number of the youngest valid instruction in the ROB. */ InstSeqNum youngestSeqNum[Impl::MaxThreads]; diff --git a/src/cpu/o3/commit_impl.hh b/src/cpu/o3/commit_impl.hh index 9dd5ed291..fc24d7edc 100644 --- a/src/cpu/o3/commit_impl.hh +++ b/src/cpu/o3/commit_impl.hh @@ -124,7 +124,7 @@ DefaultCommit::DefaultCommit(O3CPU *_cpu, Params *params) committedStores[i] = false; trapSquash[i] = false; tcSquash[i] = false; - PC[i] = nextPC[i] = nextNPC[i] = 0; + microPC[i] = nextMicroPC[i] = PC[i] = nextPC[i] = nextNPC[i] = 0; } #if FULL_SYSTEM interrupt = NoFault; @@ -508,6 +508,7 @@ DefaultCommit::squashAll(unsigned tid) toIEW->commitInfo[tid].nextPC = PC[tid]; toIEW->commitInfo[tid].nextNPC = nextPC[tid]; + toIEW->commitInfo[tid].nextMicroPC = nextMicroPC[tid]; } template @@ -768,6 +769,7 @@ DefaultCommit::commit() toIEW->commitInfo[tid].nextPC = fromIEW->nextPC[tid]; toIEW->commitInfo[tid].nextNPC = fromIEW->nextNPC[tid]; + toIEW->commitInfo[tid].nextMicroPC = fromIEW->nextMicroPC[tid]; toIEW->commitInfo[tid].mispredPC = fromIEW->mispredPC[tid]; @@ -877,6 +879,7 @@ DefaultCommit::commitInsts() PC[tid] = head_inst->readPC(); nextPC[tid] = head_inst->readNextPC(); nextNPC[tid] = head_inst->readNextNPC(); + nextMicroPC[tid] = head_inst->readNextMicroPC(); // Increment the total number of non-speculative instructions // executed. @@ -905,12 +908,10 @@ DefaultCommit::commitInsts() } PC[tid] = nextPC[tid]; -#if ISA_HAS_DELAY_SLOT nextPC[tid] = nextNPC[tid]; nextNPC[tid] = nextNPC[tid] + sizeof(TheISA::MachInst); -#else - nextPC[tid] = nextPC[tid] + sizeof(TheISA::MachInst); -#endif + microPC[tid] = nextMicroPC[tid]; + nextMicroPC[tid] = microPC[tid] + 1; #if FULL_SYSTEM int count = 0; diff --git a/src/cpu/o3/cpu.cc b/src/cpu/o3/cpu.cc index b2b4645d2..59978a065 100644 --- a/src/cpu/o3/cpu.cc +++ b/src/cpu/o3/cpu.cc @@ -696,7 +696,7 @@ FullO3CPU::removeThread(unsigned tid) // Squash Throughout Pipeline InstSeqNum squash_seq_num = commit.rob->readHeadInst(tid)->seqNum; - fetch.squash(0, sizeof(TheISA::MachInst), squash_seq_num, tid); + fetch.squash(0, sizeof(TheISA::MachInst), 0, squash_seq_num, tid); decode.squash(tid); rename.squash(squash_seq_num, tid); iew.squash(tid); @@ -1150,6 +1150,20 @@ FullO3CPU::setPC(Addr new_PC,unsigned tid) commit.setPC(new_PC, tid); } +template +uint64_t +FullO3CPU::readMicroPC(unsigned tid) +{ + return commit.readMicroPC(tid); +} + +template +void +FullO3CPU::setMicroPC(Addr new_PC,unsigned tid) +{ + commit.setMicroPC(new_PC, tid); +} + template uint64_t FullO3CPU::readNextPC(unsigned tid) @@ -1178,6 +1192,20 @@ FullO3CPU::setNextNPC(uint64_t val,unsigned tid) commit.setNextNPC(val, tid); } +template +uint64_t +FullO3CPU::readNextMicroPC(unsigned tid) +{ + return commit.readNextMicroPC(tid); +} + +template +void +FullO3CPU::setNextMicroPC(Addr new_PC,unsigned tid) +{ + commit.setNextMicroPC(new_PC, tid); +} + template typename FullO3CPU::ListIt FullO3CPU::addInst(DynInstPtr &inst) diff --git a/src/cpu/o3/cpu.hh b/src/cpu/o3/cpu.hh index 4b247e6e3..bff78bf9e 100644 --- a/src/cpu/o3/cpu.hh +++ b/src/cpu/o3/cpu.hh @@ -433,22 +433,34 @@ class FullO3CPU : public BaseO3CPU void setArchFloatRegInt(int reg_idx, uint64_t val, unsigned tid); /** Reads the commit PC of a specific thread. */ - uint64_t readPC(unsigned tid); + Addr readPC(unsigned tid); /** Sets the commit PC of a specific thread. */ void setPC(Addr new_PC, unsigned tid); + /** Reads the commit micro PC of a specific thread. */ + Addr readMicroPC(unsigned tid); + + /** Sets the commmit micro PC of a specific thread. */ + void setMicroPC(Addr new_microPC, unsigned tid); + /** Reads the next PC of a specific thread. */ - uint64_t readNextPC(unsigned tid); + Addr readNextPC(unsigned tid); /** Sets the next PC of a specific thread. */ - void setNextPC(uint64_t val, unsigned tid); + void setNextPC(Addr val, unsigned tid); /** Reads the next NPC of a specific thread. */ - uint64_t readNextNPC(unsigned tid); + Addr readNextNPC(unsigned tid); /** Sets the next NPC of a specific thread. */ - void setNextNPC(uint64_t val, unsigned tid); + void setNextNPC(Addr val, unsigned tid); + + /** Reads the commit next micro PC of a specific thread. */ + Addr readNextMicroPC(unsigned tid); + + /** Sets the commit next micro PC of a specific thread. */ + void setNextMicroPC(Addr val, unsigned tid); /** Function to add instruction onto the head of the list of the * instructions. Used when new instructions are fetched. diff --git a/src/cpu/o3/decode_impl.hh b/src/cpu/o3/decode_impl.hh index c9d0a1885..ce6738456 100644 --- a/src/cpu/o3/decode_impl.hh +++ b/src/cpu/o3/decode_impl.hh @@ -273,6 +273,7 @@ DefaultDecode::squash(DynInstPtr &inst, unsigned tid) ///explicitly for ISAs with delay slots. toFetch->decodeInfo[tid].nextNPC = inst->branchTarget() + sizeof(TheISA::MachInst); + toFetch->decodeInfo[tid].nextMicroPC = inst->readMicroPC(); #if ISA_HAS_DELAY_SLOT toFetch->decodeInfo[tid].branchTaken = inst->readNextNPC() != (inst->readNextPC() + sizeof(TheISA::MachInst)); @@ -735,7 +736,8 @@ DefaultDecode::decodeInsts(unsigned tid) // a check at the end squash(inst, inst->threadNumber); Addr target = inst->branchTarget(); - inst->setPredTarg(target, target + sizeof(TheISA::MachInst)); + //The micro pc after an instruction level branch should be 0 + inst->setPredTarg(target, target + sizeof(TheISA::MachInst), 0); break; } } diff --git a/src/cpu/o3/fetch.hh b/src/cpu/o3/fetch.hh index bb0057e7c..7645a226c 100644 --- a/src/cpu/o3/fetch.hh +++ b/src/cpu/o3/fetch.hh @@ -227,7 +227,7 @@ class DefaultFetch * @param next_NPC Used for ISAs which use delay slots. * @return Whether or not a branch was predicted as taken. */ - bool lookupAndUpdateNextPC(DynInstPtr &inst, Addr &next_PC, Addr &next_NPC); + bool lookupAndUpdateNextPC(DynInstPtr &inst, Addr &next_PC, Addr &next_NPC, Addr &next_MicroPC); /** * Fetches the cache line that contains fetch_PC. Returns any @@ -242,12 +242,14 @@ class DefaultFetch bool fetchCacheLine(Addr fetch_PC, Fault &ret_fault, unsigned tid); /** Squashes a specific thread and resets the PC. */ - inline void doSquash(const Addr &new_PC, const Addr &new_NPC, unsigned tid); + inline void doSquash(const Addr &new_PC, const Addr &new_NPC, + const Addr &new_MicroPC, unsigned tid); /** Squashes a specific thread and resets the PC. Also tells the CPU to * remove any instructions between fetch and decode that should be sqaushed. */ void squashFromDecode(const Addr &new_PC, const Addr &new_NPC, + const Addr &new_MicroPC, const InstSeqNum &seq_num, unsigned tid); /** Checks if a thread is stalled. */ @@ -263,6 +265,7 @@ class DefaultFetch * squash should be the commit stage. */ void squash(const Addr &new_PC, const Addr &new_NPC, + const Addr &new_MicroPC, const InstSeqNum &seq_num, unsigned tid); /** Ticks the fetch stage, processing all inputs signals and fetching @@ -346,16 +349,12 @@ class DefaultFetch /** Per-thread fetch PC. */ Addr PC[Impl::MaxThreads]; + /** Per-thread fetch micro PC. */ + Addr microPC[Impl::MaxThreads]; + /** Per-thread next PC. */ Addr nextPC[Impl::MaxThreads]; - /** Per-thread next Next PC. - * This is not a real register but is used for - * architectures that use a branch-delay slot. - * (such as MIPS or Sparc) - */ - Addr nextNPC[Impl::MaxThreads]; - /** Memory request used to access cache. */ RequestPtr memReq[Impl::MaxThreads]; diff --git a/src/cpu/o3/fetch_impl.hh b/src/cpu/o3/fetch_impl.hh index 25498c7f3..d1f38e38b 100644 --- a/src/cpu/o3/fetch_impl.hh +++ b/src/cpu/o3/fetch_impl.hh @@ -312,7 +312,7 @@ DefaultFetch::initStage() for (int tid = 0; tid < numThreads; tid++) { PC[tid] = cpu->readPC(tid); nextPC[tid] = cpu->readNextPC(tid); - nextNPC[tid] = cpu->readNextNPC(tid); + microPC[tid] = cpu->readMicroPC(tid); } for (int tid=0; tid < numThreads; tid++) { @@ -439,11 +439,7 @@ DefaultFetch::takeOverFrom() stalls[i].commit = 0; PC[i] = cpu->readPC(i); nextPC[i] = cpu->readNextPC(i); -#if ISA_HAS_DELAY_SLOT - nextNPC[i] = cpu->readNextNPC(i); -#else - nextNPC[i] = nextPC[i] + sizeof(TheISA::MachInst); -#endif + microPC[i] = cpu->readMicroPC(i); fetchStatus[i] = Running; } numInst = 0; @@ -493,7 +489,7 @@ DefaultFetch::switchToInactive() template bool DefaultFetch::lookupAndUpdateNextPC(DynInstPtr &inst, Addr &next_PC, - Addr &next_NPC) + Addr &next_NPC, Addr &next_MicroPC) { // Do branch prediction check here. // A bit of a misnomer...next_PC is actually the current PC until @@ -501,13 +497,22 @@ DefaultFetch::lookupAndUpdateNextPC(DynInstPtr &inst, Addr &next_PC, bool predict_taken; if (!inst->isControl()) { - next_PC = next_NPC; - next_NPC = next_NPC + instSize; - inst->setPredTarg(next_PC, next_NPC); + if (inst->isMicroOp() && !inst->isLastMicroOp()) { + next_MicroPC++; + } else { + next_PC = next_NPC; + next_NPC = next_NPC + instSize; + next_MicroPC = 0; + } + inst->setPredTarg(next_PC, next_NPC, next_MicroPC); inst->setPredTaken(false); return false; } + //Assume for now that all control flow is to a different macroop which + //would reset the micro pc to 0. + next_MicroPC = 0; + int tid = inst->threadNumber; Addr pred_PC = next_PC; predict_taken = branchPred.predict(inst, pred_PC, tid); @@ -534,7 +539,7 @@ DefaultFetch::lookupAndUpdateNextPC(DynInstPtr &inst, Addr &next_PC, #endif /* DPRINTF(Fetch, "[tid:%i]: Branch predicted to go to %#x and then %#x.\n", tid, next_PC, next_NPC);*/ - inst->setPredTarg(next_PC, next_NPC); + inst->setPredTarg(next_PC, next_NPC, next_MicroPC); inst->setPredTaken(predict_taken); ++fetchedBranches; @@ -658,14 +663,14 @@ DefaultFetch::fetchCacheLine(Addr fetch_PC, Fault &ret_fault, unsigned tid template inline void DefaultFetch::doSquash(const Addr &new_PC, - const Addr &new_NPC, unsigned tid) + const Addr &new_NPC, const Addr &new_microPC, unsigned tid) { DPRINTF(Fetch, "[tid:%i]: Squashing, setting PC to: %#x, NPC to: %#x.\n", tid, new_PC, new_NPC); PC[tid] = new_PC; nextPC[tid] = new_NPC; - nextNPC[tid] = new_NPC + instSize; + microPC[tid] = new_microPC; // Clear the icache miss if it's outstanding. if (fetchStatus[tid] == IcacheWaitResponse) { @@ -693,12 +698,12 @@ DefaultFetch::doSquash(const Addr &new_PC, template void DefaultFetch::squashFromDecode(const Addr &new_PC, const Addr &new_NPC, - const InstSeqNum &seq_num, - unsigned tid) + const Addr &new_MicroPC, + const InstSeqNum &seq_num, unsigned tid) { DPRINTF(Fetch, "[tid:%i]: Squashing from decode.\n",tid); - doSquash(new_PC, new_NPC, tid); + doSquash(new_PC, new_NPC, new_MicroPC, tid); // Tell the CPU to remove any instructions that are in flight between // fetch and decode. @@ -774,11 +779,12 @@ DefaultFetch::updateFetchStatus() template void DefaultFetch::squash(const Addr &new_PC, const Addr &new_NPC, + const Addr &new_MicroPC, const InstSeqNum &seq_num, unsigned tid) { DPRINTF(Fetch, "[tid:%u]: Squash from commit.\n",tid); - doSquash(new_PC, new_NPC, tid); + doSquash(new_PC, new_NPC, new_MicroPC, tid); // Tell the CPU to remove any instructions that are not in the ROB. cpu->removeInstsNotInROB(tid); @@ -893,6 +899,7 @@ DefaultFetch::checkSignalsAndUpdate(unsigned tid) // In any case, squash. squash(fromCommit->commitInfo[tid].nextPC, fromCommit->commitInfo[tid].nextNPC, + fromCommit->commitInfo[tid].nextMicroPC, fromCommit->commitInfo[tid].doneSeqNum, tid); @@ -948,6 +955,7 @@ DefaultFetch::checkSignalsAndUpdate(unsigned tid) // Squash unless we're already squashing squashFromDecode(fromDecode->decodeInfo[tid].nextPC, fromDecode->decodeInfo[tid].nextNPC, + fromDecode->decodeInfo[tid].nextMicroPC, fromDecode->decodeInfo[tid].doneSeqNum, tid); @@ -1002,9 +1010,9 @@ DefaultFetch::fetch(bool &status_change) DPRINTF(Fetch, "Attempting to fetch from [tid:%i]\n", tid); // The current PC. - Addr &fetch_PC = PC[tid]; - - Addr &fetch_NPC = nextPC[tid]; + Addr fetch_PC = PC[tid]; + Addr fetch_NPC = nextPC[tid]; + Addr fetch_MicroPC = microPC[tid]; // Fault code for memory access. Fault fault = NoFault; @@ -1063,6 +1071,7 @@ DefaultFetch::fetch(bool &status_change) Addr next_PC = fetch_PC; Addr next_NPC = fetch_NPC; + Addr next_MicroPC = fetch_MicroPC; InstSeqNum inst_seq; MachInst inst; @@ -1070,6 +1079,9 @@ DefaultFetch::fetch(bool &status_change) // @todo: Fix this hack. unsigned offset = (fetch_PC & cacheBlkMask) & ~3; + StaticInstPtr staticInst = NULL; + StaticInstPtr macroop = NULL; + if (fault == NoFault) { // If the read of the first instruction was successful, then grab the // instructions from the rest of the cache line and put them into the @@ -1104,19 +1116,29 @@ DefaultFetch::fetch(bool &status_change) // Make sure this is a valid index. assert(offset <= cacheBlkSize - instSize); - // Get the instruction from the array of the cache line. - inst = TheISA::gtoh(*reinterpret_cast - (&cacheData[tid][offset])); + if (!macroop) { + // Get the instruction from the array of the cache line. + inst = TheISA::gtoh(*reinterpret_cast + (&cacheData[tid][offset])); - predecoder.setTC(cpu->thread[tid]->getTC()); - predecoder.moreBytes(fetch_PC, 0, inst); + predecoder.setTC(cpu->thread[tid]->getTC()); + predecoder.moreBytes(fetch_PC, 0, inst); - ext_inst = predecoder.getExtMachInst(); + ext_inst = predecoder.getExtMachInst(); + staticInst = StaticInstPtr(ext_inst); + if (staticInst->isMacroOp()) + macroop = staticInst; + } + if (macroop) { + staticInst = macroop->fetchMicroOp(fetch_MicroPC); + if (staticInst->isLastMicroOp()) + macroop = NULL; + } // Create a new DynInst from the instruction fetched. - DynInstPtr instruction = new DynInst(ext_inst, - fetch_PC, fetch_NPC, - next_PC, next_NPC, + DynInstPtr instruction = new DynInst(staticInst, + fetch_PC, fetch_NPC, fetch_MicroPC, + next_PC, next_NPC, next_MicroPC, inst_seq, cpu); instruction->setTid(tid); @@ -1139,7 +1161,7 @@ DefaultFetch::fetch(bool &status_change) instruction->readPC()); ///FIXME This needs to be more robust in dealing with delay slots - lookupAndUpdateNextPC(instruction, next_PC, next_NPC); + lookupAndUpdateNextPC(instruction, next_PC, next_NPC, next_MicroPC); predicted_branch |= (next_PC != fetch_NPC); // Add instruction to the CPU's list of instructions. @@ -1157,6 +1179,7 @@ DefaultFetch::fetch(bool &status_change) // Move to the next instruction, unless we have a branch. fetch_PC = next_PC; fetch_NPC = next_NPC; + fetch_MicroPC = next_MicroPC; if (instruction->isQuiesce()) { DPRINTF(Fetch, "Quiesce instruction encountered, halting fetch!", @@ -1167,7 +1190,8 @@ DefaultFetch::fetch(bool &status_change) break; } - offset += instSize; + if (!macroop) + offset += instSize; } if (offset >= cacheBlkSize) { @@ -1191,7 +1215,7 @@ DefaultFetch::fetch(bool &status_change) if (fault == NoFault) { PC[tid] = next_PC; nextPC[tid] = next_NPC; - nextNPC[tid] = next_NPC + instSize; + microPC[tid] = next_MicroPC; DPRINTF(Fetch, "[tid:%i]: Setting PC to %08p.\n", tid, next_PC); } else { // We shouldn't be in an icache miss and also have a fault (an ITB @@ -1210,8 +1234,9 @@ DefaultFetch::fetch(bool &status_change) // We will use a nop in order to carry the fault. ext_inst = TheISA::NoopMachInst; + StaticInstPtr staticInst = new StaticInst(ext_inst); // Create a new DynInst from the dummy nop. - DynInstPtr instruction = new DynInst(ext_inst, + DynInstPtr instruction = new DynInst(staticInst, fetch_PC, fetch_NPC, next_PC, next_NPC, inst_seq, cpu); diff --git a/src/cpu/o3/iew_impl.hh b/src/cpu/o3/iew_impl.hh index 050785818..399c44909 100644 --- a/src/cpu/o3/iew_impl.hh +++ b/src/cpu/o3/iew_impl.hh @@ -454,6 +454,7 @@ DefaultIEW::squashDueToBranch(DynInstPtr &inst, unsigned tid) #endif toCommit->nextPC[tid] = inst->readNextPC(); toCommit->nextNPC[tid] = inst->readNextNPC(); + toCommit->nextMicroPC[tid] = inst->readNextMicroPC(); toCommit->includeSquashInst[tid] = false; diff --git a/src/cpu/o3/rename_impl.hh b/src/cpu/o3/rename_impl.hh index 6e7180b1e..d78de2c87 100644 --- a/src/cpu/o3/rename_impl.hh +++ b/src/cpu/o3/rename_impl.hh @@ -963,6 +963,7 @@ DefaultRename::renameSrcRegs(DynInstPtr &inst,unsigned tid) // Floating point and Miscellaneous registers need their indexes // adjusted to account for the expanded number of flattened int regs. flat_src_reg = src_reg - TheISA::FP_Base_DepTag + TheISA::NumIntRegs; + DPRINTF(Rename, "Adjusting reg index from %d to %d.\n", src_reg, flat_src_reg); } inst->flattenSrcReg(src_idx, flat_src_reg); @@ -979,9 +980,11 @@ DefaultRename::renameSrcRegs(DynInstPtr &inst,unsigned tid) // See if the register is ready or not. if (scoreboard->getReg(renamed_reg) == true) { - DPRINTF(Rename, "[tid:%u]: Register is ready.\n", tid); + DPRINTF(Rename, "[tid:%u]: Register %d is ready.\n", tid, renamed_reg); inst->markSrcRegReady(src_idx); + } else { + DPRINTF(Rename, "[tid:%u]: Register %d is not ready.\n", tid, renamed_reg); } ++renameRenameLookups; @@ -1008,6 +1011,7 @@ DefaultRename::renameDestRegs(DynInstPtr &inst,unsigned tid) // Floating point and Miscellaneous registers need their indexes // adjusted to account for the expanded number of flattened int regs. flat_dest_reg = dest_reg - TheISA::FP_Base_DepTag + TheISA::NumIntRegs; + DPRINTF(Rename, "Adjusting reg index from %d to %d.\n", dest_reg, flat_dest_reg); } inst->flattenDestReg(dest_idx, flat_dest_reg); diff --git a/src/cpu/o3/sparc/dyn_inst.hh b/src/cpu/o3/sparc/dyn_inst.hh index 72242b161..a7ab6cd79 100644 --- a/src/cpu/o3/sparc/dyn_inst.hh +++ b/src/cpu/o3/sparc/dyn_inst.hh @@ -56,8 +56,14 @@ class SparcDynInst : public BaseDynInst public: /** BaseDynInst constructor given a binary instruction. */ - SparcDynInst(TheISA::ExtMachInst inst, Addr PC, Addr NPC, - Addr Pred_PC, Addr Pred_NPC, InstSeqNum seq_num, O3CPU *cpu); + SparcDynInst(StaticInstPtr staticInst, Addr PC, Addr NPC, Addr microPC, + Addr Pred_PC, Addr Pred_NPC, Addr Pred_MicroPC, + InstSeqNum seq_num, O3CPU *cpu); + + /** BaseDynInst constructor given a binary instruction. */ + SparcDynInst(TheISA::ExtMachInst inst, Addr PC, Addr NPC, Addr microPC, + Addr Pred_PC, Addr Pred_NPC, Addr Pred_MicroPC, + InstSeqNum seq_num, O3CPU *cpu); /** BaseDynInst constructor given a static inst pointer. */ SparcDynInst(StaticInstPtr &_staticInst); diff --git a/src/cpu/o3/sparc/dyn_inst_impl.hh b/src/cpu/o3/sparc/dyn_inst_impl.hh index c4d30b6f4..6bfe97717 100644 --- a/src/cpu/o3/sparc/dyn_inst_impl.hh +++ b/src/cpu/o3/sparc/dyn_inst_impl.hh @@ -30,11 +30,24 @@ #include "cpu/o3/sparc/dyn_inst.hh" +template +SparcDynInst::SparcDynInst(StaticInstPtr staticInst, + Addr PC, Addr NPC, Addr microPC, + Addr Pred_PC, Addr Pred_NPC, Addr Pred_MicroPC, + InstSeqNum seq_num, O3CPU *cpu) + : BaseDynInst(staticInst, PC, NPC, microPC, + Pred_PC, Pred_NPC, Pred_MicroPC, seq_num, cpu) +{ + initVars(); +} + template SparcDynInst::SparcDynInst(TheISA::ExtMachInst inst, - Addr PC, Addr NPC, Addr Pred_PC, Addr Pred_NPC, + Addr PC, Addr NPC, Addr microPC, + Addr Pred_PC, Addr Pred_NPC, Addr Pred_MicroPC, InstSeqNum seq_num, O3CPU *cpu) - : BaseDynInst(inst, PC, NPC, Pred_PC, Pred_NPC, seq_num, cpu) + : BaseDynInst(inst, PC, NPC, microPC, + Pred_PC, Pred_NPC, Pred_MicroPC, seq_num, cpu) { initVars(); } -- cgit v1.2.3 From 308b2f0ce3215eaaed69da937555008f9ed36835 Mon Sep 17 00:00:00 2001 From: Gabe Black Date: Sun, 15 Apr 2007 21:51:05 +0000 Subject: Add extra constructors to Alpha and MIPS --HG-- extra : convert_revision : 26ea87bfe9e5c27134eb9a15bf9e4629afae6c69 --- src/cpu/o3/alpha/dyn_inst.hh | 9 +++++++-- src/cpu/o3/alpha/dyn_inst_impl.hh | 19 +++++++++++++++++-- src/cpu/o3/mips/dyn_inst.hh | 10 ++++++++-- src/cpu/o3/mips/dyn_inst_impl.hh | 18 +++++++++++++++--- 4 files changed, 47 insertions(+), 9 deletions(-) (limited to 'src/cpu') diff --git a/src/cpu/o3/alpha/dyn_inst.hh b/src/cpu/o3/alpha/dyn_inst.hh index 20759d849..a6fb7b885 100644 --- a/src/cpu/o3/alpha/dyn_inst.hh +++ b/src/cpu/o3/alpha/dyn_inst.hh @@ -73,8 +73,13 @@ class AlphaDynInst : public BaseDynInst public: /** BaseDynInst constructor given a binary instruction. */ - AlphaDynInst(ExtMachInst inst, Addr PC, Addr NPC, - Addr Pred_PC, Addr Pred_NPC, + AlphaDynInst(StaticInstPtr staticInst, Addr PC, Addr NPC, Addr microPC, + Addr Pred_PC, Addr Pred_NPC, Addr Pred_MicroPC, + InstSeqNum seq_num, O3CPU *cpu); + + /** BaseDynInst constructor given a binary instruction. */ + AlphaDynInst(ExtMachInst inst, Addr PC, Addr NPC, Addr microPC, + Addr Pred_PC, Addr Pred_NPC, Addr Pred_MicroPC, InstSeqNum seq_num, O3CPU *cpu); /** BaseDynInst constructor given a static inst pointer. */ diff --git a/src/cpu/o3/alpha/dyn_inst_impl.hh b/src/cpu/o3/alpha/dyn_inst_impl.hh index fdce1ade5..6dfe0ccdd 100644 --- a/src/cpu/o3/alpha/dyn_inst_impl.hh +++ b/src/cpu/o3/alpha/dyn_inst_impl.hh @@ -31,10 +31,25 @@ #include "cpu/o3/alpha/dyn_inst.hh" template -AlphaDynInst::AlphaDynInst(ExtMachInst inst, Addr PC, Addr NPC, +AlphaDynInst::AlphaDynInst(StaticInstPtr staticInst, + Addr PC, Addr NPC, Addr microPC, Addr Pred_PC, Addr Pred_NPC, + Addr Pred_MicroPC, InstSeqNum seq_num, O3CPU *cpu) - : BaseDynInst(inst, PC, NPC, Pred_PC, Pred_NPC, seq_num, cpu) + : BaseDynInst(staticInst, PC, NPC, microPC, + Pred_PC, Pred_NPC, Pred_MicroPC, seq_num, cpu) +{ + initVars(); +} + +template +AlphaDynInst::AlphaDynInst(ExtMachInst inst, + Addr PC, Addr NPC, Addr microPC, + Addr Pred_PC, Addr Pred_NPC, + Addr Pred_MicroPC, + InstSeqNum seq_num, O3CPU *cpu) + : BaseDynInst(inst, PC, NPC, microPC, + Pred_PC, Pred_NPC, Pred_MicroPC, seq_num, cpu) { initVars(); } diff --git a/src/cpu/o3/mips/dyn_inst.hh b/src/cpu/o3/mips/dyn_inst.hh index 366b4bb23..cf78c0941 100755 --- a/src/cpu/o3/mips/dyn_inst.hh +++ b/src/cpu/o3/mips/dyn_inst.hh @@ -69,10 +69,16 @@ class MipsDynInst : public BaseDynInst }; public: + /** BaseDynInst constructor given a binary instruction. */ + MipsDynInst(StaticInstPtr staticInst, + Addr PC, Addr NPC, Addr microPC, + Addr Pred_PC, Addr Pred_NPC, Addr Pred_MicroPC, + InstSeqNum seq_num, O3CPU *cpu); + /** BaseDynInst constructor given a binary instruction. */ MipsDynInst(ExtMachInst inst, - Addr PC, Addr NPC, - Addr Pred_PC, Addr Pred_NPC, + Addr PC, Addr NPC, Addr microPC, + Addr Pred_PC, Addr Pred_NPC, Addr Pred_MicroPC, InstSeqNum seq_num, O3CPU *cpu); /** BaseDynInst constructor given a static inst pointer. */ diff --git a/src/cpu/o3/mips/dyn_inst_impl.hh b/src/cpu/o3/mips/dyn_inst_impl.hh index c0f9ae771..7e8697b32 100755 --- a/src/cpu/o3/mips/dyn_inst_impl.hh +++ b/src/cpu/o3/mips/dyn_inst_impl.hh @@ -30,12 +30,24 @@ #include "cpu/o3/mips/dyn_inst.hh" +template +MipsDynInst::MipsDynInst(StaticInstPtr staticInst, + Addr PC, Addr NPC, Addr microPC, + Addr Pred_PC, Addr Pred_NPC, Addr Pred_MicroPC, + InstSeqNum seq_num, O3CPU *cpu) + : BaseDynInst(staticInst, PC, NPC, microPC, + Pred_PC, Pred_NPC, Pred_MicroPC, seq_num, cpu) +{ + initVars(); +} + template MipsDynInst::MipsDynInst(ExtMachInst inst, - Addr PC, Addr NPC, - Addr Pred_PC, Addr Pred_NPC, + Addr PC, Addr NPC, Addr microPC, + Addr Pred_PC, Addr Pred_NPC, Addr Pred_MicroPC, InstSeqNum seq_num, O3CPU *cpu) - : BaseDynInst(inst, PC, NPC, Pred_PC, Pred_NPC, seq_num, cpu) + : BaseDynInst(inst, PC, NPC, microPC, + Pred_PC, Pred_NPC, Pred_MicroPC, seq_num, cpu) { initVars(); } -- cgit v1.2.3 From 8248af53b19a633ae6d9aa8cd6b5a12cfa3b1644 Mon Sep 17 00:00:00 2001 From: Gabe Black Date: Sun, 15 Apr 2007 21:52:38 +0000 Subject: Make an inner loop which pulls microops out of macroops. These aren't checked for control flow because we can pull out microops until we run out of buffer. This prevents microops from being interpretted as branches because the pc doesn't become npc. --HG-- extra : convert_revision : 9fff7c6c32900692bbc567ecb75701c9c73da259 --- src/cpu/o3/fetch_impl.hh | 127 ++++++++++++++++++++++++----------------------- 1 file changed, 64 insertions(+), 63 deletions(-) (limited to 'src/cpu') diff --git a/src/cpu/o3/fetch_impl.hh b/src/cpu/o3/fetch_impl.hh index d1f38e38b..3ae7bc402 100644 --- a/src/cpu/o3/fetch_impl.hh +++ b/src/cpu/o3/fetch_impl.hh @@ -1094,11 +1094,9 @@ DefaultFetch::fetch(bool &status_change) // ended this fetch block. bool predicted_branch = false; - for (; - offset < cacheBlkSize && - numInst < fetchWidth && - !predicted_branch; - ++numInst) { + while (offset < cacheBlkSize && + numInst < fetchWidth && + !predicted_branch) { // If we're branching after this instruction, quite fetching // from the same block then. @@ -1109,10 +1107,6 @@ DefaultFetch::fetch(bool &status_change) fetch_PC, fetch_NPC); } - - // Get a sequence number. - inst_seq = cpu->getAndIncrementInstSeq(); - // Make sure this is a valid index. assert(offset <= cacheBlkSize - instSize); @@ -1129,80 +1123,87 @@ DefaultFetch::fetch(bool &status_change) if (staticInst->isMacroOp()) macroop = staticInst; } - if (macroop) { - staticInst = macroop->fetchMicroOp(fetch_MicroPC); - if (staticInst->isLastMicroOp()) - macroop = NULL; - } + do { + if (macroop) { + staticInst = macroop->fetchMicroOp(fetch_MicroPC); + if (staticInst->isLastMicroOp()) + macroop = NULL; + } - // Create a new DynInst from the instruction fetched. - DynInstPtr instruction = new DynInst(staticInst, - fetch_PC, fetch_NPC, fetch_MicroPC, - next_PC, next_NPC, next_MicroPC, - inst_seq, cpu); - instruction->setTid(tid); + // Get a sequence number. + inst_seq = cpu->getAndIncrementInstSeq(); - instruction->setASID(tid); + // Create a new DynInst from the instruction fetched. + DynInstPtr instruction = new DynInst(staticInst, + fetch_PC, fetch_NPC, fetch_MicroPC, + next_PC, next_NPC, next_MicroPC, + inst_seq, cpu); + instruction->setTid(tid); - instruction->setThreadState(cpu->thread[tid]); + instruction->setASID(tid); - DPRINTF(Fetch, "[tid:%i]: Instruction PC %#x created " - "[sn:%lli]\n", - tid, instruction->readPC(), inst_seq); + instruction->setThreadState(cpu->thread[tid]); - //DPRINTF(Fetch, "[tid:%i]: MachInst is %#x\n", tid, ext_inst); + DPRINTF(Fetch, "[tid:%i]: Instruction PC %#x created " + "[sn:%lli]\n", + tid, instruction->readPC(), inst_seq); - DPRINTF(Fetch, "[tid:%i]: Instruction is: %s\n", - tid, instruction->staticInst->disassemble(fetch_PC)); + //DPRINTF(Fetch, "[tid:%i]: MachInst is %#x\n", tid, ext_inst); - instruction->traceData = - Trace::getInstRecord(curTick, cpu->tcBase(tid), - instruction->staticInst, - instruction->readPC()); + DPRINTF(Fetch, "[tid:%i]: Instruction is: %s\n", + tid, instruction->staticInst->disassemble(fetch_PC)); - ///FIXME This needs to be more robust in dealing with delay slots - lookupAndUpdateNextPC(instruction, next_PC, next_NPC, next_MicroPC); - predicted_branch |= (next_PC != fetch_NPC); + instruction->traceData = + Trace::getInstRecord(curTick, cpu->tcBase(tid), + instruction->staticInst, + instruction->readPC()); - // Add instruction to the CPU's list of instructions. - instruction->setInstListIt(cpu->addInst(instruction)); + ///FIXME This needs to be more robust in dealing with delay slots + predicted_branch |= + lookupAndUpdateNextPC(instruction, next_PC, next_NPC, next_MicroPC); - // Write the instruction to the first slot in the queue - // that heads to decode. - toDecode->insts[numInst] = instruction; + // Add instruction to the CPU's list of instructions. + instruction->setInstListIt(cpu->addInst(instruction)); - toDecode->size++; + // Write the instruction to the first slot in the queue + // that heads to decode. + toDecode->insts[numInst] = instruction; - // Increment stat of fetched instructions. - ++fetchedInsts; + toDecode->size++; - // Move to the next instruction, unless we have a branch. - fetch_PC = next_PC; - fetch_NPC = next_NPC; - fetch_MicroPC = next_MicroPC; + // Increment stat of fetched instructions. + ++fetchedInsts; - if (instruction->isQuiesce()) { - DPRINTF(Fetch, "Quiesce instruction encountered, halting fetch!", - curTick); - fetchStatus[tid] = QuiescePending; - ++numInst; - status_change = true; - break; - } + // Move to the next instruction, unless we have a branch. + fetch_PC = next_PC; + fetch_NPC = next_NPC; + fetch_MicroPC = next_MicroPC; + + if (instruction->isQuiesce()) { + DPRINTF(Fetch, "Quiesce instruction encountered, halting fetch!", + curTick); + fetchStatus[tid] = QuiescePending; + ++numInst; + status_change = true; + break; + } - if (!macroop) - offset += instSize; + ++numInst; + } while (staticInst->isMicroOp() && + !staticInst->isLastMicroOp() && + numInst < fetchWidth); + offset += instSize; } - if (offset >= cacheBlkSize) { - DPRINTF(Fetch, "[tid:%i]: Done fetching, reached the end of cache " - "block.\n", tid); + if (predicted_branch) { + DPRINTF(Fetch, "[tid:%i]: Done fetching, predicted branch " + "instruction encountered.\n", tid); } else if (numInst >= fetchWidth) { DPRINTF(Fetch, "[tid:%i]: Done fetching, reached fetch bandwidth " "for this cycle.\n", tid); - } else if (predicted_branch) { - DPRINTF(Fetch, "[tid:%i]: Done fetching, predicted branch " - "instruction encountered.\n", tid); + } else if (offset >= cacheBlkSize) { + DPRINTF(Fetch, "[tid:%i]: Done fetching, reached the end of cache " + "block.\n", tid); } } -- cgit v1.2.3 From acc62514b1a4244182a7e5fad8ca03505389d94d Mon Sep 17 00:00:00 2001 From: Gabe Black Date: Sun, 22 Apr 2007 17:50:43 +0000 Subject: Make the floating point zero register special handling only apply for ALPHA. --HG-- extra : convert_revision : 4f393a5471656b29cecbacfcb337992239775915 --- src/cpu/o3/free_list.hh | 2 ++ src/cpu/o3/regfile.hh | 4 ++++ src/cpu/o3/rename_map.cc | 4 ++++ src/cpu/o3/scoreboard.cc | 15 +++++++++++++++ 4 files changed, 25 insertions(+) (limited to 'src/cpu') diff --git a/src/cpu/o3/free_list.hh b/src/cpu/o3/free_list.hh index c669b0b34..42fc0c533 100644 --- a/src/cpu/o3/free_list.hh +++ b/src/cpu/o3/free_list.hh @@ -168,7 +168,9 @@ SimpleFreeList::addReg(PhysRegIndex freed_reg) if (freed_reg != TheISA::ZeroReg) freeIntRegs.push(freed_reg); } else if (freed_reg < numPhysicalRegs) { +#if THE_ISA == ALPHA_ISA if (freed_reg != (TheISA::ZeroReg + numPhysicalIntRegs)) +#endif freeFloatRegs.push(freed_reg); } } diff --git a/src/cpu/o3/regfile.hh b/src/cpu/o3/regfile.hh index b5b1cd021..75d3fa6eb 100644 --- a/src/cpu/o3/regfile.hh +++ b/src/cpu/o3/regfile.hh @@ -179,7 +179,9 @@ class PhysRegFile DPRINTF(IEW, "RegFile: Setting float register %i to %#x\n", int(reg_idx), (uint64_t)val); +#if THE_ISA == ALPHA_ISA if (reg_idx != TheISA::ZeroReg) +#endif floatRegFile[reg_idx].d = val; } @@ -194,7 +196,9 @@ class PhysRegFile DPRINTF(IEW, "RegFile: Setting float register %i to %#x\n", int(reg_idx), (uint64_t)val); +#if THE_ISA == ALPHA_ISA if (reg_idx != TheISA::ZeroReg) +#endif floatRegFile[reg_idx].d = val; } diff --git a/src/cpu/o3/rename_map.cc b/src/cpu/o3/rename_map.cc index b436ec1c3..e6649ce3e 100644 --- a/src/cpu/o3/rename_map.cc +++ b/src/cpu/o3/rename_map.cc @@ -165,17 +165,21 @@ SimpleRenameMap::rename(RegIndex arch_reg) // If it's not referencing the zero register, then rename the // register. +#if THE_ISA == ALPHA_ISA if (arch_reg != floatZeroReg) { +#endif renamed_reg = freeList->getFloatReg(); floatRenameMap[arch_reg].physical_reg = renamed_reg; assert(renamed_reg < numPhysicalRegs && renamed_reg >= numPhysicalIntRegs); +#if THE_ISA == ALPHA_ISA } else { // Otherwise return the zero register so nothing bad happens. renamed_reg = floatZeroReg; } +#endif } else { // Subtract off the base offset for miscellaneous registers. arch_reg = arch_reg - numLogicalRegs; diff --git a/src/cpu/o3/scoreboard.cc b/src/cpu/o3/scoreboard.cc index 1859b35a4..e7f8b7949 100644 --- a/src/cpu/o3/scoreboard.cc +++ b/src/cpu/o3/scoreboard.cc @@ -29,6 +29,7 @@ * Kevin Lim */ +#include "arch/isa_specific.hh" #include "cpu/o3/scoreboard.hh" Scoreboard::Scoreboard(unsigned activeThreads, @@ -79,11 +80,18 @@ Scoreboard::name() const bool Scoreboard::getReg(PhysRegIndex phys_reg) { +#if THE_ISA == ALPHA_ISA // Always ready if int or fp zero reg. if (phys_reg == zeroRegIdx || phys_reg == (zeroRegIdx + numPhysicalIntRegs)) { return 1; } +#else + // Always ready if int zero reg. + if (phys_reg == zeroRegIdx) { + return 1; + } +#endif return regScoreBoard[phys_reg]; } @@ -99,11 +107,18 @@ Scoreboard::setReg(PhysRegIndex phys_reg) void Scoreboard::unsetReg(PhysRegIndex ready_reg) { +#if THE_ISA == ALPHA_ISA if (ready_reg == zeroRegIdx || ready_reg == (zeroRegIdx + numPhysicalIntRegs)) { // Don't do anything if int or fp zero reg. return; } +#else + if (ready_reg == zeroRegIdx) { + // Don't do anything if int zero reg. + return; + } +#endif regScoreBoard[ready_reg] = 0; } -- cgit v1.2.3 From debf04aef1b0f662e981507545cdac956dd22a47 Mon Sep 17 00:00:00 2001 From: Gabe Black Date: Sat, 12 May 2007 15:11:44 -0700 Subject: Make sure all addresses used in syscalls are truncated to 32 bits. Actually -all- arguements are truncated to 32 bits, but we should be able to get away with it. --HG-- extra : convert_revision : 3b8766c68a4ab36e2e769fac4812657f3f7e0d1c --- src/cpu/o3/sparc/cpu_impl.hh | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'src/cpu') diff --git a/src/cpu/o3/sparc/cpu_impl.hh b/src/cpu/o3/sparc/cpu_impl.hh index 50d980f55..2e398577e 100644 --- a/src/cpu/o3/sparc/cpu_impl.hh +++ b/src/cpu/o3/sparc/cpu_impl.hh @@ -272,7 +272,10 @@ SparcO3CPU::getSyscallArg(int i, int tid) { TheISA::IntReg idx = TheISA::flattenIntIndex(this->tcBase(tid), SparcISA::ArgumentReg0 + i); - return this->readArchIntReg(idx, tid); + TheISA::IntReg val = this->readArchIntReg(idx, tid); + if (bits(this->readMiscRegNoEffect(SparcISA::MISCREG_PSTATE, tid), 3, 3)) + val = bits(val, 31, 0); + return val; } template -- cgit v1.2.3 From df7730b6774a730d554bfaa469ad95eeeffd3dc9 Mon Sep 17 00:00:00 2001 From: Gabe Black Date: Wed, 20 Jun 2007 19:46:45 -0700 Subject: Fix compiler errors. --HG-- extra : convert_revision : 2b10076a24cb36cb748e299011ae691f09c158cd --- src/cpu/base_dyn_inst.hh | 8 ++++---- src/cpu/base_dyn_inst_impl.hh | 6 +++--- src/cpu/o3/fetch_impl.hh | 23 +++++++++++------------ 3 files changed, 18 insertions(+), 19 deletions(-) (limited to 'src/cpu') diff --git a/src/cpu/base_dyn_inst.hh b/src/cpu/base_dyn_inst.hh index 1311e5cf2..a55c1e3c0 100644 --- a/src/cpu/base_dyn_inst.hh +++ b/src/cpu/base_dyn_inst.hh @@ -498,11 +498,11 @@ class BaseDynInst : public FastAlloc, public RefCounted bool isQuiesce() const { return staticInst->isQuiesce(); } bool isIprAccess() const { return staticInst->isIprAccess(); } bool isUnverifiable() const { return staticInst->isUnverifiable(); } - bool isMacroOp() const { return staticInst->isMacroOp(); } - bool isMicroOp() const { return staticInst->isMicroOp(); } + bool isMacroop() const { return staticInst->isMacroop(); } + bool isMicroop() const { return staticInst->isMicroop(); } bool isDelayedCommit() const { return staticInst->isDelayedCommit(); } - bool isLastMicroOp() const { return staticInst->isLastMicroOp(); } - bool isFirstMicroOp() const { return staticInst->isFirstMicroOp(); } + bool isLastMicroop() const { return staticInst->isLastMicroop(); } + bool isFirstMicroop() const { return staticInst->isFirstMicroop(); } bool isMicroBranch() const { return staticInst->isMicroBranch(); } /** Temporarily sets this instruction as a serialize before instruction. */ diff --git a/src/cpu/base_dyn_inst_impl.hh b/src/cpu/base_dyn_inst_impl.hh index acf8af9cf..5c18ae694 100644 --- a/src/cpu/base_dyn_inst_impl.hh +++ b/src/cpu/base_dyn_inst_impl.hh @@ -73,7 +73,7 @@ BaseDynInst::BaseDynInst(StaticInstPtr _staticInst, seqNum = seq_num; bool nextIsMicro = - staticInst->isMicroOp() && !staticInst->isLastMicroOp(); + staticInst->isMicroop() && !staticInst->isLastMicroop(); PC = inst_PC; microPC = inst_MicroPC; @@ -101,12 +101,12 @@ BaseDynInst::BaseDynInst(TheISA::ExtMachInst inst, Addr pred_PC, Addr pred_NPC, Addr pred_MicroPC, InstSeqNum seq_num, ImplCPU *cpu) - : staticInst(inst), traceData(NULL), cpu(cpu) + : staticInst(inst, inst_PC), traceData(NULL), cpu(cpu) { seqNum = seq_num; bool nextIsMicro = - staticInst->isMicroOp() && !staticInst->isLastMicroOp(); + staticInst->isMicroop() && !staticInst->isLastMicroop(); PC = inst_PC; microPC = inst_MicroPC; diff --git a/src/cpu/o3/fetch_impl.hh b/src/cpu/o3/fetch_impl.hh index 0fd1e7bac..857a08629 100644 --- a/src/cpu/o3/fetch_impl.hh +++ b/src/cpu/o3/fetch_impl.hh @@ -498,7 +498,7 @@ DefaultFetch::lookupAndUpdateNextPC(DynInstPtr &inst, Addr &next_PC, bool predict_taken; if (!inst->isControl()) { - if (inst->isMicroOp() && !inst->isLastMicroOp()) { + if (inst->isMicroop() && !inst->isLastMicroop()) { next_MicroPC++; } else { next_PC = next_NPC; @@ -1120,14 +1120,14 @@ DefaultFetch::fetch(bool &status_change) predecoder.moreBytes(fetch_PC, fetch_PC, 0, inst); ext_inst = predecoder.getExtMachInst(); - staticInst = StaticInstPtr(ext_inst); - if (staticInst->isMacroOp()) + staticInst = StaticInstPtr(ext_inst, fetch_PC); + if (staticInst->isMacroop()) macroop = staticInst; } do { if (macroop) { - staticInst = macroop->fetchMicroOp(fetch_MicroPC); - if (staticInst->isLastMicroOp()) + staticInst = macroop->fetchMicroop(fetch_MicroPC); + if (staticInst->isLastMicroop()) macroop = NULL; } @@ -1194,8 +1194,8 @@ DefaultFetch::fetch(bool &status_change) } ++numInst; - } while (staticInst->isMicroOp() && - !staticInst->isLastMicroOp() && + } while (staticInst->isMicroop() && + !staticInst->isLastMicroop() && numInst < fetchWidth); offset += instSize; } @@ -1240,13 +1240,12 @@ DefaultFetch::fetch(bool &status_change) // We will use a nop in order to carry the fault. ext_inst = TheISA::NoopMachInst; - StaticInstPtr staticInst = new StaticInst(ext_inst); // Create a new DynInst from the dummy nop. - DynInstPtr instruction = new DynInst(staticInst, - fetch_PC, fetch_NPC, - next_PC, next_NPC, + DynInstPtr instruction = new DynInst(ext_inst, + fetch_PC, fetch_NPC, fetch_MicroPC, + next_PC, next_NPC, next_MicroPC, inst_seq, cpu); - instruction->setPredTarg(next_PC, next_NPC); + instruction->setPredTarg(next_PC, next_NPC, 1); instruction->setTid(tid); instruction->setASID(tid); -- cgit v1.2.3