diff options
Diffstat (limited to 'src/cpu/o3/fetch_impl.hh')
-rw-r--r-- | src/cpu/o3/fetch_impl.hh | 196 |
1 files changed, 73 insertions, 123 deletions
diff --git a/src/cpu/o3/fetch_impl.hh b/src/cpu/o3/fetch_impl.hh index fe320fa79..90d3868a5 100644 --- a/src/cpu/o3/fetch_impl.hh +++ b/src/cpu/o3/fetch_impl.hh @@ -319,9 +319,7 @@ DefaultFetch<Impl>::initStage() for (int tid = 0; tid < numThreads; tid++) { PC[tid] = cpu->readPC(tid); nextPC[tid] = cpu->readNextPC(tid); -#if ISA_HAS_DELAY_SLOT nextNPC[tid] = cpu->readNextNPC(tid); -#endif } // Size of cache block. @@ -343,11 +341,6 @@ DefaultFetch<Impl>::initStage() cacheDataPC[tid] = 0; cacheDataValid[tid] = false; - delaySlotInfo[tid].branchSeqNum = -1; - delaySlotInfo[tid].numInsts = 0; - delaySlotInfo[tid].targetAddr = 0; - delaySlotInfo[tid].targetReady = false; - stalls[tid].decode = false; stalls[tid].rename = false; stalls[tid].iew = false; @@ -441,10 +434,8 @@ DefaultFetch<Impl>::takeOverFrom() nextPC[i] = cpu->readNextPC(i); #if ISA_HAS_DELAY_SLOT nextNPC[i] = cpu->readNextNPC(i); - delaySlotInfo[i].branchSeqNum = -1; - delaySlotInfo[i].numInsts = 0; - delaySlotInfo[i].targetAddr = 0; - delaySlotInfo[i].targetReady = false; +#else + nextNPC[i] = nextPC[i] + sizeof(TheISA::MachInst); #endif fetchStatus[i] = Running; } @@ -503,54 +494,41 @@ DefaultFetch<Impl>::lookupAndUpdateNextPC(DynInstPtr &inst, Addr &next_PC, bool predict_taken; if (!inst->isControl()) { -#if ISA_HAS_DELAY_SLOT - Addr cur_PC = next_PC; - next_PC = cur_PC + instSize; //next_NPC; - next_NPC = cur_PC + (2 * instSize);//next_NPC + instSize; - inst->setPredTarg(next_NPC); -#else - next_PC = next_PC + instSize; - inst->setPredTarg(next_PC); -#endif + next_PC = next_NPC; + next_NPC = next_NPC + instSize; + inst->setPredTarg(next_PC, next_NPC); + inst->setPredTaken(false); return false; } int tid = inst->threadNumber; -#if ISA_HAS_DELAY_SLOT Addr pred_PC = next_PC; predict_taken = branchPred.predict(inst, pred_PC, tid); - if (predict_taken) { - DPRINTF(Fetch, "[tid:%i]: Branch predicted to be true.\n", tid); +/* if (predict_taken) { + DPRINTF(Fetch, "[tid:%i]: Branch predicted to be taken to %#x.\n", + tid, pred_PC); } else { - DPRINTF(Fetch, "[tid:%i]: Branch predicted to be false.\n", tid); - } + DPRINTF(Fetch, "[tid:%i]: Branch predicted to be not taken.\n", tid); + }*/ - if (predict_taken) { - next_PC = next_NPC; +#if ISA_HAS_DELAY_SLOT + next_PC = next_NPC; + if (predict_taken) next_NPC = pred_PC; - - // Update delay slot info - ++delaySlotInfo[tid].numInsts; - delaySlotInfo[tid].targetAddr = pred_PC; - DPRINTF(Fetch, "[tid:%i]: %i delay slot inst(s) to process.\n", tid, - delaySlotInfo[tid].numInsts); - } else { // !predict_taken - if (inst->isCondDelaySlot()) { - next_PC = pred_PC; - // The delay slot is skipped here if there is on - // prediction - } else { - next_PC = next_NPC; - // No need to declare a delay slot here since - // there is no for the pred. target to jump - } - - next_NPC = next_NPC + instSize; - } + else + next_NPC += instSize; #else - predict_taken = branchPred.predict(inst, next_PC, tid); + if (predict_taken) + next_PC = pred_PC; + else + next_PC += instSize; + next_NPC = next_PC + instSize; #endif +/* DPRINTF(Fetch, "[tid:%i]: Branch predicted to go to %#x and then %#x.\n", + tid, next_PC, next_NPC);*/ + inst->setPredTarg(next_PC, next_NPC); + inst->setPredTaken(predict_taken); ++fetchedBranches; @@ -671,14 +649,15 @@ DefaultFetch<Impl>::fetchCacheLine(Addr fetch_PC, Fault &ret_fault, unsigned tid template <class Impl> inline void -DefaultFetch<Impl>::doSquash(const Addr &new_PC, unsigned tid) +DefaultFetch<Impl>::doSquash(const Addr &new_PC, + const Addr &new_NPC, unsigned tid) { - DPRINTF(Fetch, "[tid:%i]: Squashing, setting PC to: %#x.\n", - tid, new_PC); + DPRINTF(Fetch, "[tid:%i]: Squashing, setting PC to: %#x, NPC to: %#x.\n", + tid, new_PC, new_NPC); PC[tid] = new_PC; - nextPC[tid] = new_PC + instSize; - nextNPC[tid] = new_PC + (2 * instSize); + nextPC[tid] = new_NPC; + nextNPC[tid] = new_NPC + instSize; // Clear the icache miss if it's outstanding. if (fetchStatus[tid] == IcacheWaitResponse) { @@ -704,21 +683,13 @@ DefaultFetch<Impl>::doSquash(const Addr &new_PC, unsigned tid) template<class Impl> void -DefaultFetch<Impl>::squashFromDecode(const Addr &new_PC, +DefaultFetch<Impl>::squashFromDecode(const Addr &new_PC, const Addr &new_NPC, const InstSeqNum &seq_num, unsigned tid) { DPRINTF(Fetch, "[tid:%i]: Squashing from decode.\n",tid); - doSquash(new_PC, tid); - -#if ISA_HAS_DELAY_SLOT - if (seq_num <= delaySlotInfo[tid].branchSeqNum) { - delaySlotInfo[tid].numInsts = 0; - delaySlotInfo[tid].targetAddr = 0; - delaySlotInfo[tid].targetReady = false; - } -#endif + doSquash(new_PC, new_NPC, tid); // Tell the CPU to remove any instructions that are in flight between // fetch and decode. @@ -793,20 +764,15 @@ DefaultFetch<Impl>::updateFetchStatus() template <class Impl> void -DefaultFetch<Impl>::squash(const Addr &new_PC, const InstSeqNum &seq_num, +DefaultFetch<Impl>::squash(const Addr &new_PC, const Addr &new_NPC, + const InstSeqNum &seq_num, bool squash_delay_slot, unsigned tid) { DPRINTF(Fetch, "[tid:%u]: Squash from commit.\n",tid); - doSquash(new_PC, tid); + doSquash(new_PC, new_NPC, tid); #if ISA_HAS_DELAY_SLOT - if (seq_num <= delaySlotInfo[tid].branchSeqNum) { - delaySlotInfo[tid].numInsts = 0; - delaySlotInfo[tid].targetAddr = 0; - delaySlotInfo[tid].targetReady = false; - } - // Tell the CPU to remove any instructions that are not in the ROB. cpu->removeInstsNotInROB(tid, squash_delay_slot, seq_num); #else @@ -929,6 +895,7 @@ DefaultFetch<Impl>::checkSignalsAndUpdate(unsigned tid) #endif // In any case, squash. squash(fromCommit->commitInfo[tid].nextPC, + fromCommit->commitInfo[tid].nextNPC, doneSeqNum, fromCommit->commitInfo[tid].squashDelaySlot, tid); @@ -984,8 +951,12 @@ DefaultFetch<Impl>::checkSignalsAndUpdate(unsigned tid) #else InstSeqNum doneSeqNum = fromDecode->decodeInfo[tid].doneSeqNum; #endif + DPRINTF(Fetch, "Squashing from decode with PC = %#x, NPC = %#x\n", + fromDecode->decodeInfo[tid].nextPC, + fromDecode->decodeInfo[tid].nextNPC); // Squash unless we're already squashing squashFromDecode(fromDecode->decodeInfo[tid].nextPC, + fromDecode->decodeInfo[tid].nextNPC, doneSeqNum, tid); @@ -1042,6 +1013,8 @@ DefaultFetch<Impl>::fetch(bool &status_change) // The current PC. Addr &fetch_PC = PC[tid]; + Addr &fetch_NPC = nextPC[tid]; + // Fault code for memory access. Fault fault = NoFault; @@ -1098,7 +1071,8 @@ DefaultFetch<Impl>::fetch(bool &status_change) } Addr next_PC = fetch_PC; - Addr next_NPC = next_PC + instSize; + Addr next_NPC = fetch_NPC; + InstSeqNum inst_seq; MachInst inst; ExtMachInst ext_inst; @@ -1117,15 +1091,22 @@ DefaultFetch<Impl>::fetch(bool &status_change) // ended this fetch block. bool predicted_branch = false; - // Need to keep track of whether or not a delay slot - // instruction has been fetched - for (; offset < cacheBlkSize && numInst < fetchWidth && - (!predicted_branch || delaySlotInfo[tid].numInsts > 0); + !predicted_branch; ++numInst) { + // If we're branching after this instruction, quite fetching + // from the same block then. + predicted_branch = + (fetch_PC + sizeof(TheISA::MachInst) != fetch_NPC); + if (predicted_branch) { + DPRINTF(Fetch, "Branch detected with PC = %#x, NPC = %#x\n", + fetch_PC, fetch_NPC); + } + + // Get a sequence number. inst_seq = cpu->getAndIncrementInstSeq(); @@ -1145,8 +1126,9 @@ DefaultFetch<Impl>::fetch(bool &status_change) #endif // Create a new DynInst from the instruction fetched. - DynInstPtr instruction = new DynInst(ext_inst, fetch_PC, - next_PC, + DynInstPtr instruction = new DynInst(ext_inst, + fetch_PC, fetch_NPC, + next_PC, next_NPC, inst_seq, cpu); instruction->setTid(tid); @@ -1158,6 +1140,8 @@ DefaultFetch<Impl>::fetch(bool &status_change) "[sn:%lli]\n", tid, instruction->readPC(), inst_seq); + //DPRINTF(Fetch, "[tid:%i]: MachInst is %#x\n", tid, ext_inst); + DPRINTF(Fetch, "[tid:%i]: Instruction is: %s\n", tid, instruction->staticInst->disassemble(fetch_PC)); @@ -1166,8 +1150,8 @@ DefaultFetch<Impl>::fetch(bool &status_change) instruction->staticInst, instruction->readPC()); - predicted_branch = lookupAndUpdateNextPC(instruction, next_PC, - next_NPC); + lookupAndUpdateNextPC(instruction, next_PC, next_NPC); + predicted_branch |= (next_PC != fetch_NPC); // Add instruction to the CPU's list of instructions. instruction->setInstListIt(cpu->addInst(instruction)); @@ -1183,6 +1167,7 @@ DefaultFetch<Impl>::fetch(bool &status_change) // Move to the next instruction, unless we have a branch. fetch_PC = next_PC; + fetch_NPC = next_NPC; if (instruction->isQuiesce()) { DPRINTF(Fetch, "Quiesce instruction encountered, halting fetch!", @@ -1194,29 +1179,6 @@ DefaultFetch<Impl>::fetch(bool &status_change) } offset += instSize; - -#if ISA_HAS_DELAY_SLOT - if (predicted_branch) { - delaySlotInfo[tid].branchSeqNum = inst_seq; - - DPRINTF(Fetch, "[tid:%i]: Delay slot branch set to [sn:%i]\n", - tid, inst_seq); - continue; - } else if (delaySlotInfo[tid].numInsts > 0) { - --delaySlotInfo[tid].numInsts; - - // It's OK to set PC to target of branch - if (delaySlotInfo[tid].numInsts == 0) { - delaySlotInfo[tid].targetReady = true; - - // Break the looping condition - predicted_branch = true; - } - - DPRINTF(Fetch, "[tid:%i]: %i delay slot inst(s) left to" - " process.\n", tid, delaySlotInfo[tid].numInsts); - } -#endif } if (offset >= cacheBlkSize) { @@ -1225,7 +1187,7 @@ DefaultFetch<Impl>::fetch(bool &status_change) } else if (numInst >= fetchWidth) { DPRINTF(Fetch, "[tid:%i]: Done fetching, reached fetch bandwidth " "for this cycle.\n", tid); - } else if (predicted_branch && delaySlotInfo[tid].numInsts <= 0) { + } else if (predicted_branch) { DPRINTF(Fetch, "[tid:%i]: Done fetching, predicted branch " "instruction encountered.\n", tid); } @@ -1238,26 +1200,13 @@ DefaultFetch<Impl>::fetch(bool &status_change) // Now that fetching is completed, update the PC to signify what the next // cycle will be. if (fault == NoFault) { + PC[tid] = next_PC; + nextPC[tid] = next_NPC; + nextNPC[tid] = next_NPC + instSize; #if ISA_HAS_DELAY_SLOT - if (delaySlotInfo[tid].targetReady && - delaySlotInfo[tid].numInsts == 0) { - // Set PC to target - PC[tid] = delaySlotInfo[tid].targetAddr; //next_PC - nextPC[tid] = next_PC + instSize; //next_NPC - nextNPC[tid] = next_PC + (2 * instSize); - - delaySlotInfo[tid].targetReady = false; - } else { - PC[tid] = next_PC; - nextPC[tid] = next_NPC; - nextNPC[tid] = next_NPC + instSize; - } - DPRINTF(Fetch, "[tid:%i]: Setting PC to %08p.\n", tid, PC[tid]); #else - DPRINTF(Fetch, "[tid:%i]: Setting PC to %08p.\n",tid, next_PC); - PC[tid] = next_PC; - nextPC[tid] = next_PC + instSize; + DPRINTF(Fetch, "[tid:%i]: Setting PC to %08p.\n", tid, next_PC); #endif } else { // We shouldn't be in an icache miss and also have a fault (an ITB @@ -1277,10 +1226,11 @@ DefaultFetch<Impl>::fetch(bool &status_change) ext_inst = TheISA::NoopMachInst; // Create a new DynInst from the dummy nop. - DynInstPtr instruction = new DynInst(ext_inst, fetch_PC, - next_PC, + DynInstPtr instruction = new DynInst(ext_inst, + fetch_PC, fetch_NPC, + next_PC, next_NPC, inst_seq, cpu); - instruction->setPredTarg(next_PC + instSize); + instruction->setPredTarg(next_PC, next_NPC); instruction->setTid(tid); instruction->setASID(tid); |