From c7f1cf1d58cf50118c18b1afc4c938eafba81492 Mon Sep 17 00:00:00 2001 From: Gabe Black Date: Fri, 13 Apr 2007 13:59:31 +0000 Subject: Remove most of the special handling for delay slots since they have to be squashed anyway on a mispredict. This is because the NNPC value they saw when executing was incorrect. --HG-- extra : convert_revision : b42c4eb28b4fbba66c65cbd0a5033bf886c1532d --- src/cpu/o3/fetch_impl.hh | 33 ++++----------------------------- 1 file changed, 4 insertions(+), 29 deletions(-) (limited to 'src/cpu/o3/fetch_impl.hh') diff --git a/src/cpu/o3/fetch_impl.hh b/src/cpu/o3/fetch_impl.hh index e16f97558..25498c7f3 100644 --- a/src/cpu/o3/fetch_impl.hh +++ b/src/cpu/o3/fetch_impl.hh @@ -774,20 +774,14 @@ DefaultFetch::updateFetchStatus() template void DefaultFetch::squash(const Addr &new_PC, const Addr &new_NPC, - const InstSeqNum &seq_num, - bool squash_delay_slot, unsigned tid) + const InstSeqNum &seq_num, unsigned tid) { DPRINTF(Fetch, "[tid:%u]: Squash from commit.\n",tid); doSquash(new_PC, new_NPC, tid); -#if ISA_HAS_DELAY_SLOT - // Tell the CPU to remove any instructions that are not in the ROB. - cpu->removeInstsNotInROB(tid, squash_delay_slot, seq_num); -#else // Tell the CPU to remove any instructions that are not in the ROB. - cpu->removeInstsNotInROB(tid, true, 0); -#endif + cpu->removeInstsNotInROB(tid); } template @@ -896,17 +890,10 @@ DefaultFetch::checkSignalsAndUpdate(unsigned tid) DPRINTF(Fetch, "[tid:%u]: Squashing instructions due to squash " "from commit.\n",tid); - -#if ISA_HAS_DELAY_SLOT - InstSeqNum doneSeqNum = fromCommit->commitInfo[tid].bdelayDoneSeqNum; -#else - InstSeqNum doneSeqNum = fromCommit->commitInfo[tid].doneSeqNum; -#endif // In any case, squash. squash(fromCommit->commitInfo[tid].nextPC, fromCommit->commitInfo[tid].nextNPC, - doneSeqNum, - fromCommit->commitInfo[tid].squashDelaySlot, + fromCommit->commitInfo[tid].doneSeqNum, tid); // Also check if there's a mispredict that happened. @@ -955,18 +942,13 @@ DefaultFetch::checkSignalsAndUpdate(unsigned tid) if (fetchStatus[tid] != Squashing) { -#if ISA_HAS_DELAY_SLOT - InstSeqNum doneSeqNum = fromDecode->decodeInfo[tid].bdelayDoneSeqNum; -#else - InstSeqNum doneSeqNum = fromDecode->decodeInfo[tid].doneSeqNum; -#endif DPRINTF(Fetch, "Squashing from decode with PC = %#x, NPC = %#x\n", fromDecode->decodeInfo[tid].nextPC, fromDecode->decodeInfo[tid].nextNPC); // Squash unless we're already squashing squashFromDecode(fromDecode->decodeInfo[tid].nextPC, fromDecode->decodeInfo[tid].nextNPC, - doneSeqNum, + fromDecode->decodeInfo[tid].doneSeqNum, tid); return true; @@ -1157,9 +1139,6 @@ DefaultFetch::fetch(bool &status_change) instruction->readPC()); ///FIXME This needs to be more robust in dealing with delay slots -#if !ISA_HAS_DELAY_SLOT -// predicted_branch |= -#endif lookupAndUpdateNextPC(instruction, next_PC, next_NPC); predicted_branch |= (next_PC != fetch_NPC); @@ -1213,11 +1192,7 @@ DefaultFetch::fetch(bool &status_change) PC[tid] = next_PC; nextPC[tid] = next_NPC; nextNPC[tid] = next_NPC + instSize; -#if ISA_HAS_DELAY_SLOT - DPRINTF(Fetch, "[tid:%i]: Setting PC to %08p.\n", tid, PC[tid]); -#else DPRINTF(Fetch, "[tid:%i]: Setting PC to %08p.\n", tid, next_PC); -#endif } else { // We shouldn't be in an icache miss and also have a fault (an ITB // miss) -- cgit v1.2.3 From c3081d9c1c36e1a08c173048783d191fa19463de Mon Sep 17 00:00:00 2001 From: Gabe Black Date: Sat, 14 Apr 2007 17:13:18 +0000 Subject: Add support for microcode and pull out the special branch delay slot handling. Branch delay slots need to be squash on a mispredict as well because the nnpc they saw was incorrect. --HG-- extra : convert_revision : 8b9c603616bcad254417a7a3fa3edfb4c8728719 --- src/cpu/o3/fetch_impl.hh | 91 ++++++++++++++++++++++++++++++------------------ 1 file changed, 58 insertions(+), 33 deletions(-) (limited to 'src/cpu/o3/fetch_impl.hh') diff --git a/src/cpu/o3/fetch_impl.hh b/src/cpu/o3/fetch_impl.hh index 25498c7f3..d1f38e38b 100644 --- a/src/cpu/o3/fetch_impl.hh +++ b/src/cpu/o3/fetch_impl.hh @@ -312,7 +312,7 @@ DefaultFetch::initStage() for (int tid = 0; tid < numThreads; tid++) { PC[tid] = cpu->readPC(tid); nextPC[tid] = cpu->readNextPC(tid); - nextNPC[tid] = cpu->readNextNPC(tid); + microPC[tid] = cpu->readMicroPC(tid); } for (int tid=0; tid < numThreads; tid++) { @@ -439,11 +439,7 @@ DefaultFetch::takeOverFrom() stalls[i].commit = 0; PC[i] = cpu->readPC(i); nextPC[i] = cpu->readNextPC(i); -#if ISA_HAS_DELAY_SLOT - nextNPC[i] = cpu->readNextNPC(i); -#else - nextNPC[i] = nextPC[i] + sizeof(TheISA::MachInst); -#endif + microPC[i] = cpu->readMicroPC(i); fetchStatus[i] = Running; } numInst = 0; @@ -493,7 +489,7 @@ DefaultFetch::switchToInactive() template bool DefaultFetch::lookupAndUpdateNextPC(DynInstPtr &inst, Addr &next_PC, - Addr &next_NPC) + Addr &next_NPC, Addr &next_MicroPC) { // Do branch prediction check here. // A bit of a misnomer...next_PC is actually the current PC until @@ -501,13 +497,22 @@ DefaultFetch::lookupAndUpdateNextPC(DynInstPtr &inst, Addr &next_PC, bool predict_taken; if (!inst->isControl()) { - next_PC = next_NPC; - next_NPC = next_NPC + instSize; - inst->setPredTarg(next_PC, next_NPC); + if (inst->isMicroOp() && !inst->isLastMicroOp()) { + next_MicroPC++; + } else { + next_PC = next_NPC; + next_NPC = next_NPC + instSize; + next_MicroPC = 0; + } + inst->setPredTarg(next_PC, next_NPC, next_MicroPC); inst->setPredTaken(false); return false; } + //Assume for now that all control flow is to a different macroop which + //would reset the micro pc to 0. + next_MicroPC = 0; + int tid = inst->threadNumber; Addr pred_PC = next_PC; predict_taken = branchPred.predict(inst, pred_PC, tid); @@ -534,7 +539,7 @@ DefaultFetch::lookupAndUpdateNextPC(DynInstPtr &inst, Addr &next_PC, #endif /* DPRINTF(Fetch, "[tid:%i]: Branch predicted to go to %#x and then %#x.\n", tid, next_PC, next_NPC);*/ - inst->setPredTarg(next_PC, next_NPC); + inst->setPredTarg(next_PC, next_NPC, next_MicroPC); inst->setPredTaken(predict_taken); ++fetchedBranches; @@ -658,14 +663,14 @@ DefaultFetch::fetchCacheLine(Addr fetch_PC, Fault &ret_fault, unsigned tid template inline void DefaultFetch::doSquash(const Addr &new_PC, - const Addr &new_NPC, unsigned tid) + const Addr &new_NPC, const Addr &new_microPC, unsigned tid) { DPRINTF(Fetch, "[tid:%i]: Squashing, setting PC to: %#x, NPC to: %#x.\n", tid, new_PC, new_NPC); PC[tid] = new_PC; nextPC[tid] = new_NPC; - nextNPC[tid] = new_NPC + instSize; + microPC[tid] = new_microPC; // Clear the icache miss if it's outstanding. if (fetchStatus[tid] == IcacheWaitResponse) { @@ -693,12 +698,12 @@ DefaultFetch::doSquash(const Addr &new_PC, template void DefaultFetch::squashFromDecode(const Addr &new_PC, const Addr &new_NPC, - const InstSeqNum &seq_num, - unsigned tid) + const Addr &new_MicroPC, + const InstSeqNum &seq_num, unsigned tid) { DPRINTF(Fetch, "[tid:%i]: Squashing from decode.\n",tid); - doSquash(new_PC, new_NPC, tid); + doSquash(new_PC, new_NPC, new_MicroPC, tid); // Tell the CPU to remove any instructions that are in flight between // fetch and decode. @@ -774,11 +779,12 @@ DefaultFetch::updateFetchStatus() template void DefaultFetch::squash(const Addr &new_PC, const Addr &new_NPC, + const Addr &new_MicroPC, const InstSeqNum &seq_num, unsigned tid) { DPRINTF(Fetch, "[tid:%u]: Squash from commit.\n",tid); - doSquash(new_PC, new_NPC, tid); + doSquash(new_PC, new_NPC, new_MicroPC, tid); // Tell the CPU to remove any instructions that are not in the ROB. cpu->removeInstsNotInROB(tid); @@ -893,6 +899,7 @@ DefaultFetch::checkSignalsAndUpdate(unsigned tid) // In any case, squash. squash(fromCommit->commitInfo[tid].nextPC, fromCommit->commitInfo[tid].nextNPC, + fromCommit->commitInfo[tid].nextMicroPC, fromCommit->commitInfo[tid].doneSeqNum, tid); @@ -948,6 +955,7 @@ DefaultFetch::checkSignalsAndUpdate(unsigned tid) // Squash unless we're already squashing squashFromDecode(fromDecode->decodeInfo[tid].nextPC, fromDecode->decodeInfo[tid].nextNPC, + fromDecode->decodeInfo[tid].nextMicroPC, fromDecode->decodeInfo[tid].doneSeqNum, tid); @@ -1002,9 +1010,9 @@ DefaultFetch::fetch(bool &status_change) DPRINTF(Fetch, "Attempting to fetch from [tid:%i]\n", tid); // The current PC. - Addr &fetch_PC = PC[tid]; - - Addr &fetch_NPC = nextPC[tid]; + Addr fetch_PC = PC[tid]; + Addr fetch_NPC = nextPC[tid]; + Addr fetch_MicroPC = microPC[tid]; // Fault code for memory access. Fault fault = NoFault; @@ -1063,6 +1071,7 @@ DefaultFetch::fetch(bool &status_change) Addr next_PC = fetch_PC; Addr next_NPC = fetch_NPC; + Addr next_MicroPC = fetch_MicroPC; InstSeqNum inst_seq; MachInst inst; @@ -1070,6 +1079,9 @@ DefaultFetch::fetch(bool &status_change) // @todo: Fix this hack. unsigned offset = (fetch_PC & cacheBlkMask) & ~3; + StaticInstPtr staticInst = NULL; + StaticInstPtr macroop = NULL; + if (fault == NoFault) { // If the read of the first instruction was successful, then grab the // instructions from the rest of the cache line and put them into the @@ -1104,19 +1116,29 @@ DefaultFetch::fetch(bool &status_change) // Make sure this is a valid index. assert(offset <= cacheBlkSize - instSize); - // Get the instruction from the array of the cache line. - inst = TheISA::gtoh(*reinterpret_cast - (&cacheData[tid][offset])); + if (!macroop) { + // Get the instruction from the array of the cache line. + inst = TheISA::gtoh(*reinterpret_cast + (&cacheData[tid][offset])); - predecoder.setTC(cpu->thread[tid]->getTC()); - predecoder.moreBytes(fetch_PC, 0, inst); + predecoder.setTC(cpu->thread[tid]->getTC()); + predecoder.moreBytes(fetch_PC, 0, inst); - ext_inst = predecoder.getExtMachInst(); + ext_inst = predecoder.getExtMachInst(); + staticInst = StaticInstPtr(ext_inst); + if (staticInst->isMacroOp()) + macroop = staticInst; + } + if (macroop) { + staticInst = macroop->fetchMicroOp(fetch_MicroPC); + if (staticInst->isLastMicroOp()) + macroop = NULL; + } // Create a new DynInst from the instruction fetched. - DynInstPtr instruction = new DynInst(ext_inst, - fetch_PC, fetch_NPC, - next_PC, next_NPC, + DynInstPtr instruction = new DynInst(staticInst, + fetch_PC, fetch_NPC, fetch_MicroPC, + next_PC, next_NPC, next_MicroPC, inst_seq, cpu); instruction->setTid(tid); @@ -1139,7 +1161,7 @@ DefaultFetch::fetch(bool &status_change) instruction->readPC()); ///FIXME This needs to be more robust in dealing with delay slots - lookupAndUpdateNextPC(instruction, next_PC, next_NPC); + lookupAndUpdateNextPC(instruction, next_PC, next_NPC, next_MicroPC); predicted_branch |= (next_PC != fetch_NPC); // Add instruction to the CPU's list of instructions. @@ -1157,6 +1179,7 @@ DefaultFetch::fetch(bool &status_change) // Move to the next instruction, unless we have a branch. fetch_PC = next_PC; fetch_NPC = next_NPC; + fetch_MicroPC = next_MicroPC; if (instruction->isQuiesce()) { DPRINTF(Fetch, "Quiesce instruction encountered, halting fetch!", @@ -1167,7 +1190,8 @@ DefaultFetch::fetch(bool &status_change) break; } - offset += instSize; + if (!macroop) + offset += instSize; } if (offset >= cacheBlkSize) { @@ -1191,7 +1215,7 @@ DefaultFetch::fetch(bool &status_change) if (fault == NoFault) { PC[tid] = next_PC; nextPC[tid] = next_NPC; - nextNPC[tid] = next_NPC + instSize; + microPC[tid] = next_MicroPC; DPRINTF(Fetch, "[tid:%i]: Setting PC to %08p.\n", tid, next_PC); } else { // We shouldn't be in an icache miss and also have a fault (an ITB @@ -1210,8 +1234,9 @@ DefaultFetch::fetch(bool &status_change) // We will use a nop in order to carry the fault. ext_inst = TheISA::NoopMachInst; + StaticInstPtr staticInst = new StaticInst(ext_inst); // Create a new DynInst from the dummy nop. - DynInstPtr instruction = new DynInst(ext_inst, + DynInstPtr instruction = new DynInst(staticInst, fetch_PC, fetch_NPC, next_PC, next_NPC, inst_seq, cpu); -- cgit v1.2.3 From 8248af53b19a633ae6d9aa8cd6b5a12cfa3b1644 Mon Sep 17 00:00:00 2001 From: Gabe Black Date: Sun, 15 Apr 2007 21:52:38 +0000 Subject: Make an inner loop which pulls microops out of macroops. These aren't checked for control flow because we can pull out microops until we run out of buffer. This prevents microops from being interpretted as branches because the pc doesn't become npc. --HG-- extra : convert_revision : 9fff7c6c32900692bbc567ecb75701c9c73da259 --- src/cpu/o3/fetch_impl.hh | 127 ++++++++++++++++++++++++----------------------- 1 file changed, 64 insertions(+), 63 deletions(-) (limited to 'src/cpu/o3/fetch_impl.hh') diff --git a/src/cpu/o3/fetch_impl.hh b/src/cpu/o3/fetch_impl.hh index d1f38e38b..3ae7bc402 100644 --- a/src/cpu/o3/fetch_impl.hh +++ b/src/cpu/o3/fetch_impl.hh @@ -1094,11 +1094,9 @@ DefaultFetch::fetch(bool &status_change) // ended this fetch block. bool predicted_branch = false; - for (; - offset < cacheBlkSize && - numInst < fetchWidth && - !predicted_branch; - ++numInst) { + while (offset < cacheBlkSize && + numInst < fetchWidth && + !predicted_branch) { // If we're branching after this instruction, quite fetching // from the same block then. @@ -1109,10 +1107,6 @@ DefaultFetch::fetch(bool &status_change) fetch_PC, fetch_NPC); } - - // Get a sequence number. - inst_seq = cpu->getAndIncrementInstSeq(); - // Make sure this is a valid index. assert(offset <= cacheBlkSize - instSize); @@ -1129,80 +1123,87 @@ DefaultFetch::fetch(bool &status_change) if (staticInst->isMacroOp()) macroop = staticInst; } - if (macroop) { - staticInst = macroop->fetchMicroOp(fetch_MicroPC); - if (staticInst->isLastMicroOp()) - macroop = NULL; - } + do { + if (macroop) { + staticInst = macroop->fetchMicroOp(fetch_MicroPC); + if (staticInst->isLastMicroOp()) + macroop = NULL; + } - // Create a new DynInst from the instruction fetched. - DynInstPtr instruction = new DynInst(staticInst, - fetch_PC, fetch_NPC, fetch_MicroPC, - next_PC, next_NPC, next_MicroPC, - inst_seq, cpu); - instruction->setTid(tid); + // Get a sequence number. + inst_seq = cpu->getAndIncrementInstSeq(); - instruction->setASID(tid); + // Create a new DynInst from the instruction fetched. + DynInstPtr instruction = new DynInst(staticInst, + fetch_PC, fetch_NPC, fetch_MicroPC, + next_PC, next_NPC, next_MicroPC, + inst_seq, cpu); + instruction->setTid(tid); - instruction->setThreadState(cpu->thread[tid]); + instruction->setASID(tid); - DPRINTF(Fetch, "[tid:%i]: Instruction PC %#x created " - "[sn:%lli]\n", - tid, instruction->readPC(), inst_seq); + instruction->setThreadState(cpu->thread[tid]); - //DPRINTF(Fetch, "[tid:%i]: MachInst is %#x\n", tid, ext_inst); + DPRINTF(Fetch, "[tid:%i]: Instruction PC %#x created " + "[sn:%lli]\n", + tid, instruction->readPC(), inst_seq); - DPRINTF(Fetch, "[tid:%i]: Instruction is: %s\n", - tid, instruction->staticInst->disassemble(fetch_PC)); + //DPRINTF(Fetch, "[tid:%i]: MachInst is %#x\n", tid, ext_inst); - instruction->traceData = - Trace::getInstRecord(curTick, cpu->tcBase(tid), - instruction->staticInst, - instruction->readPC()); + DPRINTF(Fetch, "[tid:%i]: Instruction is: %s\n", + tid, instruction->staticInst->disassemble(fetch_PC)); - ///FIXME This needs to be more robust in dealing with delay slots - lookupAndUpdateNextPC(instruction, next_PC, next_NPC, next_MicroPC); - predicted_branch |= (next_PC != fetch_NPC); + instruction->traceData = + Trace::getInstRecord(curTick, cpu->tcBase(tid), + instruction->staticInst, + instruction->readPC()); - // Add instruction to the CPU's list of instructions. - instruction->setInstListIt(cpu->addInst(instruction)); + ///FIXME This needs to be more robust in dealing with delay slots + predicted_branch |= + lookupAndUpdateNextPC(instruction, next_PC, next_NPC, next_MicroPC); - // Write the instruction to the first slot in the queue - // that heads to decode. - toDecode->insts[numInst] = instruction; + // Add instruction to the CPU's list of instructions. + instruction->setInstListIt(cpu->addInst(instruction)); - toDecode->size++; + // Write the instruction to the first slot in the queue + // that heads to decode. + toDecode->insts[numInst] = instruction; - // Increment stat of fetched instructions. - ++fetchedInsts; + toDecode->size++; - // Move to the next instruction, unless we have a branch. - fetch_PC = next_PC; - fetch_NPC = next_NPC; - fetch_MicroPC = next_MicroPC; + // Increment stat of fetched instructions. + ++fetchedInsts; - if (instruction->isQuiesce()) { - DPRINTF(Fetch, "Quiesce instruction encountered, halting fetch!", - curTick); - fetchStatus[tid] = QuiescePending; - ++numInst; - status_change = true; - break; - } + // Move to the next instruction, unless we have a branch. + fetch_PC = next_PC; + fetch_NPC = next_NPC; + fetch_MicroPC = next_MicroPC; + + if (instruction->isQuiesce()) { + DPRINTF(Fetch, "Quiesce instruction encountered, halting fetch!", + curTick); + fetchStatus[tid] = QuiescePending; + ++numInst; + status_change = true; + break; + } - if (!macroop) - offset += instSize; + ++numInst; + } while (staticInst->isMicroOp() && + !staticInst->isLastMicroOp() && + numInst < fetchWidth); + offset += instSize; } - if (offset >= cacheBlkSize) { - DPRINTF(Fetch, "[tid:%i]: Done fetching, reached the end of cache " - "block.\n", tid); + if (predicted_branch) { + DPRINTF(Fetch, "[tid:%i]: Done fetching, predicted branch " + "instruction encountered.\n", tid); } else if (numInst >= fetchWidth) { DPRINTF(Fetch, "[tid:%i]: Done fetching, reached fetch bandwidth " "for this cycle.\n", tid); - } else if (predicted_branch) { - DPRINTF(Fetch, "[tid:%i]: Done fetching, predicted branch " - "instruction encountered.\n", tid); + } else if (offset >= cacheBlkSize) { + DPRINTF(Fetch, "[tid:%i]: Done fetching, reached the end of cache " + "block.\n", tid); } } -- cgit v1.2.3 From df7730b6774a730d554bfaa469ad95eeeffd3dc9 Mon Sep 17 00:00:00 2001 From: Gabe Black Date: Wed, 20 Jun 2007 19:46:45 -0700 Subject: Fix compiler errors. --HG-- extra : convert_revision : 2b10076a24cb36cb748e299011ae691f09c158cd --- src/cpu/o3/fetch_impl.hh | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) (limited to 'src/cpu/o3/fetch_impl.hh') diff --git a/src/cpu/o3/fetch_impl.hh b/src/cpu/o3/fetch_impl.hh index 0fd1e7bac..857a08629 100644 --- a/src/cpu/o3/fetch_impl.hh +++ b/src/cpu/o3/fetch_impl.hh @@ -498,7 +498,7 @@ DefaultFetch::lookupAndUpdateNextPC(DynInstPtr &inst, Addr &next_PC, bool predict_taken; if (!inst->isControl()) { - if (inst->isMicroOp() && !inst->isLastMicroOp()) { + if (inst->isMicroop() && !inst->isLastMicroop()) { next_MicroPC++; } else { next_PC = next_NPC; @@ -1120,14 +1120,14 @@ DefaultFetch::fetch(bool &status_change) predecoder.moreBytes(fetch_PC, fetch_PC, 0, inst); ext_inst = predecoder.getExtMachInst(); - staticInst = StaticInstPtr(ext_inst); - if (staticInst->isMacroOp()) + staticInst = StaticInstPtr(ext_inst, fetch_PC); + if (staticInst->isMacroop()) macroop = staticInst; } do { if (macroop) { - staticInst = macroop->fetchMicroOp(fetch_MicroPC); - if (staticInst->isLastMicroOp()) + staticInst = macroop->fetchMicroop(fetch_MicroPC); + if (staticInst->isLastMicroop()) macroop = NULL; } @@ -1194,8 +1194,8 @@ DefaultFetch::fetch(bool &status_change) } ++numInst; - } while (staticInst->isMicroOp() && - !staticInst->isLastMicroOp() && + } while (staticInst->isMicroop() && + !staticInst->isLastMicroop() && numInst < fetchWidth); offset += instSize; } @@ -1240,13 +1240,12 @@ DefaultFetch::fetch(bool &status_change) // We will use a nop in order to carry the fault. ext_inst = TheISA::NoopMachInst; - StaticInstPtr staticInst = new StaticInst(ext_inst); // Create a new DynInst from the dummy nop. - DynInstPtr instruction = new DynInst(staticInst, - fetch_PC, fetch_NPC, - next_PC, next_NPC, + DynInstPtr instruction = new DynInst(ext_inst, + fetch_PC, fetch_NPC, fetch_MicroPC, + next_PC, next_NPC, next_MicroPC, inst_seq, cpu); - instruction->setPredTarg(next_PC, next_NPC); + instruction->setPredTarg(next_PC, next_NPC, 1); instruction->setTid(tid); instruction->setASID(tid); -- cgit v1.2.3