diff options
author | Korey Sewell <ksewell@umich.edu> | 2009-05-12 15:01:14 -0400 |
---|---|---|
committer | Korey Sewell <ksewell@umich.edu> | 2009-05-12 15:01:14 -0400 |
commit | b569f8f0ed8dcf32347f0d4f68d2d7572a5d1353 (patch) | |
tree | 14b4f11266600c44ec4c1846665277115911b363 /src | |
parent | 1c8dfd92543aba5f49e464b17e7e8143fc01a58c (diff) | |
download | gem5-b569f8f0ed8dcf32347f0d4f68d2d7572a5d1353.tar.xz |
inorder-bpred: edits to handle non-delay-slot ISAs
Changes so that InOrder can work for a non-delay-slot ISA like Alpha. Typically, the changes have to do with handling misspeculated branches at different points in the pipeline.
Diffstat (limited to 'src')
-rw-r--r-- | src/arch/alpha/process.cc | 8 | ||||
-rw-r--r-- | src/cpu/inorder/inorder_dyn_inst.hh | 36 | ||||
-rw-r--r-- | src/cpu/inorder/pipeline_stage.cc | 12 | ||||
-rw-r--r-- | src/cpu/inorder/resources/bpred_unit.cc | 15 | ||||
-rw-r--r-- | src/cpu/inorder/resources/branch_predictor.cc | 4 | ||||
-rw-r--r-- | src/cpu/inorder/resources/execution_unit.cc | 26 | ||||
-rw-r--r-- | src/cpu/inorder/resources/fetch_seq_unit.cc | 36 | ||||
-rwxr-xr-x | src/cpu/o3/thread_context.hh | 3 |
8 files changed, 100 insertions, 40 deletions
diff --git a/src/arch/alpha/process.cc b/src/arch/alpha/process.cc index 093d83d8a..93df459ae 100644 --- a/src/arch/alpha/process.cc +++ b/src/arch/alpha/process.cc @@ -166,11 +166,11 @@ AlphaLiveProcess::argsInit(int intSize, int pageSize) tc->setPC(prog_entry); tc->setNextPC(prog_entry + sizeof(MachInst)); -#if THE_ISA != ALPHA_ISA //e.g. MIPS or Sparc + // MIPS/Sparc need NNPC for delay slot handling, while + // Alpha has no delay slots... However, CPU models + // cycle PCs by PC=NPC, NPC=NNPC, etc. so setting this + // here ensures CPU-Model Compatibility across board tc->setNextNPC(prog_entry + (2 * sizeof(MachInst))); -#endif - - } void diff --git a/src/cpu/inorder/inorder_dyn_inst.hh b/src/cpu/inorder/inorder_dyn_inst.hh index 12a9a4176..8e88fc583 100644 --- a/src/cpu/inorder/inorder_dyn_inst.hh +++ b/src/cpu/inorder/inorder_dyn_inst.hh @@ -264,6 +264,12 @@ class InOrderDynInst : public FastAlloc, public RefCounted /** Predicted next PC. */ Addr predPC; + /** Predicted next NPC. */ + Addr predNPC; + + /** Predicted next microPC */ + Addr predMicroPC; + /** Address to fetch from */ Addr fetchAddr; @@ -506,7 +512,14 @@ class InOrderDynInst : public FastAlloc, public RefCounted /** Returns the next NPC. This could be the speculative next NPC if it is * called prior to the actual branch target being calculated. */ - Addr readNextNPC() { return nextNPC; } + Addr readNextNPC() + { +#if ISA_HAS_DELAY_SLOT + return nextNPC; +#else + return nextPC + sizeof(TheISA::MachInst); +#endif + } /** Set the next PC of this instruction (its actual target). */ void setNextNPC(uint64_t val) { nextNPC = val; } @@ -522,19 +535,26 @@ class InOrderDynInst : public FastAlloc, public RefCounted /** Returns the predicted target of the branch. */ Addr readPredTarg() { return predPC; } + /** Returns the predicted PC immediately after the branch. 
*/ + Addr readPredPC() { return predPC; } + + /** Returns the predicted PC two instructions after the branch */ + Addr readPredNPC() { return predNPC; } + + /** Returns the predicted micro PC after the branch */ + Addr readPredMicroPC() { return predMicroPC; } + /** Returns whether the instruction was predicted taken or not. */ bool predTaken() { return predictTaken; } /** Returns whether the instruction mispredicted. */ bool mispredicted() { - // Special case since a not-taken, cond. delay slot, effectively - // nullifies the delay slot instruction - if (isCondDelaySlot() && !predictTaken) { - return predPC != nextPC; - } else { - return predPC != nextNPC; - } +#if ISA_HAS_DELAY_SLOT + return predPC != nextNPC; +#else + return predPC != nextPC; +#endif } /** Returns whether the instruction mispredicted. */ diff --git a/src/cpu/inorder/pipeline_stage.cc b/src/cpu/inorder/pipeline_stage.cc index cb69464b0..d8e26f725 100644 --- a/src/cpu/inorder/pipeline_stage.cc +++ b/src/cpu/inorder/pipeline_stage.cc @@ -342,13 +342,21 @@ PipelineStage::squashDueToBranch(DynInstPtr &inst, unsigned tid) toPrevStages->stageInfo[stageNum][tid].doneSeqNum = inst->seqNum; toPrevStages->stageInfo[stageNum][tid].squash = true; toPrevStages->stageInfo[stageNum][tid].nextPC = inst->readPredTarg(); + + +#if ISA_HAS_DELAY_SLOT toPrevStages->stageInfo[stageNum][tid].branchTaken = inst->readNextNPC() != (inst->readNextPC() + sizeof(TheISA::MachInst)); toPrevStages->stageInfo[stageNum][tid].bdelayDoneSeqNum = inst->bdelaySeqNum; - - DPRINTF(InOrderStage, "Target being re-set to %08p\n", inst->readPredTarg()); InstSeqNum squash_seq_num = inst->bdelaySeqNum; +#else + toPrevStages->stageInfo[stageNum][tid].branchTaken = inst->readNextPC() != + (inst->readPC() + sizeof(TheISA::MachInst)); + toPrevStages->stageInfo[stageNum][tid].bdelayDoneSeqNum = inst->seqNum; + InstSeqNum squash_seq_num = inst->seqNum; +#endif + DPRINTF(InOrderStage, "Target being re-set to %08p\n", inst->readPredTarg()); 
DPRINTF(InOrderStage, "[tid:%i]: Squashing after [sn:%i], due to [sn:%i] " "branch.\n", tid, squash_seq_num, inst->seqNum); diff --git a/src/cpu/inorder/resources/bpred_unit.cc b/src/cpu/inorder/resources/bpred_unit.cc index 66d0779a2..df6b33792 100644 --- a/src/cpu/inorder/resources/bpred_unit.cc +++ b/src/cpu/inorder/resources/bpred_unit.cc @@ -196,7 +196,7 @@ BPredUnit::predict(DynInstPtr &inst, Addr &PC, unsigned tid) predict_record.RASIndex = RAS[tid].topIdx(); predict_record.RASTarget = target; - assert(predict_record.RASIndex < 16); + assert(predict_record.RASIndex < 16); RAS[tid].pop(); @@ -219,14 +219,14 @@ BPredUnit::predict(DynInstPtr &inst, Addr &PC, unsigned tid) } if (inst->isCall() && - inst->isUncondCtrl() && - inst->isDirectCtrl()) { - target = inst->branchTarget(); + inst->isUncondCtrl() && + inst->isDirectCtrl()) { + target = inst->branchTarget(); DPRINTF(Fetch, "BranchPred: [tid:%i]: Setting %#x predicted" " target to %#x.\n", tid, inst->readPC(), target); - } else if (BTB.valid(PC, tid)) { + } else if (BTB.valid(PC, tid)) { ++BTBHits; // If it's not a return, use the BTB to get the target addr. @@ -248,7 +248,12 @@ BPredUnit::predict(DynInstPtr &inst, Addr &PC, unsigned tid) PC = target; inst->setPredTarg(target); } else { +#if ISA_HAS_DELAY_SLOT + // This value will be inst->PC + 4 (nextPC) + // Delay Slot archs need this to be inst->PC + 8 (nextNPC) + // so we increment one more time here. 
PC = PC + sizeof(MachInst); +#endif inst->setPredTarg(PC); } diff --git a/src/cpu/inorder/resources/branch_predictor.cc b/src/cpu/inorder/resources/branch_predictor.cc index 511a0ac82..d8c0730af 100644 --- a/src/cpu/inorder/resources/branch_predictor.cc +++ b/src/cpu/inorder/resources/branch_predictor.cc @@ -78,12 +78,12 @@ BranchPredictor::execute(int slot_num) Addr pred_PC = inst->readNextPC(); if (inst->isControl()) { - // If predicted, the pred_PC will be updated to new target value // If not, the pred_PC be updated to pc+8 + // If predicted, the pred_PC will be updated to new target value bool predict_taken = branchPred.predict(inst, pred_PC, tid); if (predict_taken) { - DPRINTF(Resource, "[tid:%i]: [sn:%i]: Branch predicted true.\n", + DPRINTF(InOrderBPred, "[tid:%i]: [sn:%i]: Branch predicted true.\n", tid, seq_num); inst->setPredTarg(pred_PC); diff --git a/src/cpu/inorder/resources/execution_unit.cc b/src/cpu/inorder/resources/execution_unit.cc index 843adb5b0..60cbac8af 100644 --- a/src/cpu/inorder/resources/execution_unit.cc +++ b/src/cpu/inorder/resources/execution_unit.cc @@ -76,7 +76,7 @@ ExecutionUnit::execute(int slot_num) case ExecuteInst: { if (inst->isMemRef()) { - fatal("%s not configured to handle memory ops.\n", resName); + panic("%s not configured to handle memory ops.\n", resName); } else if (inst->isControl()) { // Evaluate Branch fault = inst->execute(); @@ -111,23 +111,33 @@ ExecutionUnit::execute(int slot_num) "[sn:%i] PC %#x mispredicted as not taken.\n", tid, seq_num, inst->PC); } else { +#if ISA_HAS_DELAY_SLOT inst->bdelaySeqNum = seq_num + 1; - + inst->setPredTarg(inst->nextNPC); +#else + inst->bdelaySeqNum = seq_num; + inst->setPredTarg(inst->nextPC); +#endif DPRINTF(InOrderExecute, "[tid:%i]: Misprediction detected at " "[sn:%i] PC %#x,\n\t squashing after delay slot " "instruction [sn:%i].\n", tid, seq_num, inst->PC, inst->bdelaySeqNum); DPRINTF(InOrderStall, "STALL: [tid:%i]: Branch " "misprediction at %#x\n", tid, inst->PC); - 
inst->setPredTarg(inst->nextNPC); } DPRINTF(InOrderExecute, "[tid:%i] Redirecting fetch to %#x.\n", tid, inst->readPredTarg()); } else if(inst->isIndirectCtrl()){ +#if ISA_HAS_DELAY_SLOT inst->setPredTarg(inst->nextNPC); inst->bdelaySeqNum = seq_num + 1; +#else + inst->setPredTarg(inst->nextPC); + inst->bdelaySeqNum = seq_num; +#endif + DPRINTF(InOrderExecute, "[tid:%i] Redirecting fetch to %#x.\n", tid, inst->readPredTarg()); } else { @@ -151,7 +161,13 @@ ExecutionUnit::execute(int slot_num) } else { predictedNotTakenIncorrect++; } + } else { + DPRINTF(InOrderExecute, "[tid:%i]: [sn:%i]: Prediction Correct.\n", + inst->readTid(), seq_num, inst->readIntResult(0)); } + + DPRINTF(InOrderExecute, "[tid:%i]: [sn:%i]: The result of execution is 0x%x.\n", + inst->readTid(), seq_num, inst->readIntResult(0)); exec_req->done(); } else { warn("inst [sn:%i] had a %s fault", seq_num, fault->name()); @@ -164,8 +180,8 @@ ExecutionUnit::execute(int slot_num) inst->setExecuted(); exec_req->done(); - DPRINTF(InOrderExecute, "[tid:%i]: The result of execution is 0x%x.\n", - inst->readTid(), inst->readIntResult(0)); + DPRINTF(InOrderExecute, "[tid:%i]: [sn:%i]: The result of execution is 0x%x.\n", + inst->readTid(), seq_num, inst->readIntResult(0)); } else { warn("inst [sn:%i] had a %s fault", seq_num, fault->name()); cpu->trap(fault, tid); diff --git a/src/cpu/inorder/resources/fetch_seq_unit.cc b/src/cpu/inorder/resources/fetch_seq_unit.cc index 444252e1b..69610ae58 100644 --- a/src/cpu/inorder/resources/fetch_seq_unit.cc +++ b/src/cpu/inorder/resources/fetch_seq_unit.cc @@ -96,13 +96,16 @@ FetchSeqUnit::execute(int slot_num) inst->setNextPC(PC[tid] + instSize); inst->setNextNPC(PC[tid] + (instSize * 2)); +#if ISA_HAS_DELAY_SLOT inst->setPredTarg(inst->readNextNPC()); - +#else + inst->setPredTarg(inst->readNextPC()); +#endif inst->setMemAddr(PC[tid]); inst->setSeqNum(cpu->getAndIncrementInstSeq(tid)); - DPRINTF(InOrderFetchSeq, "[tid:%i]: Assigning [sn:%i] to PC %08p\n", tid, - 
inst->seqNum, inst->readPC()); + DPRINTF(InOrderFetchSeq, "[tid:%i]: Assigning [sn:%i] to PC %08p, NPC %08p, NNPC %08p\n", tid, + inst->seqNum, inst->readPC(), inst->readNextPC(), inst->readNextNPC()); if (delaySlotInfo[tid].numInsts > 0) { --delaySlotInfo[tid].numInsts; @@ -150,30 +153,37 @@ FetchSeqUnit::execute(int slot_num) squashAfterInst(inst, stage_num, tid); } else if (!inst->isCondDelaySlot() && !inst->predTaken()) { - // Not-Taken Control + // Not-Taken Control DPRINTF(InOrderFetchSeq, "[tid:%i]: [sn:%i]: Predicted Not-Taken Control " "inst. updating PC to %08p\n", tid, inst->seqNum, inst->readNextPC()); - +#if ISA_HAS_DELAY_SLOT ++delaySlotInfo[tid].numInsts; delaySlotInfo[tid].targetReady = false; delaySlotInfo[tid].targetAddr = inst->readNextNPC(); - +#else + assert(delaySlotInfo[tid].numInsts == 0); +#endif } else if (inst->predTaken()) { - // Taken Control + // Taken Control +#if ISA_HAS_DELAY_SLOT ++delaySlotInfo[tid].numInsts; delaySlotInfo[tid].targetReady = false; delaySlotInfo[tid].targetAddr = inst->readPredTarg(); DPRINTF(InOrderFetchSeq, "[tid:%i]: [sn:%i] Updating delay slot target " "to PC %08p\n", tid, inst->seqNum, inst->readPredTarg()); - - // Set-Up Squash Through-Out Pipeline - DPRINTF(InOrderFetchSeq, "[tid:%i] Setting up squash to start from stage %i, after [sn:%i].\n", - tid, stage_num, seq_num + 1); inst->bdelaySeqNum = seq_num + 1; +#else + inst->bdelaySeqNum = seq_num; + assert(delaySlotInfo[tid].numInsts == 0); +#endif + inst->squashingStage = stage_num; + DPRINTF(InOrderFetchSeq, "[tid:%i] Setting up squash to start from stage %i, after [sn:%i].\n", + tid, stage_num, inst->bdelaySeqNum); + // Do Squashing squashAfterInst(inst, stage_num, tid); } @@ -239,6 +249,10 @@ FetchSeqUnit::squash(DynInstPtr inst, int squash_stage, DPRINTF(InOrderFetchSeq, "[tid:%i]: Setting PC to %08p.\n", tid, PC[tid]); } else { +#if !ISA_HAS_DELAY_SLOT + assert(0); +#endif + delaySlotInfo[tid].numInsts = 1; delaySlotInfo[tid].targetReady = false; 
delaySlotInfo[tid].targetAddr = (inst->procDelaySlotOnMispred) ? inst->branchTarget() : new_PC; diff --git a/src/cpu/o3/thread_context.hh b/src/cpu/o3/thread_context.hh index e7c9c3b8f..b10305d5d 100755 --- a/src/cpu/o3/thread_context.hh +++ b/src/cpu/o3/thread_context.hh @@ -265,9 +265,6 @@ class O3ThreadContext : public ThreadContext virtual void setNextNPC(uint64_t val) { -#if THE_ISA == ALPHA_ISA - panic("Not supported on Alpha!"); -#endif this->cpu->setNextNPC(val, this->thread->threadId()); } }; |