summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Korey Sewell <ksewell@umich.edu> 2009-05-12 15:01:14 -0400
committer Korey Sewell <ksewell@umich.edu> 2009-05-12 15:01:14 -0400
commit b569f8f0ed8dcf32347f0d4f68d2d7572a5d1353 (patch)
tree 14b4f11266600c44ec4c1846665277115911b363
parent 1c8dfd92543aba5f49e464b17e7e8143fc01a58c (diff)
download gem5-b569f8f0ed8dcf32347f0d4f68d2d7572a5d1353.tar.xz
inorder-bpred: edits to handle non-delay-slot ISAs
Changes so that InOrder can work for a non-delay-slot ISA like Alpha. Typically, the changes have to do with handling misspeculated branches at different points in the pipeline.
-rw-r--r-- src/arch/alpha/process.cc | 8
-rw-r--r-- src/cpu/inorder/inorder_dyn_inst.hh | 36
-rw-r--r-- src/cpu/inorder/pipeline_stage.cc | 12
-rw-r--r-- src/cpu/inorder/resources/bpred_unit.cc | 15
-rw-r--r-- src/cpu/inorder/resources/branch_predictor.cc | 4
-rw-r--r-- src/cpu/inorder/resources/execution_unit.cc | 26
-rw-r--r-- src/cpu/inorder/resources/fetch_seq_unit.cc | 36
-rwxr-xr-x src/cpu/o3/thread_context.hh | 3
8 files changed, 100 insertions, 40 deletions
diff --git a/src/arch/alpha/process.cc b/src/arch/alpha/process.cc
index 093d83d8a..93df459ae 100644
--- a/src/arch/alpha/process.cc
+++ b/src/arch/alpha/process.cc
@@ -166,11 +166,11 @@ AlphaLiveProcess::argsInit(int intSize, int pageSize)
tc->setPC(prog_entry);
tc->setNextPC(prog_entry + sizeof(MachInst));
-#if THE_ISA != ALPHA_ISA //e.g. MIPS or Sparc
+ // MIPS/Sparc need NNPC for delay slot handling, while
+ // Alpha has no delay slots... However, CPU models
+ // cycle PCs by PC=NPC, NPC=NNPC, etc. so setting this
+ // here ensures CPU-Model Compatibility across board
tc->setNextNPC(prog_entry + (2 * sizeof(MachInst)));
-#endif
-
-
}
void
diff --git a/src/cpu/inorder/inorder_dyn_inst.hh b/src/cpu/inorder/inorder_dyn_inst.hh
index 12a9a4176..8e88fc583 100644
--- a/src/cpu/inorder/inorder_dyn_inst.hh
+++ b/src/cpu/inorder/inorder_dyn_inst.hh
@@ -264,6 +264,12 @@ class InOrderDynInst : public FastAlloc, public RefCounted
/** Predicted next PC. */
Addr predPC;
+ /** Predicted next NPC. */
+ Addr predNPC;
+
+ /** Predicted next microPC */
+ Addr predMicroPC;
+
/** Address to fetch from */
Addr fetchAddr;
@@ -506,7 +512,14 @@ class InOrderDynInst : public FastAlloc, public RefCounted
/** Returns the next NPC. This could be the speculative next NPC if it is
* called prior to the actual branch target being calculated.
*/
- Addr readNextNPC() { return nextNPC; }
+ Addr readNextNPC()
+ {
+#if ISA_HAS_DELAY_SLOT
+ return nextNPC;
+#else
+ return nextPC + sizeof(TheISA::MachInst);
+#endif
+ }
/** Set the next PC of this instruction (its actual target). */
void setNextNPC(uint64_t val) { nextNPC = val; }
@@ -522,19 +535,26 @@ class InOrderDynInst : public FastAlloc, public RefCounted
/** Returns the predicted target of the branch. */
Addr readPredTarg() { return predPC; }
+ /** Returns the predicted PC immediately after the branch. */
+ Addr readPredPC() { return predPC; }
+
+ /** Returns the predicted PC two instructions after the branch */
+ Addr readPredNPC() { return predNPC; }
+
+ /** Returns the predicted micro PC after the branch */
+ Addr readPredMicroPC() { return predMicroPC; }
+
/** Returns whether the instruction was predicted taken or not. */
bool predTaken() { return predictTaken; }
/** Returns whether the instruction mispredicted. */
bool mispredicted()
{
- // Special case since a not-taken, cond. delay slot, effectively
- // nullifies the delay slot instruction
- if (isCondDelaySlot() && !predictTaken) {
- return predPC != nextPC;
- } else {
- return predPC != nextNPC;
- }
+#if ISA_HAS_DELAY_SLOT
+ return predPC != nextNPC;
+#else
+ return predPC != nextPC;
+#endif
}
/** Returns whether the instruction mispredicted. */
diff --git a/src/cpu/inorder/pipeline_stage.cc b/src/cpu/inorder/pipeline_stage.cc
index cb69464b0..d8e26f725 100644
--- a/src/cpu/inorder/pipeline_stage.cc
+++ b/src/cpu/inorder/pipeline_stage.cc
@@ -342,13 +342,21 @@ PipelineStage::squashDueToBranch(DynInstPtr &inst, unsigned tid)
toPrevStages->stageInfo[stageNum][tid].doneSeqNum = inst->seqNum;
toPrevStages->stageInfo[stageNum][tid].squash = true;
toPrevStages->stageInfo[stageNum][tid].nextPC = inst->readPredTarg();
+
+
+#if ISA_HAS_DELAY_SLOT
toPrevStages->stageInfo[stageNum][tid].branchTaken = inst->readNextNPC() !=
(inst->readNextPC() + sizeof(TheISA::MachInst));
toPrevStages->stageInfo[stageNum][tid].bdelayDoneSeqNum = inst->bdelaySeqNum;
-
- DPRINTF(InOrderStage, "Target being re-set to %08p\n", inst->readPredTarg());
InstSeqNum squash_seq_num = inst->bdelaySeqNum;
+#else
+ toPrevStages->stageInfo[stageNum][tid].branchTaken = inst->readNextPC() !=
+ (inst->readPC() + sizeof(TheISA::MachInst));
+ toPrevStages->stageInfo[stageNum][tid].bdelayDoneSeqNum = inst->seqNum;
+ InstSeqNum squash_seq_num = inst->seqNum;
+#endif
+ DPRINTF(InOrderStage, "Target being re-set to %08p\n", inst->readPredTarg());
DPRINTF(InOrderStage, "[tid:%i]: Squashing after [sn:%i], due to [sn:%i] "
"branch.\n", tid, squash_seq_num, inst->seqNum);
diff --git a/src/cpu/inorder/resources/bpred_unit.cc b/src/cpu/inorder/resources/bpred_unit.cc
index 66d0779a2..df6b33792 100644
--- a/src/cpu/inorder/resources/bpred_unit.cc
+++ b/src/cpu/inorder/resources/bpred_unit.cc
@@ -196,7 +196,7 @@ BPredUnit::predict(DynInstPtr &inst, Addr &PC, unsigned tid)
predict_record.RASIndex = RAS[tid].topIdx();
predict_record.RASTarget = target;
- assert(predict_record.RASIndex < 16);
+ assert(predict_record.RASIndex < 16);
RAS[tid].pop();
@@ -219,14 +219,14 @@ BPredUnit::predict(DynInstPtr &inst, Addr &PC, unsigned tid)
}
if (inst->isCall() &&
- inst->isUncondCtrl() &&
- inst->isDirectCtrl()) {
- target = inst->branchTarget();
+ inst->isUncondCtrl() &&
+ inst->isDirectCtrl()) {
+ target = inst->branchTarget();
DPRINTF(Fetch, "BranchPred: [tid:%i]: Setting %#x predicted"
" target to %#x.\n",
tid, inst->readPC(), target);
- } else if (BTB.valid(PC, tid)) {
+ } else if (BTB.valid(PC, tid)) {
++BTBHits;
// If it's not a return, use the BTB to get the target addr.
@@ -248,7 +248,12 @@ BPredUnit::predict(DynInstPtr &inst, Addr &PC, unsigned tid)
PC = target;
inst->setPredTarg(target);
} else {
+#if ISA_HAS_DELAY_SLOT
+ // This value will be inst->PC + 4 (nextPC)
+ // Delay Slot archs need this to be inst->PC + 8 (nextNPC)
+ // so we increment one more time here.
PC = PC + sizeof(MachInst);
+#endif
inst->setPredTarg(PC);
}
diff --git a/src/cpu/inorder/resources/branch_predictor.cc b/src/cpu/inorder/resources/branch_predictor.cc
index 511a0ac82..d8c0730af 100644
--- a/src/cpu/inorder/resources/branch_predictor.cc
+++ b/src/cpu/inorder/resources/branch_predictor.cc
@@ -78,12 +78,12 @@ BranchPredictor::execute(int slot_num)
Addr pred_PC = inst->readNextPC();
if (inst->isControl()) {
- // If predicted, the pred_PC will be updated to new target value
// If not, the pred_PC be updated to pc+8
+ // If predicted, the pred_PC will be updated to new target value
bool predict_taken = branchPred.predict(inst, pred_PC, tid);
if (predict_taken) {
- DPRINTF(Resource, "[tid:%i]: [sn:%i]: Branch predicted true.\n",
+ DPRINTF(InOrderBPred, "[tid:%i]: [sn:%i]: Branch predicted true.\n",
tid, seq_num);
inst->setPredTarg(pred_PC);
diff --git a/src/cpu/inorder/resources/execution_unit.cc b/src/cpu/inorder/resources/execution_unit.cc
index 843adb5b0..60cbac8af 100644
--- a/src/cpu/inorder/resources/execution_unit.cc
+++ b/src/cpu/inorder/resources/execution_unit.cc
@@ -76,7 +76,7 @@ ExecutionUnit::execute(int slot_num)
case ExecuteInst:
{
if (inst->isMemRef()) {
- fatal("%s not configured to handle memory ops.\n", resName);
+ panic("%s not configured to handle memory ops.\n", resName);
} else if (inst->isControl()) {
// Evaluate Branch
fault = inst->execute();
@@ -111,23 +111,33 @@ ExecutionUnit::execute(int slot_num)
"[sn:%i] PC %#x mispredicted as not taken.\n", tid,
seq_num, inst->PC);
} else {
+#if ISA_HAS_DELAY_SLOT
inst->bdelaySeqNum = seq_num + 1;
-
+ inst->setPredTarg(inst->nextNPC);
+#else
+ inst->bdelaySeqNum = seq_num;
+ inst->setPredTarg(inst->nextPC);
+#endif
DPRINTF(InOrderExecute, "[tid:%i]: Misprediction detected at "
"[sn:%i] PC %#x,\n\t squashing after delay slot "
"instruction [sn:%i].\n",
tid, seq_num, inst->PC, inst->bdelaySeqNum);
DPRINTF(InOrderStall, "STALL: [tid:%i]: Branch "
"misprediction at %#x\n", tid, inst->PC);
- inst->setPredTarg(inst->nextNPC);
}
DPRINTF(InOrderExecute, "[tid:%i] Redirecting fetch to %#x.\n", tid,
inst->readPredTarg());
} else if(inst->isIndirectCtrl()){
+#if ISA_HAS_DELAY_SLOT
inst->setPredTarg(inst->nextNPC);
inst->bdelaySeqNum = seq_num + 1;
+#else
+ inst->setPredTarg(inst->nextPC);
+ inst->bdelaySeqNum = seq_num;
+#endif
+
DPRINTF(InOrderExecute, "[tid:%i] Redirecting fetch to %#x.\n", tid,
inst->readPredTarg());
} else {
@@ -151,7 +161,13 @@ ExecutionUnit::execute(int slot_num)
} else {
predictedNotTakenIncorrect++;
}
+ } else {
+ DPRINTF(InOrderExecute, "[tid:%i]: [sn:%i]: Prediction Correct.\n",
+ inst->readTid(), seq_num, inst->readIntResult(0));
}
+
+ DPRINTF(InOrderExecute, "[tid:%i]: [sn:%i]: The result of execution is 0x%x.\n",
+ inst->readTid(), seq_num, inst->readIntResult(0));
exec_req->done();
} else {
warn("inst [sn:%i] had a %s fault", seq_num, fault->name());
@@ -164,8 +180,8 @@ ExecutionUnit::execute(int slot_num)
inst->setExecuted();
exec_req->done();
- DPRINTF(InOrderExecute, "[tid:%i]: The result of execution is 0x%x.\n",
- inst->readTid(), inst->readIntResult(0));
+ DPRINTF(InOrderExecute, "[tid:%i]: [sn:%i]: The result of execution is 0x%x.\n",
+ inst->readTid(), seq_num, inst->readIntResult(0));
} else {
warn("inst [sn:%i] had a %s fault", seq_num, fault->name());
cpu->trap(fault, tid);
diff --git a/src/cpu/inorder/resources/fetch_seq_unit.cc b/src/cpu/inorder/resources/fetch_seq_unit.cc
index 444252e1b..69610ae58 100644
--- a/src/cpu/inorder/resources/fetch_seq_unit.cc
+++ b/src/cpu/inorder/resources/fetch_seq_unit.cc
@@ -96,13 +96,16 @@ FetchSeqUnit::execute(int slot_num)
inst->setNextPC(PC[tid] + instSize);
inst->setNextNPC(PC[tid] + (instSize * 2));
+#if ISA_HAS_DELAY_SLOT
inst->setPredTarg(inst->readNextNPC());
-
+#else
+ inst->setPredTarg(inst->readNextPC());
+#endif
inst->setMemAddr(PC[tid]);
inst->setSeqNum(cpu->getAndIncrementInstSeq(tid));
- DPRINTF(InOrderFetchSeq, "[tid:%i]: Assigning [sn:%i] to PC %08p\n", tid,
- inst->seqNum, inst->readPC());
+ DPRINTF(InOrderFetchSeq, "[tid:%i]: Assigning [sn:%i] to PC %08p, NPC %08p, NNPC %08p\n", tid,
+ inst->seqNum, inst->readPC(), inst->readNextPC(), inst->readNextNPC());
if (delaySlotInfo[tid].numInsts > 0) {
--delaySlotInfo[tid].numInsts;
@@ -150,30 +153,37 @@ FetchSeqUnit::execute(int slot_num)
squashAfterInst(inst, stage_num, tid);
} else if (!inst->isCondDelaySlot() && !inst->predTaken()) {
- // Not-Taken Control
+ // Not-Taken Control
DPRINTF(InOrderFetchSeq, "[tid:%i]: [sn:%i]: Predicted Not-Taken Control "
"inst. updating PC to %08p\n", tid, inst->seqNum,
inst->readNextPC());
-
+#if ISA_HAS_DELAY_SLOT
++delaySlotInfo[tid].numInsts;
delaySlotInfo[tid].targetReady = false;
delaySlotInfo[tid].targetAddr = inst->readNextNPC();
-
+#else
+ assert(delaySlotInfo[tid].numInsts == 0);
+#endif
} else if (inst->predTaken()) {
- // Taken Control
+ // Taken Control
+#if ISA_HAS_DELAY_SLOT
++delaySlotInfo[tid].numInsts;
delaySlotInfo[tid].targetReady = false;
delaySlotInfo[tid].targetAddr = inst->readPredTarg();
DPRINTF(InOrderFetchSeq, "[tid:%i]: [sn:%i] Updating delay slot target "
"to PC %08p\n", tid, inst->seqNum, inst->readPredTarg());
-
- // Set-Up Squash Through-Out Pipeline
- DPRINTF(InOrderFetchSeq, "[tid:%i] Setting up squash to start from stage %i, after [sn:%i].\n",
- tid, stage_num, seq_num + 1);
inst->bdelaySeqNum = seq_num + 1;
+#else
+ inst->bdelaySeqNum = seq_num;
+ assert(delaySlotInfo[tid].numInsts == 0);
+#endif
+
inst->squashingStage = stage_num;
+ DPRINTF(InOrderFetchSeq, "[tid:%i] Setting up squash to start from stage %i, after [sn:%i].\n",
+ tid, stage_num, inst->bdelaySeqNum);
+
// Do Squashing
squashAfterInst(inst, stage_num, tid);
}
@@ -239,6 +249,10 @@ FetchSeqUnit::squash(DynInstPtr inst, int squash_stage,
DPRINTF(InOrderFetchSeq, "[tid:%i]: Setting PC to %08p.\n",
tid, PC[tid]);
} else {
+#if !ISA_HAS_DELAY_SLOT
+ assert(0);
+#endif
+
delaySlotInfo[tid].numInsts = 1;
delaySlotInfo[tid].targetReady = false;
delaySlotInfo[tid].targetAddr = (inst->procDelaySlotOnMispred) ? inst->branchTarget() : new_PC;
diff --git a/src/cpu/o3/thread_context.hh b/src/cpu/o3/thread_context.hh
index e7c9c3b8f..b10305d5d 100755
--- a/src/cpu/o3/thread_context.hh
+++ b/src/cpu/o3/thread_context.hh
@@ -265,9 +265,6 @@ class O3ThreadContext : public ThreadContext
virtual void setNextNPC(uint64_t val)
{
-#if THE_ISA == ALPHA_ISA
- panic("Not supported on Alpha!");
-#endif
this->cpu->setNextNPC(val, this->thread->threadId());
}
};