summaryrefslogtreecommitdiff
path: root/src/cpu/o3/fetch_impl.hh
diff options
context:
space:
mode:
Diffstat (limited to 'src/cpu/o3/fetch_impl.hh')
-rw-r--r--src/cpu/o3/fetch_impl.hh200
1 files changed, 77 insertions, 123 deletions
diff --git a/src/cpu/o3/fetch_impl.hh b/src/cpu/o3/fetch_impl.hh
index fe320fa79..b80fc72e1 100644
--- a/src/cpu/o3/fetch_impl.hh
+++ b/src/cpu/o3/fetch_impl.hh
@@ -319,9 +319,7 @@ DefaultFetch<Impl>::initStage()
for (int tid = 0; tid < numThreads; tid++) {
PC[tid] = cpu->readPC(tid);
nextPC[tid] = cpu->readNextPC(tid);
-#if ISA_HAS_DELAY_SLOT
nextNPC[tid] = cpu->readNextNPC(tid);
-#endif
}
// Size of cache block.
@@ -343,11 +341,6 @@ DefaultFetch<Impl>::initStage()
cacheDataPC[tid] = 0;
cacheDataValid[tid] = false;
- delaySlotInfo[tid].branchSeqNum = -1;
- delaySlotInfo[tid].numInsts = 0;
- delaySlotInfo[tid].targetAddr = 0;
- delaySlotInfo[tid].targetReady = false;
-
stalls[tid].decode = false;
stalls[tid].rename = false;
stalls[tid].iew = false;
@@ -441,10 +434,8 @@ DefaultFetch<Impl>::takeOverFrom()
nextPC[i] = cpu->readNextPC(i);
#if ISA_HAS_DELAY_SLOT
nextNPC[i] = cpu->readNextNPC(i);
- delaySlotInfo[i].branchSeqNum = -1;
- delaySlotInfo[i].numInsts = 0;
- delaySlotInfo[i].targetAddr = 0;
- delaySlotInfo[i].targetReady = false;
+#else
+ nextNPC[i] = nextPC[i] + sizeof(TheISA::MachInst);
#endif
fetchStatus[i] = Running;
}
@@ -503,54 +494,41 @@ DefaultFetch<Impl>::lookupAndUpdateNextPC(DynInstPtr &inst, Addr &next_PC,
bool predict_taken;
if (!inst->isControl()) {
-#if ISA_HAS_DELAY_SLOT
- Addr cur_PC = next_PC;
- next_PC = cur_PC + instSize; //next_NPC;
- next_NPC = cur_PC + (2 * instSize);//next_NPC + instSize;
- inst->setPredTarg(next_NPC);
-#else
- next_PC = next_PC + instSize;
- inst->setPredTarg(next_PC);
-#endif
+ next_PC = next_NPC;
+ next_NPC = next_NPC + instSize;
+ inst->setPredTarg(next_PC, next_NPC);
+ inst->setPredTaken(false);
return false;
}
int tid = inst->threadNumber;
-#if ISA_HAS_DELAY_SLOT
Addr pred_PC = next_PC;
predict_taken = branchPred.predict(inst, pred_PC, tid);
- if (predict_taken) {
- DPRINTF(Fetch, "[tid:%i]: Branch predicted to be true.\n", tid);
+/* if (predict_taken) {
+ DPRINTF(Fetch, "[tid:%i]: Branch predicted to be taken to %#x.\n",
+ tid, pred_PC);
} else {
- DPRINTF(Fetch, "[tid:%i]: Branch predicted to be false.\n", tid);
- }
+ DPRINTF(Fetch, "[tid:%i]: Branch predicted to be not taken.\n", tid);
+ }*/
- if (predict_taken) {
- next_PC = next_NPC;
+#if ISA_HAS_DELAY_SLOT
+ next_PC = next_NPC;
+ if (predict_taken)
next_NPC = pred_PC;
-
- // Update delay slot info
- ++delaySlotInfo[tid].numInsts;
- delaySlotInfo[tid].targetAddr = pred_PC;
- DPRINTF(Fetch, "[tid:%i]: %i delay slot inst(s) to process.\n", tid,
- delaySlotInfo[tid].numInsts);
- } else { // !predict_taken
- if (inst->isCondDelaySlot()) {
- next_PC = pred_PC;
- // The delay slot is skipped here if there is on
- // prediction
- } else {
- next_PC = next_NPC;
- // No need to declare a delay slot here since
- // there is no for the pred. target to jump
- }
-
- next_NPC = next_NPC + instSize;
- }
+ else
+ next_NPC += instSize;
#else
- predict_taken = branchPred.predict(inst, next_PC, tid);
+ if (predict_taken)
+ next_PC = pred_PC;
+ else
+ next_PC += instSize;
+ next_NPC = next_PC + instSize;
#endif
+/* DPRINTF(Fetch, "[tid:%i]: Branch predicted to go to %#x and then %#x.\n",
+ tid, next_PC, next_NPC);*/
+ inst->setPredTarg(next_PC, next_NPC);
+ inst->setPredTaken(predict_taken);
++fetchedBranches;
@@ -671,14 +649,15 @@ DefaultFetch<Impl>::fetchCacheLine(Addr fetch_PC, Fault &ret_fault, unsigned tid
template <class Impl>
inline void
-DefaultFetch<Impl>::doSquash(const Addr &new_PC, unsigned tid)
+DefaultFetch<Impl>::doSquash(const Addr &new_PC,
+ const Addr &new_NPC, unsigned tid)
{
- DPRINTF(Fetch, "[tid:%i]: Squashing, setting PC to: %#x.\n",
- tid, new_PC);
+ DPRINTF(Fetch, "[tid:%i]: Squashing, setting PC to: %#x, NPC to: %#x.\n",
+ tid, new_PC, new_NPC);
PC[tid] = new_PC;
- nextPC[tid] = new_PC + instSize;
- nextNPC[tid] = new_PC + (2 * instSize);
+ nextPC[tid] = new_NPC;
+ nextNPC[tid] = new_NPC + instSize;
// Clear the icache miss if it's outstanding.
if (fetchStatus[tid] == IcacheWaitResponse) {
@@ -704,21 +683,13 @@ DefaultFetch<Impl>::doSquash(const Addr &new_PC, unsigned tid)
template<class Impl>
void
-DefaultFetch<Impl>::squashFromDecode(const Addr &new_PC,
+DefaultFetch<Impl>::squashFromDecode(const Addr &new_PC, const Addr &new_NPC,
const InstSeqNum &seq_num,
unsigned tid)
{
DPRINTF(Fetch, "[tid:%i]: Squashing from decode.\n",tid);
- doSquash(new_PC, tid);
-
-#if ISA_HAS_DELAY_SLOT
- if (seq_num <= delaySlotInfo[tid].branchSeqNum) {
- delaySlotInfo[tid].numInsts = 0;
- delaySlotInfo[tid].targetAddr = 0;
- delaySlotInfo[tid].targetReady = false;
- }
-#endif
+ doSquash(new_PC, new_NPC, tid);
// Tell the CPU to remove any instructions that are in flight between
// fetch and decode.
@@ -793,20 +764,15 @@ DefaultFetch<Impl>::updateFetchStatus()
template <class Impl>
void
-DefaultFetch<Impl>::squash(const Addr &new_PC, const InstSeqNum &seq_num,
+DefaultFetch<Impl>::squash(const Addr &new_PC, const Addr &new_NPC,
+ const InstSeqNum &seq_num,
bool squash_delay_slot, unsigned tid)
{
DPRINTF(Fetch, "[tid:%u]: Squash from commit.\n",tid);
- doSquash(new_PC, tid);
+ doSquash(new_PC, new_NPC, tid);
#if ISA_HAS_DELAY_SLOT
- if (seq_num <= delaySlotInfo[tid].branchSeqNum) {
- delaySlotInfo[tid].numInsts = 0;
- delaySlotInfo[tid].targetAddr = 0;
- delaySlotInfo[tid].targetReady = false;
- }
-
// Tell the CPU to remove any instructions that are not in the ROB.
cpu->removeInstsNotInROB(tid, squash_delay_slot, seq_num);
#else
@@ -929,6 +895,7 @@ DefaultFetch<Impl>::checkSignalsAndUpdate(unsigned tid)
#endif
// In any case, squash.
squash(fromCommit->commitInfo[tid].nextPC,
+ fromCommit->commitInfo[tid].nextNPC,
doneSeqNum,
fromCommit->commitInfo[tid].squashDelaySlot,
tid);
@@ -984,8 +951,12 @@ DefaultFetch<Impl>::checkSignalsAndUpdate(unsigned tid)
#else
InstSeqNum doneSeqNum = fromDecode->decodeInfo[tid].doneSeqNum;
#endif
+ DPRINTF(Fetch, "Squashing from decode with PC = %#x, NPC = %#x\n",
+ fromDecode->decodeInfo[tid].nextPC,
+ fromDecode->decodeInfo[tid].nextNPC);
// Squash unless we're already squashing
squashFromDecode(fromDecode->decodeInfo[tid].nextPC,
+ fromDecode->decodeInfo[tid].nextNPC,
doneSeqNum,
tid);
@@ -1042,6 +1013,8 @@ DefaultFetch<Impl>::fetch(bool &status_change)
// The current PC.
Addr &fetch_PC = PC[tid];
+ Addr &fetch_NPC = nextPC[tid];
+
// Fault code for memory access.
Fault fault = NoFault;
@@ -1098,7 +1071,8 @@ DefaultFetch<Impl>::fetch(bool &status_change)
}
Addr next_PC = fetch_PC;
- Addr next_NPC = next_PC + instSize;
+ Addr next_NPC = fetch_NPC;
+
InstSeqNum inst_seq;
MachInst inst;
ExtMachInst ext_inst;
@@ -1117,15 +1091,22 @@ DefaultFetch<Impl>::fetch(bool &status_change)
// ended this fetch block.
bool predicted_branch = false;
- // Need to keep track of whether or not a delay slot
- // instruction has been fetched
-
for (;
offset < cacheBlkSize &&
numInst < fetchWidth &&
- (!predicted_branch || delaySlotInfo[tid].numInsts > 0);
+ !predicted_branch;
++numInst) {
+ // If we're branching after this instruction, quite fetching
+ // from the same block then.
+ predicted_branch =
+ (fetch_PC + sizeof(TheISA::MachInst) != fetch_NPC);
+ if (predicted_branch) {
+ DPRINTF(Fetch, "Branch detected with PC = %#x, NPC = %#x\n",
+ fetch_PC, fetch_NPC);
+ }
+
+
// Get a sequence number.
inst_seq = cpu->getAndIncrementInstSeq();
@@ -1145,8 +1126,9 @@ DefaultFetch<Impl>::fetch(bool &status_change)
#endif
// Create a new DynInst from the instruction fetched.
- DynInstPtr instruction = new DynInst(ext_inst, fetch_PC,
- next_PC,
+ DynInstPtr instruction = new DynInst(ext_inst,
+ fetch_PC, fetch_NPC,
+ next_PC, next_NPC,
inst_seq, cpu);
instruction->setTid(tid);
@@ -1158,6 +1140,8 @@ DefaultFetch<Impl>::fetch(bool &status_change)
"[sn:%lli]\n",
tid, instruction->readPC(), inst_seq);
+ //DPRINTF(Fetch, "[tid:%i]: MachInst is %#x\n", tid, ext_inst);
+
DPRINTF(Fetch, "[tid:%i]: Instruction is: %s\n",
tid, instruction->staticInst->disassemble(fetch_PC));
@@ -1166,8 +1150,12 @@ DefaultFetch<Impl>::fetch(bool &status_change)
instruction->staticInst,
instruction->readPC());
- predicted_branch = lookupAndUpdateNextPC(instruction, next_PC,
- next_NPC);
+ ///FIXME This needs to be more robust in dealing with delay slots
+#if !ISA_HAS_DELAY_SLOT
+ predicted_branch |=
+#endif
+ lookupAndUpdateNextPC(instruction, next_PC, next_NPC);
+ predicted_branch |= (next_PC != fetch_NPC);
// Add instruction to the CPU's list of instructions.
instruction->setInstListIt(cpu->addInst(instruction));
@@ -1183,6 +1171,7 @@ DefaultFetch<Impl>::fetch(bool &status_change)
// Move to the next instruction, unless we have a branch.
fetch_PC = next_PC;
+ fetch_NPC = next_NPC;
if (instruction->isQuiesce()) {
DPRINTF(Fetch, "Quiesce instruction encountered, halting fetch!",
@@ -1194,29 +1183,6 @@ DefaultFetch<Impl>::fetch(bool &status_change)
}
offset += instSize;
-
-#if ISA_HAS_DELAY_SLOT
- if (predicted_branch) {
- delaySlotInfo[tid].branchSeqNum = inst_seq;
-
- DPRINTF(Fetch, "[tid:%i]: Delay slot branch set to [sn:%i]\n",
- tid, inst_seq);
- continue;
- } else if (delaySlotInfo[tid].numInsts > 0) {
- --delaySlotInfo[tid].numInsts;
-
- // It's OK to set PC to target of branch
- if (delaySlotInfo[tid].numInsts == 0) {
- delaySlotInfo[tid].targetReady = true;
-
- // Break the looping condition
- predicted_branch = true;
- }
-
- DPRINTF(Fetch, "[tid:%i]: %i delay slot inst(s) left to"
- " process.\n", tid, delaySlotInfo[tid].numInsts);
- }
-#endif
}
if (offset >= cacheBlkSize) {
@@ -1225,7 +1191,7 @@ DefaultFetch<Impl>::fetch(bool &status_change)
} else if (numInst >= fetchWidth) {
DPRINTF(Fetch, "[tid:%i]: Done fetching, reached fetch bandwidth "
"for this cycle.\n", tid);
- } else if (predicted_branch && delaySlotInfo[tid].numInsts <= 0) {
+ } else if (predicted_branch) {
DPRINTF(Fetch, "[tid:%i]: Done fetching, predicted branch "
"instruction encountered.\n", tid);
}
@@ -1238,26 +1204,13 @@ DefaultFetch<Impl>::fetch(bool &status_change)
// Now that fetching is completed, update the PC to signify what the next
// cycle will be.
if (fault == NoFault) {
+ PC[tid] = next_PC;
+ nextPC[tid] = next_NPC;
+ nextNPC[tid] = next_NPC + instSize;
#if ISA_HAS_DELAY_SLOT
- if (delaySlotInfo[tid].targetReady &&
- delaySlotInfo[tid].numInsts == 0) {
- // Set PC to target
- PC[tid] = delaySlotInfo[tid].targetAddr; //next_PC
- nextPC[tid] = next_PC + instSize; //next_NPC
- nextNPC[tid] = next_PC + (2 * instSize);
-
- delaySlotInfo[tid].targetReady = false;
- } else {
- PC[tid] = next_PC;
- nextPC[tid] = next_NPC;
- nextNPC[tid] = next_NPC + instSize;
- }
-
DPRINTF(Fetch, "[tid:%i]: Setting PC to %08p.\n", tid, PC[tid]);
#else
- DPRINTF(Fetch, "[tid:%i]: Setting PC to %08p.\n",tid, next_PC);
- PC[tid] = next_PC;
- nextPC[tid] = next_PC + instSize;
+ DPRINTF(Fetch, "[tid:%i]: Setting PC to %08p.\n", tid, next_PC);
#endif
} else {
// We shouldn't be in an icache miss and also have a fault (an ITB
@@ -1277,10 +1230,11 @@ DefaultFetch<Impl>::fetch(bool &status_change)
ext_inst = TheISA::NoopMachInst;
// Create a new DynInst from the dummy nop.
- DynInstPtr instruction = new DynInst(ext_inst, fetch_PC,
- next_PC,
+ DynInstPtr instruction = new DynInst(ext_inst,
+ fetch_PC, fetch_NPC,
+ next_PC, next_NPC,
inst_seq, cpu);
- instruction->setPredTarg(next_PC + instSize);
+ instruction->setPredTarg(next_PC, next_NPC);
instruction->setTid(tid);
instruction->setASID(tid);