1 files changed, 77 insertions, 123 deletions
diff --git a/src/cpu/o3/fetch_impl.hh b/src/cpu/o3/fetch_impl.hh
index fe320fa79..b80fc72e1 100644
--- a/src/cpu/o3/fetch_impl.hh
+++ b/src/cpu/o3/fetch_impl.hh
@@ -319,9 +319,7 @@ DefaultFetch<Impl>::initStage()
     for (int tid = 0; tid < numThreads; tid++) {
         PC[tid] = cpu->readPC(tid);
         nextPC[tid] = cpu->readNextPC(tid);
-#if ISA_HAS_DELAY_SLOT
         nextNPC[tid] = cpu->readNextNPC(tid);
-#endif
     }
 
     // Size of cache block.
@@ -343,11 +341,6 @@ DefaultFetch<Impl>::initStage()
         cacheDataPC[tid] = 0;
         cacheDataValid[tid] = false;
 
-        delaySlotInfo[tid].branchSeqNum = -1;
-        delaySlotInfo[tid].numInsts = 0;
-        delaySlotInfo[tid].targetAddr = 0;
-        delaySlotInfo[tid].targetReady = false;
-
         stalls[tid].decode = false;
         stalls[tid].rename = false;
         stalls[tid].iew = false;
@@ -441,10 +434,8 @@ DefaultFetch<Impl>::takeOverFrom()
         nextPC[i] = cpu->readNextPC(i);
 #if ISA_HAS_DELAY_SLOT
         nextNPC[i] = cpu->readNextNPC(i);
-        delaySlotInfo[i].branchSeqNum = -1;
-        delaySlotInfo[i].numInsts = 0;
-        delaySlotInfo[i].targetAddr = 0;
-        delaySlotInfo[i].targetReady = false;
+#else
+        nextNPC[i] = nextPC[i] + sizeof(TheISA::MachInst);
 #endif
         fetchStatus[i] = Running;
     }
@@ -503,54 +494,41 @@ DefaultFetch<Impl>::lookupAndUpdateNextPC(DynInstPtr &inst, Addr &next_PC,
     bool predict_taken;
 
     if (!inst->isControl()) {
-#if ISA_HAS_DELAY_SLOT
-        Addr cur_PC = next_PC;
-        next_PC  = cur_PC + instSize;      //next_NPC;
-        next_NPC = cur_PC + (2 * instSize);//next_NPC + instSize;
-        inst->setPredTarg(next_NPC);
-#else
-        next_PC = next_PC + instSize;
-        inst->setPredTarg(next_PC);
-#endif
+        next_PC  = next_NPC;
+        next_NPC = next_NPC + instSize;
+        inst->setPredTarg(next_PC, next_NPC);
+        inst->setPredTaken(false);
         return false;
     }
 
     int tid = inst->threadNumber;
-#if ISA_HAS_DELAY_SLOT
     Addr pred_PC = next_PC;
     predict_taken = branchPred.predict(inst, pred_PC, tid);
 
-    if (predict_taken) {
-        DPRINTF(Fetch, "[tid:%i]: Branch predicted to be true.\n", tid);
+/*    if (predict_taken) {
+        DPRINTF(Fetch, "[tid:%i]: Branch predicted to be taken to %#x.\n",
+                tid, pred_PC);
     } else {
-        DPRINTF(Fetch, "[tid:%i]: Branch predicted to be false.\n", tid);
-    }
+        DPRINTF(Fetch, "[tid:%i]: Branch predicted to be not taken.\n", tid);
+    }*/
 
-    if (predict_taken) {
-        next_PC = next_NPC;
+#if ISA_HAS_DELAY_SLOT
+    next_PC = next_NPC;
+    if (predict_taken)
         next_NPC = pred_PC;
-
-        // Update delay slot info
-        ++delaySlotInfo[tid].numInsts;
-        delaySlotInfo[tid].targetAddr = pred_PC;
-        DPRINTF(Fetch, "[tid:%i]: %i delay slot inst(s) to process.\n", tid,
-                delaySlotInfo[tid].numInsts);
-    } else { // !predict_taken
-        if (inst->isCondDelaySlot()) {
-            next_PC = pred_PC;
-            // The delay slot is skipped here if there is on
-            // prediction
-        } else {
-            next_PC = next_NPC;
-            // No need to declare a delay slot here since
-            // there is no for the pred. target to jump
-        }
-
-        next_NPC = next_NPC + instSize;
-    }
+    else
+        next_NPC += instSize;
 #else
-    predict_taken = branchPred.predict(inst, next_PC, tid);
+    if (predict_taken)
+        next_PC = pred_PC;
+    else
+        next_PC += instSize;
+    next_NPC = next_PC + instSize;
 #endif
+/*    DPRINTF(Fetch, "[tid:%i]: Branch predicted to go to %#x and then %#x.\n",
+            tid, next_PC, next_NPC);*/
+    inst->setPredTarg(next_PC, next_NPC);
+    inst->setPredTaken(predict_taken);
 
     ++fetchedBranches;
 
@@ -671,14 +649,15 @@ DefaultFetch<Impl>::fetchCacheLine(Addr fetch_PC, Fault &ret_fault, unsigned tid
 
 template <class Impl>
 inline void
-DefaultFetch<Impl>::doSquash(const Addr &new_PC, unsigned tid)
+DefaultFetch<Impl>::doSquash(const Addr &new_PC,
+        const Addr &new_NPC, unsigned tid)
 {
-    DPRINTF(Fetch, "[tid:%i]: Squashing, setting PC to: %#x.\n",
-            tid, new_PC);
+    DPRINTF(Fetch, "[tid:%i]: Squashing, setting PC to: %#x, NPC to: %#x.\n",
+            tid, new_PC, new_NPC);
 
     PC[tid] = new_PC;
-    nextPC[tid] = new_PC + instSize;
-    nextNPC[tid] = new_PC + (2 * instSize);
+    nextPC[tid] = new_NPC;
+    nextNPC[tid] = new_NPC + instSize;
 
     // Clear the icache miss if it's outstanding.
     if (fetchStatus[tid] == IcacheWaitResponse) {
@@ -704,21 +683,13 @@ DefaultFetch<Impl>::doSquash(const Addr &new_PC, unsigned tid)
 
 template<class Impl>
 void
-DefaultFetch<Impl>::squashFromDecode(const Addr &new_PC,
+DefaultFetch<Impl>::squashFromDecode(const Addr &new_PC, const Addr &new_NPC,
                                      const InstSeqNum &seq_num,
                                      unsigned tid)
 {
     DPRINTF(Fetch, "[tid:%i]: Squashing from decode.\n",tid);
 
-    doSquash(new_PC, tid);
-
-#if ISA_HAS_DELAY_SLOT
-    if (seq_num <=  delaySlotInfo[tid].branchSeqNum) {
-        delaySlotInfo[tid].numInsts = 0;
-        delaySlotInfo[tid].targetAddr = 0;
-        delaySlotInfo[tid].targetReady = false;
-    }
-#endif
+    doSquash(new_PC, new_NPC, tid);
 
     // Tell the CPU to remove any instructions that are in flight between
     // fetch and decode.
@@ -793,20 +764,15 @@ DefaultFetch<Impl>::updateFetchStatus()
 
 template <class Impl>
 void
-DefaultFetch<Impl>::squash(const Addr &new_PC, const InstSeqNum &seq_num,
+DefaultFetch<Impl>::squash(const Addr &new_PC, const Addr &new_NPC,
+                           const InstSeqNum &seq_num,
                            bool squash_delay_slot, unsigned tid)
 {
     DPRINTF(Fetch, "[tid:%u]: Squash from commit.\n",tid);
 
-    doSquash(new_PC, tid);
+    doSquash(new_PC, new_NPC, tid);
 
 #if ISA_HAS_DELAY_SLOT
-    if (seq_num <=  delaySlotInfo[tid].branchSeqNum) {
-        delaySlotInfo[tid].numInsts = 0;
-        delaySlotInfo[tid].targetAddr = 0;
-        delaySlotInfo[tid].targetReady = false;
-    }
-
     // Tell the CPU to remove any instructions that are not in the ROB.
     cpu->removeInstsNotInROB(tid, squash_delay_slot, seq_num);
 #else
@@ -929,6 +895,7 @@ DefaultFetch<Impl>::checkSignalsAndUpdate(unsigned tid)
 #endif
         // In any case, squash.
         squash(fromCommit->commitInfo[tid].nextPC,
+               fromCommit->commitInfo[tid].nextNPC,
                doneSeqNum,
                fromCommit->commitInfo[tid].squashDelaySlot,
                tid);
@@ -984,8 +951,12 @@ DefaultFetch<Impl>::checkSignalsAndUpdate(unsigned tid)
 #else
             InstSeqNum doneSeqNum = fromDecode->decodeInfo[tid].doneSeqNum;
 #endif
+            DPRINTF(Fetch, "Squashing from decode with PC = %#x, NPC = %#x\n",
+                    fromDecode->decodeInfo[tid].nextPC,
+                    fromDecode->decodeInfo[tid].nextNPC);
             // Squash unless we're already squashing
             squashFromDecode(fromDecode->decodeInfo[tid].nextPC,
+                             fromDecode->decodeInfo[tid].nextNPC,
                              doneSeqNum,
                              tid);
 
@@ -1042,6 +1013,8 @@ DefaultFetch<Impl>::fetch(bool &status_change)
     // The current PC.
     Addr &fetch_PC = PC[tid];
 
+    Addr &fetch_NPC = nextPC[tid];
+
     // Fault code for memory access.
     Fault fault = NoFault;
 
@@ -1098,7 +1071,8 @@ DefaultFetch<Impl>::fetch(bool &status_change)
     }
 
     Addr next_PC = fetch_PC;
-    Addr next_NPC = next_PC + instSize;
+    Addr next_NPC = fetch_NPC;
+
     InstSeqNum inst_seq;
     MachInst inst;
     ExtMachInst ext_inst;
@@ -1117,15 +1091,22 @@ DefaultFetch<Impl>::fetch(bool &status_change)
         // ended this fetch block.
         bool predicted_branch = false;
 
-        // Need to keep track of whether or not a delay slot
-        // instruction has been fetched
-
         for (;
              offset < cacheBlkSize &&
                  numInst < fetchWidth &&
-                 (!predicted_branch || delaySlotInfo[tid].numInsts > 0);
+                 !predicted_branch;
              ++numInst) {
 
+            // If we're branching after this instruction, quit fetching
+            // from the same block.
+            predicted_branch =
+                (fetch_PC + sizeof(TheISA::MachInst) != fetch_NPC);
+            if (predicted_branch) {
+                DPRINTF(Fetch, "Branch detected with PC = %#x, NPC = %#x\n",
+                        fetch_PC, fetch_NPC);
+            }
+
+
             // Get a sequence number.
             inst_seq = cpu->getAndIncrementInstSeq();
 
@@ -1145,8 +1126,9 @@ DefaultFetch<Impl>::fetch(bool &status_change)
 #endif
 
             // Create a new DynInst from the instruction fetched.
-            DynInstPtr instruction = new DynInst(ext_inst, fetch_PC,
-                                                 next_PC,
+            DynInstPtr instruction = new DynInst(ext_inst,
+                                                 fetch_PC, fetch_NPC,
+                                                 next_PC, next_NPC,
                                                  inst_seq, cpu);
             instruction->setTid(tid);
 
@@ -1158,6 +1140,8 @@ DefaultFetch<Impl>::fetch(bool &status_change)
                     "[sn:%lli]\n",
                     tid, instruction->readPC(), inst_seq);
 
+            //DPRINTF(Fetch, "[tid:%i]: MachInst is %#x\n", tid, ext_inst);
+
             DPRINTF(Fetch, "[tid:%i]: Instruction is: %s\n",
                     tid, instruction->staticInst->disassemble(fetch_PC));
 
@@ -1166,8 +1150,12 @@ DefaultFetch<Impl>::fetch(bool &status_change)
                                      instruction->staticInst,
                                      instruction->readPC());
 
-            predicted_branch = lookupAndUpdateNextPC(instruction, next_PC,
-                                                     next_NPC);
+            ///FIXME This needs to be more robust in dealing with delay slots
+#if !ISA_HAS_DELAY_SLOT
+            predicted_branch |=
+#endif
+            lookupAndUpdateNextPC(instruction, next_PC, next_NPC);
+            predicted_branch |= (next_PC != fetch_NPC);
 
             // Add instruction to the CPU's list of instructions.
             instruction->setInstListIt(cpu->addInst(instruction));
@@ -1183,6 +1171,7 @@ DefaultFetch<Impl>::fetch(bool &status_change)
 
             // Move to the next instruction, unless we have a branch.
             fetch_PC = next_PC;
+            fetch_NPC = next_NPC;
 
             if (instruction->isQuiesce()) {
                 DPRINTF(Fetch, "Quiesce instruction encountered, halting fetch!",
@@ -1194,29 +1183,6 @@ DefaultFetch<Impl>::fetch(bool &status_change)
             }
 
             offset += instSize;
-
-#if ISA_HAS_DELAY_SLOT
-            if (predicted_branch) {
-                delaySlotInfo[tid].branchSeqNum = inst_seq;
-
-                DPRINTF(Fetch, "[tid:%i]: Delay slot branch set to [sn:%i]\n",
-                        tid, inst_seq);
-                continue;
-            } else if (delaySlotInfo[tid].numInsts > 0) {
-                --delaySlotInfo[tid].numInsts;
-
-                // It's OK to set PC to target of branch
-                if (delaySlotInfo[tid].numInsts == 0) {
-                    delaySlotInfo[tid].targetReady = true;
-
-                    // Break the looping condition
-                    predicted_branch = true;
-                }
-
-                DPRINTF(Fetch, "[tid:%i]: %i delay slot inst(s) left to"
-                        " process.\n", tid, delaySlotInfo[tid].numInsts);
-            }
-#endif
         }
 
         if (offset >= cacheBlkSize) {
@@ -1225,7 +1191,7 @@ DefaultFetch<Impl>::fetch(bool &status_change)
         } else if (numInst >= fetchWidth) {
             DPRINTF(Fetch, "[tid:%i]: Done fetching, reached fetch bandwidth "
                     "for this cycle.\n", tid);
-        } else if (predicted_branch && delaySlotInfo[tid].numInsts <= 0) {
+        } else if (predicted_branch) {
             DPRINTF(Fetch, "[tid:%i]: Done fetching, predicted branch "
                     "instruction encountered.\n", tid);
         }
@@ -1238,26 +1204,13 @@ DefaultFetch<Impl>::fetch(bool &status_change)
     // Now that fetching is completed, update the PC to signify what the next
     // cycle will be.
     if (fault == NoFault) {
+        PC[tid] = next_PC;
+        nextPC[tid] = next_NPC;
+        nextNPC[tid] = next_NPC + instSize;
 #if ISA_HAS_DELAY_SLOT
-        if (delaySlotInfo[tid].targetReady &&
-            delaySlotInfo[tid].numInsts == 0) {
-            // Set PC to target
-            PC[tid] = delaySlotInfo[tid].targetAddr; //next_PC
-            nextPC[tid] = next_PC + instSize;        //next_NPC
-            nextNPC[tid] = next_PC + (2 * instSize);
-
-            delaySlotInfo[tid].targetReady = false;
-        } else {
-            PC[tid] = next_PC;
-            nextPC[tid] = next_NPC;
-            nextNPC[tid] = next_NPC + instSize;
-        }
-
         DPRINTF(Fetch, "[tid:%i]: Setting PC to %08p.\n", tid, PC[tid]);
 #else
-        DPRINTF(Fetch, "[tid:%i]: Setting PC to %08p.\n",tid, next_PC);
-        PC[tid] = next_PC;
-        nextPC[tid] = next_PC + instSize;
+        DPRINTF(Fetch, "[tid:%i]: Setting PC to %08p.\n", tid, next_PC);
 #endif
     } else {
         // We shouldn't be in an icache miss and also have a fault (an ITB
@@ -1277,10 +1230,11 @@ DefaultFetch<Impl>::fetch(bool &status_change)
         ext_inst = TheISA::NoopMachInst;
 
         // Create a new DynInst from the dummy nop.
-        DynInstPtr instruction = new DynInst(ext_inst, fetch_PC,
-                                             next_PC,
+        DynInstPtr instruction = new DynInst(ext_inst,
+                                             fetch_PC, fetch_NPC,
+                                             next_PC, next_NPC,
                                              inst_seq, cpu);
-        instruction->setPredTarg(next_PC + instSize);
+        instruction->setPredTarg(next_PC, next_NPC);
         instruction->setTid(tid);
 
         instruction->setASID(tid);