Made branch delay slots get squashed, and passed an NPC and an NNPC back to the fetch stage so it knows where to resume fetching.

--HG-- extra : convert_revision : a2e4845fedf113b5a2fd92d3d28ce5b006278103
author: Gabe Black <gblack@eecs.umich.edu> 2006-12-16 07:32:06 -0500
committer: Gabe Black <gblack@eecs.umich.edu> 2006-12-16 07:32:06 -0500
commit: 37b9966eb466b1655f0d4e604bafa729a3aaea6a (patch)
tree: f4e3b34b69f2445db9f688a819bf2b1d8f007ecd /src
parent: 4d66ddbe35252d3d70a0c3d25d100672db5f1ef9 (diff)
download: gem5-37b9966eb466b1655f0d4e604bafa729a3aaea6a.tar.xz
8 files changed, 70 insertions, 54 deletions
diff --git a/src/cpu/o3/comm.hh b/src/cpu/o3/comm.hh
index 4683c77af..d96919007 100644
--- a/src/cpu/o3/comm.hh
+++ b/src/cpu/o3/comm.hh
@@ -90,6 +90,7 @@ struct DefaultIEWDefaultCommit {
     bool squashDelaySlot[Impl::MaxThreads];
     uint64_t mispredPC[Impl::MaxThreads];
     uint64_t nextPC[Impl::MaxThreads];
+    uint64_t nextNPC[Impl::MaxThreads];
     InstSeqNum squashedSeqNum[Impl::MaxThreads];
 
     bool includeSquashInst[Impl::MaxThreads];
@@ -121,6 +122,7 @@ struct TimeBufStruct {
         bool branchTaken;
         uint64_t mispredPC;
         uint64_t nextPC;
+        uint64_t nextNPC;
 
         unsigned branchCount;
     };
@@ -160,6 +162,7 @@ struct TimeBufStruct {
         bool branchTaken;
         uint64_t mispredPC;
         uint64_t nextPC;
+        uint64_t nextNPC;
 
         // Represents the instruction that has either been retired or
         // squashed.  Similar to having a single bus that broadcasts the
diff --git a/src/cpu/o3/commit_impl.hh b/src/cpu/o3/commit_impl.hh
index 3178410a8..194138efc 100644
--- a/src/cpu/o3/commit_impl.hh
+++ b/src/cpu/o3/commit_impl.hh
@@ -514,6 +514,7 @@ DefaultCommit<Impl>::squashAll(unsigned tid)
     toIEW->commitInfo[tid].branchMispredict = false;
 
     toIEW->commitInfo[tid].nextPC = PC[tid];
+    toIEW->commitInfo[tid].nextNPC = nextPC[tid];
 }
 
 template <class Impl>
@@ -770,6 +771,7 @@ DefaultCommit<Impl>::commit()
                 fromIEW->branchTaken[tid];
 
             toIEW->commitInfo[tid].nextPC = fromIEW->nextPC[tid];
+            toIEW->commitInfo[tid].nextNPC = fromIEW->nextNPC[tid];
 
             toIEW->commitInfo[tid].mispredPC = fromIEW->mispredPC[tid];
 
diff --git a/src/cpu/o3/cpu.cc b/src/cpu/o3/cpu.cc
index 4056d876f..5616ba398 100644
--- a/src/cpu/o3/cpu.cc
+++ b/src/cpu/o3/cpu.cc
@@ -700,7 +700,7 @@ FullO3CPU<Impl>::removeThread(unsigned tid)
 
     // Squash Throughout Pipeline
     InstSeqNum squash_seq_num = commit.rob->readHeadInst(tid)->seqNum;
-    fetch.squash(0, squash_seq_num, true, tid);
+    fetch.squash(0, sizeof(TheISA::MachInst), squash_seq_num, true, tid);
     decode.squash(tid);
     rename.squash(squash_seq_num, tid);
     iew.squash(tid);
diff --git a/src/cpu/o3/fetch.hh b/src/cpu/o3/fetch.hh
index 04016347a..4f5a161e0 100644
--- a/src/cpu/o3/fetch.hh
+++ b/src/cpu/o3/fetch.hh
@@ -239,13 +239,13 @@ class DefaultFetch
     bool fetchCacheLine(Addr fetch_PC, Fault &ret_fault, unsigned tid);
 
     /** Squashes a specific thread and resets the PC. */
-    inline void doSquash(const Addr &new_PC, unsigned tid);
+    inline void doSquash(const Addr &new_PC, const Addr &new_NPC, unsigned tid);
 
     /** Squashes a specific thread and resets the PC. Also tells the CPU to
      * remove any instructions between fetch and decode that should be sqaushed.
      */
-    void squashFromDecode(const Addr &new_PC, const InstSeqNum &seq_num,
-                          unsigned tid);
+    void squashFromDecode(const Addr &new_PC, const Addr &new_NPC,
+                          const InstSeqNum &seq_num, unsigned tid);
 
     /** Checks if a thread is stalled. */
     bool checkStall(unsigned tid) const;
@@ -259,7 +259,8 @@ class DefaultFetch
      * remove any instructions that are not in the ROB. The source of this
      * squash should be the commit stage.
      */
-    void squash(const Addr &new_PC, const InstSeqNum &seq_num,
+    void squash(const Addr &new_PC, const Addr &new_NPC,
+                const InstSeqNum &seq_num,
                 bool squash_delay_slot, unsigned tid);
 
     /** Ticks the fetch stage, processing all inputs signals and fetching
diff --git a/src/cpu/o3/fetch_impl.hh b/src/cpu/o3/fetch_impl.hh
index 6cff52429..5cd2e3514 100644
--- a/src/cpu/o3/fetch_impl.hh
+++ b/src/cpu/o3/fetch_impl.hh
@@ -319,9 +319,7 @@ DefaultFetch<Impl>::initStage()
     for (int tid = 0; tid < numThreads; tid++) {
         PC[tid] = cpu->readPC(tid);
         nextPC[tid] = cpu->readNextPC(tid);
-#if ISA_HAS_DELAY_SLOT
         nextNPC[tid] = cpu->readNextNPC(tid);
-#endif
     }
 
     // Size of cache block.
@@ -504,14 +502,14 @@ DefaultFetch<Impl>::lookupAndUpdateNextPC(DynInstPtr &inst, Addr &next_PC,
 
     if (!inst->isControl()) {
 #if ISA_HAS_DELAY_SLOT
-        Addr cur_PC = next_PC;
-        next_PC  = cur_PC + instSize;      //next_NPC;
-        next_NPC = cur_PC + (2 * instSize);//next_NPC + instSize;
-        inst->setPredTarg(next_NPC);
+        next_PC  = next_NPC;
+        next_NPC = next_NPC + instSize;
+        inst->setPredTarg(next_PC, next_NPC);
 #else
         next_PC = next_PC + instSize;
-        inst->setPredTarg(next_PC);
+        inst->setPredTarg(next_PC, next_PC + sizeof(TheISA::MachInst));
 #endif
+        inst->setPredTaken(false);
         return false;
     }
 
@@ -521,36 +519,29 @@ DefaultFetch<Impl>::lookupAndUpdateNextPC(DynInstPtr &inst, Addr &next_PC,
     predict_taken = branchPred.predict(inst, pred_PC, tid);
 
     if (predict_taken) {
-        DPRINTF(Fetch, "[tid:%i]: Branch predicted to be true.\n", tid);
+        DPRINTF(Fetch, "[tid:%i]: Branch predicted to be taken.\n", tid);
     } else {
-        DPRINTF(Fetch, "[tid:%i]: Branch predicted to be false.\n", tid);
+        DPRINTF(Fetch, "[tid:%i]: Branch predicted to be not taken.\n", tid);
     }
 
+    next_PC = next_NPC;
     if (predict_taken) {
-        next_PC = next_NPC;
         next_NPC = pred_PC;
-
         // Update delay slot info
         ++delaySlotInfo[tid].numInsts;
         delaySlotInfo[tid].targetAddr = pred_PC;
         DPRINTF(Fetch, "[tid:%i]: %i delay slot inst(s) to process.\n", tid,
                 delaySlotInfo[tid].numInsts);
-    } else { // !predict_taken
-        if (inst->isCondDelaySlot()) {
-            next_PC = pred_PC;
-            // The delay slot is skipped here if there is on
-            // prediction
-        } else {
-            next_PC = next_NPC;
-            // No need to declare a delay slot here since
-            // there is no for the pred. target to jump
-        }
-
+    } else {
         next_NPC = next_NPC + instSize;
     }
 #else
     predict_taken = branchPred.predict(inst, next_PC, tid);
 #endif
+    DPRINTF(Fetch, "[tid:%i]: Branch predicted to go to %#x and then %#x.\n",
+            tid, next_PC, next_NPC);
+    inst->setPredTarg(next_PC, next_NPC);
+    inst->setPredTaken(predict_taken);
 
     ++fetchedBranches;
 
@@ -671,14 +662,15 @@ DefaultFetch<Impl>::fetchCacheLine(Addr fetch_PC, Fault &ret_fault, unsigned tid
 
 template <class Impl>
 inline void
-DefaultFetch<Impl>::doSquash(const Addr &new_PC, unsigned tid)
+DefaultFetch<Impl>::doSquash(const Addr &new_PC,
+        const Addr &new_NPC, unsigned tid)
 {
-    DPRINTF(Fetch, "[tid:%i]: Squashing, setting PC to: %#x.\n",
-            tid, new_PC);
+    DPRINTF(Fetch, "[tid:%i]: Squashing, setting PC to: %#x, NPC to: %#x.\n",
+            tid, new_PC, new_NPC);
 
     PC[tid] = new_PC;
-    nextPC[tid] = new_PC + instSize;
-    nextNPC[tid] = new_PC + (2 * instSize);
+    nextPC[tid] = new_NPC;
+    nextNPC[tid] = new_NPC + instSize;
 
     // Clear the icache miss if it's outstanding.
     if (fetchStatus[tid] == IcacheWaitResponse) {
@@ -704,13 +696,13 @@ DefaultFetch<Impl>::doSquash(const Addr &new_PC, unsigned tid)
 
 template<class Impl>
 void
-DefaultFetch<Impl>::squashFromDecode(const Addr &new_PC,
+DefaultFetch<Impl>::squashFromDecode(const Addr &new_PC, const Addr &new_NPC,
                                      const InstSeqNum &seq_num,
                                      unsigned tid)
 {
     DPRINTF(Fetch, "[tid:%i]: Squashing from decode.\n",tid);
 
-    doSquash(new_PC, tid);
+    doSquash(new_PC, new_NPC, tid);
 
 #if ISA_HAS_DELAY_SLOT
     if (seq_num <=  delaySlotInfo[tid].branchSeqNum) {
@@ -793,12 +785,13 @@ DefaultFetch<Impl>::updateFetchStatus()
 
 template <class Impl>
 void
-DefaultFetch<Impl>::squash(const Addr &new_PC, const InstSeqNum &seq_num,
+DefaultFetch<Impl>::squash(const Addr &new_PC, const Addr &new_NPC,
+                           const InstSeqNum &seq_num,
                            bool squash_delay_slot, unsigned tid)
 {
     DPRINTF(Fetch, "[tid:%u]: Squash from commit.\n",tid);
 
-    doSquash(new_PC, tid);
+    doSquash(new_PC, new_NPC, tid);
 
 #if ISA_HAS_DELAY_SLOT
     if (seq_num <=  delaySlotInfo[tid].branchSeqNum) {
@@ -928,6 +921,7 @@ DefaultFetch<Impl>::checkSignalsAndUpdate(unsigned tid)
 #endif
         // In any case, squash.
         squash(fromCommit->commitInfo[tid].nextPC,
+               fromCommit->commitInfo[tid].nextNPC,
                doneSeqNum,
                fromCommit->commitInfo[tid].squashDelaySlot,
                tid);
@@ -985,6 +979,7 @@ DefaultFetch<Impl>::checkSignalsAndUpdate(unsigned tid)
 #endif
             // Squash unless we're already squashing
             squashFromDecode(fromDecode->decodeInfo[tid].nextPC,
+                             fromDecode->decodeInfo[tid].nextNPC,
                              doneSeqNum,
                              tid);
 
@@ -1041,6 +1036,8 @@ DefaultFetch<Impl>::fetch(bool &status_change)
     // The current PC.
     Addr &fetch_PC = PC[tid];
 
+    Addr &fetch_NPC = nextPC[tid];
+
     // Fault code for memory access.
     Fault fault = NoFault;
 
@@ -1097,7 +1094,8 @@ DefaultFetch<Impl>::fetch(bool &status_change)
     }
 
     Addr next_PC = fetch_PC;
-    Addr next_NPC = next_PC + instSize;
+    Addr next_NPC = fetch_NPC;
+
     InstSeqNum inst_seq;
     MachInst inst;
     ExtMachInst ext_inst;
@@ -1144,8 +1142,9 @@ DefaultFetch<Impl>::fetch(bool &status_change)
 #endif
 
             // Create a new DynInst from the instruction fetched.
-            DynInstPtr instruction = new DynInst(ext_inst, fetch_PC,
-                                                 next_PC,
+            DynInstPtr instruction = new DynInst(ext_inst,
+                                                 fetch_PC, fetch_NPC,
+                                                 next_PC, next_NPC,
                                                  inst_seq, cpu);
             instruction->setTid(tid);
 
@@ -1243,9 +1242,9 @@ DefaultFetch<Impl>::fetch(bool &status_change)
         if (delaySlotInfo[tid].targetReady &&
             delaySlotInfo[tid].numInsts == 0) {
             // Set PC to target
-            PC[tid] = delaySlotInfo[tid].targetAddr; //next_PC
-            nextPC[tid] = next_PC + instSize;        //next_NPC
-            nextNPC[tid] = next_PC + (2 * instSize);
+            PC[tid] = next_PC;
+            nextPC[tid] = next_NPC;
+            nextNPC[tid] = next_NPC + instSize;
 
             delaySlotInfo[tid].targetReady = false;
         } else {
diff --git a/src/cpu/o3/iew_impl.hh b/src/cpu/o3/iew_impl.hh
index 85db68576..24c8484b4 100644
--- a/src/cpu/o3/iew_impl.hh
+++ b/src/cpu/o3/iew_impl.hh
@@ -481,25 +481,28 @@ DefaultIEW<Impl>::squashDueToBranch(DynInstPtr &inst, unsigned tid)
     toCommit->branchMispredict[tid] = true;
 
 #if ISA_HAS_DELAY_SLOT
+    int instSize = sizeof(TheISA::MachInst);
     bool branch_taken =
-        (inst->readNextNPC() != (inst->readPC() + 2 * sizeof(TheISA::MachInst)) &&
-         inst->readNextNPC() != (inst->readPC() + 3 * sizeof(TheISA::MachInst)));
+        !(inst->readNextPC() + instSize == inst->readNextNPC() &&
+          (inst->readNextPC() == inst->readPC() + instSize ||
+           inst->readNextPC() == inst->readPC() + 2 * instSize));
     DPRINTF(Sparc, "Branch taken = %s [sn:%i]\n",
             branch_taken ? "true": "false", inst->seqNum);
 
     toCommit->branchTaken[tid] = branch_taken;
 
-    bool squashDelaySlot =
-        (inst->readNextPC() != inst->readPC() + sizeof(TheISA::MachInst));
+    bool squashDelaySlot = true;
+//	(inst->readNextPC() != inst->readPC() + sizeof(TheISA::MachInst));
     DPRINTF(Sparc, "Squash delay slot = %s [sn:%i]\n",
             squashDelaySlot ? "true": "false", inst->seqNum);
     toCommit->squashDelaySlot[tid] = squashDelaySlot;
     //If we're squashing the delay slot, we need to pick back up at NextPC.
     //Otherwise, NextPC isn't being squashed, so we should pick back up at
     //NextNPC.
-    if (squashDelaySlot)
+    if (squashDelaySlot) {
         toCommit->nextPC[tid] = inst->readNextPC();
-    else
+        toCommit->nextNPC[tid] = inst->readNextNPC();
+    } else
         toCommit->nextPC[tid] = inst->readNextNPC();
 #else
     toCommit->branchTaken[tid] = inst->readNextPC() !=
@@ -522,6 +525,9 @@ DefaultIEW<Impl>::squashDueToMemOrder(DynInstPtr &inst, unsigned tid)
     toCommit->squash[tid] = true;
     toCommit->squashedSeqNum[tid] = inst->seqNum;
     toCommit->nextPC[tid] = inst->readNextPC();
+#if ISA_HAS_DELAY_SLOT
+    toCommit->nextNPC[tid] = inst->readNextNPC();
+#endif
 
     toCommit->includeSquashInst[tid] = false;
 
@@ -538,6 +544,9 @@ DefaultIEW<Impl>::squashDueToMemBlocked(DynInstPtr &inst, unsigned tid)
     toCommit->squash[tid] = true;
     toCommit->squashedSeqNum[tid] = inst->seqNum;
     toCommit->nextPC[tid] = inst->readPC();
+#if ISA_HAS_DELAY_SLOT
+    toCommit->nextNPC[tid] = inst->readNextNPC();
+#endif
 
     // Must include the broadcasted SN in the squash.
     toCommit->includeSquashInst[tid] = true;
@@ -1342,6 +1351,7 @@ DefaultIEW<Impl>::executeInsts()
                 fetchRedirect[tid] = true;
 
                 DPRINTF(IEW, "Execute: Branch mispredict detected.\n");
+                DPRINTF(IEW, "Predicted target was %#x.\n", inst->predPC);
 #if ISA_HAS_DELAY_SLOT
                 DPRINTF(IEW, "Execute: Redirecting fetch to PC: %#x.\n",
                         inst->nextNPC);
@@ -1352,7 +1362,7 @@ DefaultIEW<Impl>::executeInsts()
                 // If incorrect, then signal the ROB that it must be squashed.
                 squashDueToBranch(inst, tid);
 
-                if (inst->predTaken()) {
+                if (inst->readPredTaken()) {
                     predictedTakenIncorrect++;
                 } else {
                     predictedNotTakenIncorrect++;
diff --git a/src/cpu/o3/sparc/dyn_inst.hh b/src/cpu/o3/sparc/dyn_inst.hh
index fda99cb6c..e95ae2fd5 100644
--- a/src/cpu/o3/sparc/dyn_inst.hh
+++ b/src/cpu/o3/sparc/dyn_inst.hh
@@ -56,8 +56,8 @@ class SparcDynInst : public BaseDynInst<Impl>
 
   public:
     /** BaseDynInst constructor given a binary instruction. */
-    SparcDynInst(TheISA::ExtMachInst inst, Addr PC,
-            Addr Pred_PC, InstSeqNum seq_num, O3CPU *cpu);
+    SparcDynInst(TheISA::ExtMachInst inst, Addr PC, Addr NPC,
+            Addr Pred_PC, Addr Pred_NPC, InstSeqNum seq_num, O3CPU *cpu);
 
     /** BaseDynInst constructor given a static inst pointer. */
     SparcDynInst(StaticInstPtr &_staticInst);
diff --git a/src/cpu/o3/sparc/dyn_inst_impl.hh b/src/cpu/o3/sparc/dyn_inst_impl.hh
index b830ee7bd..c4d30b6f4 100644
--- a/src/cpu/o3/sparc/dyn_inst_impl.hh
+++ b/src/cpu/o3/sparc/dyn_inst_impl.hh
@@ -31,9 +31,10 @@
 #include "cpu/o3/sparc/dyn_inst.hh"
 
 template <class Impl>
-SparcDynInst<Impl>::SparcDynInst(TheISA::ExtMachInst inst, Addr PC,
-        Addr Pred_PC, InstSeqNum seq_num, O3CPU *cpu)
-    : BaseDynInst<Impl>(inst, PC, Pred_PC, seq_num, cpu)
+SparcDynInst<Impl>::SparcDynInst(TheISA::ExtMachInst inst,
+        Addr PC, Addr NPC, Addr Pred_PC, Addr Pred_NPC,
+        InstSeqNum seq_num, O3CPU *cpu)
+    : BaseDynInst<Impl>(inst, PC, NPC, Pred_PC, Pred_NPC, seq_num, cpu)
 {
     initVars();
 }
author	Gabe Black <gblack@eecs.umich.edu>	2006-12-16 07:32:06 -0500
committer	Gabe Black <gblack@eecs.umich.edu>	2006-12-16 07:32:06 -0500
commit	37b9966eb466b1655f0d4e604bafa729a3aaea6a (patch)
tree	f4e3b34b69f2445db9f688a819bf2b1d8f007ecd /src
parent	4d66ddbe35252d3d70a0c3d25d100672db5f1ef9 (diff)
download	gem5-37b9966eb466b1655f0d4e604bafa729a3aaea6a.tar.xz