1 files changed, 184 insertions, 39 deletions
diff --git a/src/cpu/o3/fetch_impl.hh b/src/cpu/o3/fetch_impl.hh
index 39a13f9f8..990db88ac 100644
--- a/src/cpu/o3/fetch_impl.hh
+++ b/src/cpu/o3/fetch_impl.hh
@@ -51,9 +51,6 @@
 
 #include <algorithm>
 
-using namespace std;
-using namespace TheISA;
-
 template<class Impl>
 Tick
 DefaultFetch<Impl>::IcachePort::recvAtomic(PacketPtr pkt)
@@ -118,7 +115,7 @@ DefaultFetch<Impl>::DefaultFetch(Params *params)
     // Set fetch stage's status to inactive.
     _status = Inactive;
 
-    string policy = params->smtFetchPolicy;
+    std::string policy = params->smtFetchPolicy;
 
     // Convert string to lowercase
     std::transform(policy.begin(), policy.end(), policy.begin(),
@@ -162,15 +159,22 @@ DefaultFetch<Impl>::DefaultFetch(Params *params)
 
         // Create space to store a cache line.
         cacheData[tid] = new uint8_t[cacheBlkSize];
+        cacheDataPC[tid] = 0;
+        cacheDataValid[tid] = false;
+
+        delaySlotInfo[tid].branchSeqNum = -1;
+        delaySlotInfo[tid].numInsts = 0;
+        delaySlotInfo[tid].targetAddr = 0;
+        delaySlotInfo[tid].targetReady = false;
 
-        stalls[tid].decode = 0;
-        stalls[tid].rename = 0;
-        stalls[tid].iew = 0;
-        stalls[tid].commit = 0;
+        stalls[tid].decode = false;
+        stalls[tid].rename = false;
+        stalls[tid].iew = false;
+        stalls[tid].commit = false;
     }
 
     // Get the size of an instruction.
-    instSize = sizeof(MachInst);
+    instSize = sizeof(TheISA::MachInst);
 }
 
 template <class Impl>
@@ -286,6 +290,9 @@ DefaultFetch<Impl>::setCPU(O3CPU *cpu_ptr)
     }
 #endif
 
+    // Schedule fetch to get the correct PC from the CPU
+    // scheduleFetchStartupEvent(1);
+
     // Fetch needs to start fetching instructions at the very beginning,
     // so it must start up in active state.
     switchToActive();
@@ -307,7 +314,7 @@ DefaultFetch<Impl>::setTimeBuffer(TimeBuffer<TimeStruct> *time_buffer)
 
 template<class Impl>
 void
-DefaultFetch<Impl>::setActiveThreads(list<unsigned> *at_ptr)
+DefaultFetch<Impl>::setActiveThreads(std::list<unsigned> *at_ptr)
 {
     DPRINTF(Fetch, "Setting active threads list pointer.\n");
     activeThreads = at_ptr;
@@ -358,6 +365,7 @@ DefaultFetch<Impl>::processCacheCompletion(PacketPtr pkt)
     }
 
     memcpy(cacheData[tid], pkt->getPtr<uint8_t *>(), cacheBlkSize);
+    cacheDataValid[tid] = true;
 
     if (!drainPending) {
         // Wake up the CPU (if it went to sleep and was waiting on
@@ -423,6 +431,10 @@ DefaultFetch<Impl>::takeOverFrom()
         nextPC[i] = cpu->readNextPC(i);
 #if THE_ISA != ALPHA_ISA
         nextNPC[i] = cpu->readNextNPC(i);
+        delaySlotInfo[i].branchSeqNum = -1;
+        delaySlotInfo[i].numInsts = 0;
+        delaySlotInfo[i].targetAddr = 0;
+        delaySlotInfo[i].targetReady = false;
 #endif
         fetchStatus[i] = Running;
     }
@@ -471,7 +483,8 @@ DefaultFetch<Impl>::switchToInactive()
 
 template <class Impl>
 bool
-DefaultFetch<Impl>::lookupAndUpdateNextPC(DynInstPtr &inst, Addr &next_PC)
+DefaultFetch<Impl>::lookupAndUpdateNextPC(DynInstPtr &inst, Addr &next_PC,
+                                          Addr &next_NPC)
 {
     // Do branch prediction check here.
     // A bit of a misnomer...next_PC is actually the current PC until
@@ -479,12 +492,54 @@ DefaultFetch<Impl>::lookupAndUpdateNextPC(DynInstPtr &inst, Addr &next_PC)
     bool predict_taken;
 
     if (!inst->isControl()) {
+#if THE_ISA == ALPHA_ISA
         next_PC = next_PC + instSize;
         inst->setPredTarg(next_PC);
+#else
+        Addr cur_PC = next_PC;
+        next_PC  = cur_PC + instSize;      //next_NPC;
+        next_NPC = cur_PC + (2 * instSize);//next_NPC + instSize;
+        inst->setPredTarg(next_NPC);
+#endif
         return false;
     }
 
-    predict_taken = branchPred.predict(inst, next_PC, inst->threadNumber);
+    int tid = inst->threadNumber;
+#if THE_ISA == ALPHA_ISA
+    predict_taken = branchPred.predict(inst, next_PC, tid);
+#else
+    Addr pred_PC = next_PC;
+    predict_taken = branchPred.predict(inst, pred_PC, tid);
+
+    if (predict_taken) {
+        DPRINTF(Fetch, "[tid:%i]: Branch predicted to be true.\n", tid);
+    } else {
+        DPRINTF(Fetch, "[tid:%i]: Branch predicted to be false.\n", tid);
+    }
+
+    if (predict_taken) {
+        next_PC = next_NPC;
+        next_NPC = pred_PC;
+
+        // Update delay slot info
+        ++delaySlotInfo[tid].numInsts;
+        delaySlotInfo[tid].targetAddr = pred_PC;
+        DPRINTF(Fetch, "[tid:%i]: %i delay slot inst(s) to process.\n", tid,
+                delaySlotInfo[tid].numInsts);
+    } else { // !predict_taken
+        if (inst->isCondDelaySlot()) {
+            next_PC = pred_PC;
+            // The delay slot is skipped here if there is on
+            // prediction
+        } else {
+            next_PC = next_NPC;
+            // No need to declare a delay slot here since
+            // there is no for the pred. target to jump
+        }
+
+        next_NPC = next_NPC + instSize;
+    }
+#endif
 
     ++fetchedBranches;
 
@@ -519,6 +574,11 @@ DefaultFetch<Impl>::fetchCacheLine(Addr fetch_PC, Fault &ret_fault, unsigned tid
     // Align the fetch PC so it's at the start of a cache block.
     fetch_PC = icacheBlockAlignPC(fetch_PC);
 
+    // If we've already got the block, no need to try to fetch it again.
+    if (cacheDataValid[tid] && fetch_PC == cacheDataPC[tid]) {
+        return true;
+    }
+
     // Setup the memReq to do a read of the first instruction's address.
     // Set the appropriate read size and flags as well.
     // Build request here.
@@ -550,7 +610,10 @@ DefaultFetch<Impl>::fetchCacheLine(Addr fetch_PC, Fault &ret_fault, unsigned tid
         // Build packet here.
         PacketPtr data_pkt = new Packet(mem_req,
                                         Packet::ReadReq, Packet::Broadcast);
-        data_pkt->dataDynamic(new uint8_t[cacheBlkSize]);
+        data_pkt->dataDynamicArray(new uint8_t[cacheBlkSize]);
+
+        cacheDataPC[tid] = fetch_PC;
+        cacheDataValid[tid] = false;
 
         DPRINTF(Fetch, "Fetch: Doing instruction read.\n");
 
@@ -595,6 +658,7 @@ DefaultFetch<Impl>::doSquash(const Addr &new_PC, unsigned tid)
 
     PC[tid] = new_PC;
     nextPC[tid] = new_PC + instSize;
+    nextNPC[tid] = new_PC + (2 * instSize);
 
     // Clear the icache miss if it's outstanding.
     if (fetchStatus[tid] == IcacheWaitResponse) {
@@ -628,6 +692,14 @@ DefaultFetch<Impl>::squashFromDecode(const Addr &new_PC,
 
     doSquash(new_PC, tid);
 
+#if THE_ISA != ALPHA_ISA
+    if (seq_num <=  delaySlotInfo[tid].branchSeqNum) {
+        delaySlotInfo[tid].numInsts = 0;
+        delaySlotInfo[tid].targetAddr = 0;
+        delaySlotInfo[tid].targetReady = false;
+    }
+#endif
+
     // Tell the CPU to remove any instructions that are in flight between
     // fetch and decode.
     cpu->removeInstsUntil(seq_num, tid);
@@ -664,7 +736,7 @@ typename DefaultFetch<Impl>::FetchStatus
 DefaultFetch<Impl>::updateFetchStatus()
 {
     //Check Running
-    list<unsigned>::iterator threads = (*activeThreads).begin();
+    std::list<unsigned>::iterator threads = (*activeThreads).begin();
 
     while (threads != (*activeThreads).end()) {
 
@@ -701,21 +773,33 @@ DefaultFetch<Impl>::updateFetchStatus()
 
 template <class Impl>
 void
-DefaultFetch<Impl>::squash(const Addr &new_PC, unsigned tid)
+DefaultFetch<Impl>::squash(const Addr &new_PC, const InstSeqNum &seq_num,
+                           bool squash_delay_slot, unsigned tid)
 {
     DPRINTF(Fetch, "[tid:%u]: Squash from commit.\n",tid);
 
     doSquash(new_PC, tid);
 
+#if THE_ISA == ALPHA_ISA
+    // Tell the CPU to remove any instructions that are not in the ROB.
+    cpu->removeInstsNotInROB(tid, true, 0);
+#else
+    if (seq_num <=  delaySlotInfo[tid].branchSeqNum) {
+        delaySlotInfo[tid].numInsts = 0;
+        delaySlotInfo[tid].targetAddr = 0;
+        delaySlotInfo[tid].targetReady = false;
+    }
+
     // Tell the CPU to remove any instructions that are not in the ROB.
-    cpu->removeInstsNotInROB(tid);
+    cpu->removeInstsNotInROB(tid, squash_delay_slot, seq_num);
+#endif
 }
 
 template <class Impl>
 void
 DefaultFetch<Impl>::tick()
 {
-    list<unsigned>::iterator threads = (*activeThreads).begin();
+    std::list<unsigned>::iterator threads = (*activeThreads).begin();
     bool status_change = false;
 
     wroteToTimeBuffer = false;
@@ -817,8 +901,16 @@ DefaultFetch<Impl>::checkSignalsAndUpdate(unsigned tid)
         DPRINTF(Fetch, "[tid:%u]: Squashing instructions due to squash "
                 "from commit.\n",tid);
 
+#if THE_ISA == ALPHA_ISA
+            InstSeqNum doneSeqNum = fromCommit->commitInfo[tid].doneSeqNum;
+#else
+            InstSeqNum doneSeqNum = fromCommit->commitInfo[tid].bdelayDoneSeqNum;
+#endif
         // In any case, squash.
-        squash(fromCommit->commitInfo[tid].nextPC,tid);
+        squash(fromCommit->commitInfo[tid].nextPC,
+               doneSeqNum,
+               fromCommit->commitInfo[tid].squashDelaySlot,
+               tid);
 
         // Also check if there's a mispredict that happened.
         if (fromCommit->commitInfo[tid].branchMispredict) {
@@ -865,9 +957,15 @@ DefaultFetch<Impl>::checkSignalsAndUpdate(unsigned tid)
         }
 
         if (fetchStatus[tid] != Squashing) {
+
+#if THE_ISA == ALPHA_ISA
+            InstSeqNum doneSeqNum = fromDecode->decodeInfo[tid].doneSeqNum;
+#else
+            InstSeqNum doneSeqNum = fromDecode->decodeInfo[tid].bdelayDoneSeqNum;
+#endif
             // Squash unless we're already squashing
             squashFromDecode(fromDecode->decodeInfo[tid].nextPC,
-                             fromDecode->decodeInfo[tid].doneSeqNum,
+                             doneSeqNum,
                              tid);
 
             return true;
@@ -973,6 +1071,7 @@ DefaultFetch<Impl>::fetch(bool &status_change)
     }
 
     Addr next_PC = fetch_PC;
+    Addr next_NPC = next_PC + instSize;
     InstSeqNum inst_seq;
     MachInst inst;
     ExtMachInst ext_inst;
@@ -991,10 +1090,13 @@ DefaultFetch<Impl>::fetch(bool &status_change)
         // ended this fetch block.
         bool predicted_branch = false;
 
+        // Need to keep track of whether or not a delay slot
+        // instruction has been fetched
+
         for (;
              offset < cacheBlkSize &&
                  numInst < fetchWidth &&
-                 !predicted_branch;
+                 (!predicted_branch || delaySlotInfo[tid].numInsts > 0);
              ++numInst) {
 
             // Get a sequence number.
@@ -1004,7 +1106,7 @@ DefaultFetch<Impl>::fetch(bool &status_change)
             assert(offset <= cacheBlkSize - instSize);
 
             // Get the instruction from the array of the cache line.
-            inst = gtoh(*reinterpret_cast<MachInst *>
+            inst = TheISA::gtoh(*reinterpret_cast<TheISA::MachInst *>
                         (&cacheData[tid][offset]));
 
             ext_inst = TheISA::makeExtMI(inst, fetch_PC);
@@ -1031,7 +1133,8 @@ DefaultFetch<Impl>::fetch(bool &status_change)
                                      instruction->staticInst,
                                      instruction->readPC(),tid);
 
-            predicted_branch = lookupAndUpdateNextPC(instruction, next_PC);
+            predicted_branch = lookupAndUpdateNextPC(instruction, next_PC,
+                                                     next_NPC);
 
             // Add instruction to the CPU's list of instructions.
             instruction->setInstListIt(cpu->addInst(instruction));
@@ -1057,7 +1160,41 @@ DefaultFetch<Impl>::fetch(bool &status_change)
                 break;
             }
 
-            offset+= instSize;
+            offset += instSize;
+
+#if THE_ISA != ALPHA_ISA
+            if (predicted_branch) {
+                delaySlotInfo[tid].branchSeqNum = inst_seq;
+
+                DPRINTF(Fetch, "[tid:%i]: Delay slot branch set to [sn:%i]\n",
+                        tid, inst_seq);
+                continue;
+            } else if (delaySlotInfo[tid].numInsts > 0) {
+                --delaySlotInfo[tid].numInsts;
+
+                // It's OK to set PC to target of branch
+                if (delaySlotInfo[tid].numInsts == 0) {
+                    delaySlotInfo[tid].targetReady = true;
+
+                    // Break the looping condition
+                    predicted_branch = true;
+                }
+
+                DPRINTF(Fetch, "[tid:%i]: %i delay slot inst(s) left to"
+                        " process.\n", tid, delaySlotInfo[tid].numInsts);
+            }
+#endif
+        }
+
+        if (offset >= cacheBlkSize) {
+            DPRINTF(Fetch, "[tid:%i]: Done fetching, reached the end of cache "
+                    "block.\n", tid);
+        } else if (numInst >= fetchWidth) {
+            DPRINTF(Fetch, "[tid:%i]: Done fetching, reached fetch bandwidth "
+                    "for this cycle.\n", tid);
+        } else if (predicted_branch && delaySlotInfo[tid].numInsts <= 0) {
+            DPRINTF(Fetch, "[tid:%i]: Done fetching, predicted branch "
+                    "instruction encountered.\n", tid);
         }
     }
 
@@ -1068,18 +1205,26 @@ DefaultFetch<Impl>::fetch(bool &status_change)
     // Now that fetching is completed, update the PC to signify what the next
     // cycle will be.
     if (fault == NoFault) {
-        DPRINTF(Fetch, "[tid:%i]: Setting PC to %08p.\n",tid, next_PC);
-
 #if THE_ISA == ALPHA_ISA
+        DPRINTF(Fetch, "[tid:%i]: Setting PC to %08p.\n",tid, next_PC);
         PC[tid] = next_PC;
         nextPC[tid] = next_PC + instSize;
 #else
-        PC[tid] = next_PC;
-        nextPC[tid] = next_PC + instSize;
-        nextPC[tid] = next_PC + instSize;
+        if (delaySlotInfo[tid].targetReady &&
+            delaySlotInfo[tid].numInsts == 0) {
+            // Set PC to target
+            PC[tid] = delaySlotInfo[tid].targetAddr; //next_PC
+            nextPC[tid] = next_PC + instSize;        //next_NPC
+            nextNPC[tid] = next_PC + (2 * instSize);
+
+            delaySlotInfo[tid].targetReady = false;
+        } else {
+            PC[tid] = next_PC;
+            nextPC[tid] = next_NPC;
+            nextNPC[tid] = next_NPC + instSize;
+        }
 
-        thread->setNextPC(thread->readNextNPC());
-        thread->setNextNPC(thread->readNextNPC() + sizeof(MachInst));
+        DPRINTF(Fetch, "[tid:%i]: Setting PC to %08p.\n", tid, PC[tid]);
 #endif
     } else {
         // We shouldn't be in an icache miss and also have a fault (an ITB
@@ -1123,9 +1268,9 @@ DefaultFetch<Impl>::fetch(bool &status_change)
         fetchStatus[tid] = TrapPending;
         status_change = true;
 
-        warn("cycle %lli: fault (%d) detected @ PC %08p", curTick, fault, PC[tid]);
+        warn("cycle %lli: fault (%s) detected @ PC %08p", curTick, fault->name(), PC[tid]);
 #else // !FULL_SYSTEM
-        warn("cycle %lli: fault (%d) detected @ PC %08p", curTick, fault, PC[tid]);
+        warn("cycle %lli: fault (%s) detected @ PC %08p", curTick, fault->name(), PC[tid]);
 #endif // FULL_SYSTEM
     }
 }
@@ -1202,8 +1347,8 @@ template<class Impl>
 int
 DefaultFetch<Impl>::roundRobin()
 {
-    list<unsigned>::iterator pri_iter = priorityList.begin();
-    list<unsigned>::iterator end      = priorityList.end();
+    std::list<unsigned>::iterator pri_iter = priorityList.begin();
+    std::list<unsigned>::iterator end      = priorityList.end();
 
     int high_pri;
 
@@ -1232,9 +1377,9 @@ template<class Impl>
 int
 DefaultFetch<Impl>::iqCount()
 {
-    priority_queue<unsigned> PQ;
+    std::priority_queue<unsigned> PQ;
 
-    list<unsigned>::iterator threads = (*activeThreads).begin();
+    std::list<unsigned>::iterator threads = (*activeThreads).begin();
 
     while (threads != (*activeThreads).end()) {
         unsigned tid = *threads++;
@@ -1262,10 +1407,10 @@ template<class Impl>
 int
 DefaultFetch<Impl>::lsqCount()
 {
-    priority_queue<unsigned> PQ;
+    std::priority_queue<unsigned> PQ;
 
 
-    list<unsigned>::iterator threads = (*activeThreads).begin();
+    std::list<unsigned>::iterator threads = (*activeThreads).begin();
 
     while (threads != (*activeThreads).end()) {
         unsigned tid = *threads++;
@@ -1293,7 +1438,7 @@ template<class Impl>
 int
 DefaultFetch<Impl>::branchCount()
 {
-    list<unsigned>::iterator threads = (*activeThreads).begin();
+    std::list<unsigned>::iterator threads = (*activeThreads).begin();
     panic("Branch Count Fetch policy unimplemented\n");
     return *threads;
 }