diff options
Diffstat (limited to 'src/cpu/o3/fetch_impl.hh')
-rw-r--r-- | src/cpu/o3/fetch_impl.hh | 223 |
1 files changed, 184 insertions, 39 deletions
diff --git a/src/cpu/o3/fetch_impl.hh b/src/cpu/o3/fetch_impl.hh index 39a13f9f8..990db88ac 100644 --- a/src/cpu/o3/fetch_impl.hh +++ b/src/cpu/o3/fetch_impl.hh @@ -51,9 +51,6 @@ #include <algorithm> -using namespace std; -using namespace TheISA; - template<class Impl> Tick DefaultFetch<Impl>::IcachePort::recvAtomic(PacketPtr pkt) @@ -118,7 +115,7 @@ DefaultFetch<Impl>::DefaultFetch(Params *params) // Set fetch stage's status to inactive. _status = Inactive; - string policy = params->smtFetchPolicy; + std::string policy = params->smtFetchPolicy; // Convert string to lowercase std::transform(policy.begin(), policy.end(), policy.begin(), @@ -162,15 +159,22 @@ DefaultFetch<Impl>::DefaultFetch(Params *params) // Create space to store a cache line. cacheData[tid] = new uint8_t[cacheBlkSize]; + cacheDataPC[tid] = 0; + cacheDataValid[tid] = false; + + delaySlotInfo[tid].branchSeqNum = -1; + delaySlotInfo[tid].numInsts = 0; + delaySlotInfo[tid].targetAddr = 0; + delaySlotInfo[tid].targetReady = false; - stalls[tid].decode = 0; - stalls[tid].rename = 0; - stalls[tid].iew = 0; - stalls[tid].commit = 0; + stalls[tid].decode = false; + stalls[tid].rename = false; + stalls[tid].iew = false; + stalls[tid].commit = false; } // Get the size of an instruction. - instSize = sizeof(MachInst); + instSize = sizeof(TheISA::MachInst); } template <class Impl> @@ -286,6 +290,9 @@ DefaultFetch<Impl>::setCPU(O3CPU *cpu_ptr) } #endif + // Schedule fetch to get the correct PC from the CPU + // scheduleFetchStartupEvent(1); + // Fetch needs to start fetching instructions at the very beginning, // so it must start up in active state. switchToActive(); @@ -307,7 +314,7 @@ DefaultFetch<Impl>::setTimeBuffer(TimeBuffer<TimeStruct> *time_buffer) template<class Impl> void -DefaultFetch<Impl>::setActiveThreads(list<unsigned> *at_ptr) +DefaultFetch<Impl>::setActiveThreads(std::list<unsigned> *at_ptr) { DPRINTF(Fetch, "Setting active threads list pointer.\n"); activeThreads = at_ptr; @@ -358,6 +365,7 @@ DefaultFetch<Impl>::processCacheCompletion(PacketPtr pkt) } memcpy(cacheData[tid], pkt->getPtr<uint8_t *>(), cacheBlkSize); + cacheDataValid[tid] = true; if (!drainPending) { // Wake up the CPU (if it went to sleep and was waiting on @@ -423,6 +431,10 @@ DefaultFetch<Impl>::takeOverFrom() nextPC[i] = cpu->readNextPC(i); #if THE_ISA != ALPHA_ISA nextNPC[i] = cpu->readNextNPC(i); + delaySlotInfo[i].branchSeqNum = -1; + delaySlotInfo[i].numInsts = 0; + delaySlotInfo[i].targetAddr = 0; + delaySlotInfo[i].targetReady = false; #endif fetchStatus[i] = Running; } @@ -471,7 +483,8 @@ DefaultFetch<Impl>::switchToInactive() template <class Impl> bool -DefaultFetch<Impl>::lookupAndUpdateNextPC(DynInstPtr &inst, Addr &next_PC) +DefaultFetch<Impl>::lookupAndUpdateNextPC(DynInstPtr &inst, Addr &next_PC, + Addr &next_NPC) { // Do branch prediction check here. // A bit of a misnomer...next_PC is actually the current PC until @@ -479,12 +492,54 @@ DefaultFetch<Impl>::lookupAndUpdateNextPC(DynInstPtr &inst, Addr &next_PC) bool predict_taken; if (!inst->isControl()) { +#if THE_ISA == ALPHA_ISA next_PC = next_PC + instSize; inst->setPredTarg(next_PC); +#else + Addr cur_PC = next_PC; + next_PC = cur_PC + instSize; //next_NPC; + next_NPC = cur_PC + (2 * instSize);//next_NPC + instSize; + inst->setPredTarg(next_NPC); +#endif return false; } - predict_taken = branchPred.predict(inst, next_PC, inst->threadNumber); + int tid = inst->threadNumber; +#if THE_ISA == ALPHA_ISA + predict_taken = branchPred.predict(inst, next_PC, tid); +#else + Addr pred_PC = next_PC; + predict_taken = branchPred.predict(inst, pred_PC, tid); + + if (predict_taken) { + DPRINTF(Fetch, "[tid:%i]: Branch predicted to be true.\n", tid); + } else { + DPRINTF(Fetch, "[tid:%i]: Branch predicted to be false.\n", tid); + } + + if (predict_taken) { + next_PC = next_NPC; + next_NPC = pred_PC; + + // Update delay slot info + ++delaySlotInfo[tid].numInsts; + delaySlotInfo[tid].targetAddr = pred_PC; + DPRINTF(Fetch, "[tid:%i]: %i delay slot inst(s) to process.\n", tid, + delaySlotInfo[tid].numInsts); + } else { // !predict_taken + if (inst->isCondDelaySlot()) { + next_PC = pred_PC; + // The delay slot is skipped here if there is on + // prediction + } else { + next_PC = next_NPC; + // No need to declare a delay slot here since + // there is no for the pred. target to jump + } + + next_NPC = next_NPC + instSize; + } +#endif ++fetchedBranches; @@ -519,6 +574,11 @@ DefaultFetch<Impl>::fetchCacheLine(Addr fetch_PC, Fault &ret_fault, unsigned tid // Align the fetch PC so it's at the start of a cache block. fetch_PC = icacheBlockAlignPC(fetch_PC); + // If we've already got the block, no need to try to fetch it again. + if (cacheDataValid[tid] && fetch_PC == cacheDataPC[tid]) { + return true; + } + // Setup the memReq to do a read of the first instruction's address. // Set the appropriate read size and flags as well. // Build request here. @@ -550,7 +610,10 @@ DefaultFetch<Impl>::fetchCacheLine(Addr fetch_PC, Fault &ret_fault, unsigned tid // Build packet here. PacketPtr data_pkt = new Packet(mem_req, Packet::ReadReq, Packet::Broadcast); - data_pkt->dataDynamic(new uint8_t[cacheBlkSize]); + data_pkt->dataDynamicArray(new uint8_t[cacheBlkSize]); + + cacheDataPC[tid] = fetch_PC; + cacheDataValid[tid] = false; DPRINTF(Fetch, "Fetch: Doing instruction read.\n"); @@ -595,6 +658,7 @@ DefaultFetch<Impl>::doSquash(const Addr &new_PC, unsigned tid) PC[tid] = new_PC; nextPC[tid] = new_PC + instSize; + nextNPC[tid] = new_PC + (2 * instSize); // Clear the icache miss if it's outstanding. if (fetchStatus[tid] == IcacheWaitResponse) { @@ -628,6 +692,14 @@ DefaultFetch<Impl>::squashFromDecode(const Addr &new_PC, doSquash(new_PC, tid); +#if THE_ISA != ALPHA_ISA + if (seq_num <= delaySlotInfo[tid].branchSeqNum) { + delaySlotInfo[tid].numInsts = 0; + delaySlotInfo[tid].targetAddr = 0; + delaySlotInfo[tid].targetReady = false; + } +#endif + // Tell the CPU to remove any instructions that are in flight between // fetch and decode. cpu->removeInstsUntil(seq_num, tid); @@ -664,7 +736,7 @@ typename DefaultFetch<Impl>::FetchStatus DefaultFetch<Impl>::updateFetchStatus() { //Check Running - list<unsigned>::iterator threads = (*activeThreads).begin(); + std::list<unsigned>::iterator threads = (*activeThreads).begin(); while (threads != (*activeThreads).end()) { @@ -701,21 +773,33 @@ DefaultFetch<Impl>::updateFetchStatus() template <class Impl> void -DefaultFetch<Impl>::squash(const Addr &new_PC, unsigned tid) +DefaultFetch<Impl>::squash(const Addr &new_PC, const InstSeqNum &seq_num, + bool squash_delay_slot, unsigned tid) { DPRINTF(Fetch, "[tid:%u]: Squash from commit.\n",tid); doSquash(new_PC, tid); +#if THE_ISA == ALPHA_ISA + // Tell the CPU to remove any instructions that are not in the ROB. + cpu->removeInstsNotInROB(tid, true, 0); +#else + if (seq_num <= delaySlotInfo[tid].branchSeqNum) { + delaySlotInfo[tid].numInsts = 0; + delaySlotInfo[tid].targetAddr = 0; + delaySlotInfo[tid].targetReady = false; + } + // Tell the CPU to remove any instructions that are not in the ROB. - cpu->removeInstsNotInROB(tid); + cpu->removeInstsNotInROB(tid, squash_delay_slot, seq_num); +#endif } template <class Impl> void DefaultFetch<Impl>::tick() { - list<unsigned>::iterator threads = (*activeThreads).begin(); + std::list<unsigned>::iterator threads = (*activeThreads).begin(); bool status_change = false; wroteToTimeBuffer = false; @@ -817,8 +901,16 @@ DefaultFetch<Impl>::checkSignalsAndUpdate(unsigned tid) DPRINTF(Fetch, "[tid:%u]: Squashing instructions due to squash " "from commit.\n",tid); +#if THE_ISA == ALPHA_ISA + InstSeqNum doneSeqNum = fromCommit->commitInfo[tid].doneSeqNum; +#else + InstSeqNum doneSeqNum = fromCommit->commitInfo[tid].bdelayDoneSeqNum; +#endif // In any case, squash. - squash(fromCommit->commitInfo[tid].nextPC,tid); + squash(fromCommit->commitInfo[tid].nextPC, + doneSeqNum, + fromCommit->commitInfo[tid].squashDelaySlot, + tid); // Also check if there's a mispredict that happened. if (fromCommit->commitInfo[tid].branchMispredict) { @@ -865,9 +957,15 @@ DefaultFetch<Impl>::checkSignalsAndUpdate(unsigned tid) } if (fetchStatus[tid] != Squashing) { + +#if THE_ISA == ALPHA_ISA + InstSeqNum doneSeqNum = fromDecode->decodeInfo[tid].doneSeqNum; +#else + InstSeqNum doneSeqNum = fromDecode->decodeInfo[tid].bdelayDoneSeqNum; +#endif // Squash unless we're already squashing squashFromDecode(fromDecode->decodeInfo[tid].nextPC, - fromDecode->decodeInfo[tid].doneSeqNum, + doneSeqNum, tid); return true; @@ -973,6 +1071,7 @@ DefaultFetch<Impl>::fetch(bool &status_change) } Addr next_PC = fetch_PC; + Addr next_NPC = next_PC + instSize; InstSeqNum inst_seq; MachInst inst; ExtMachInst ext_inst; @@ -991,10 +1090,13 @@ DefaultFetch<Impl>::fetch(bool &status_change) // ended this fetch block. bool predicted_branch = false; + // Need to keep track of whether or not a delay slot + // instruction has been fetched + for (; offset < cacheBlkSize && numInst < fetchWidth && - !predicted_branch; + (!predicted_branch || delaySlotInfo[tid].numInsts > 0); ++numInst) { // Get a sequence number. @@ -1004,7 +1106,7 @@ DefaultFetch<Impl>::fetch(bool &status_change) assert(offset <= cacheBlkSize - instSize); // Get the instruction from the array of the cache line. - inst = gtoh(*reinterpret_cast<MachInst *> + inst = TheISA::gtoh(*reinterpret_cast<TheISA::MachInst *> (&cacheData[tid][offset])); ext_inst = TheISA::makeExtMI(inst, fetch_PC); @@ -1031,7 +1133,8 @@ DefaultFetch<Impl>::fetch(bool &status_change) instruction->staticInst, instruction->readPC(),tid); - predicted_branch = lookupAndUpdateNextPC(instruction, next_PC); + predicted_branch = lookupAndUpdateNextPC(instruction, next_PC, + next_NPC); // Add instruction to the CPU's list of instructions. instruction->setInstListIt(cpu->addInst(instruction)); @@ -1057,7 +1160,41 @@ DefaultFetch<Impl>::fetch(bool &status_change) break; } - offset+= instSize; + offset += instSize; + +#if THE_ISA != ALPHA_ISA + if (predicted_branch) { + delaySlotInfo[tid].branchSeqNum = inst_seq; + + DPRINTF(Fetch, "[tid:%i]: Delay slot branch set to [sn:%i]\n", + tid, inst_seq); + continue; + } else if (delaySlotInfo[tid].numInsts > 0) { + --delaySlotInfo[tid].numInsts; + + // It's OK to set PC to target of branch + if (delaySlotInfo[tid].numInsts == 0) { + delaySlotInfo[tid].targetReady = true; + + // Break the looping condition + predicted_branch = true; + } + + DPRINTF(Fetch, "[tid:%i]: %i delay slot inst(s) left to" + " process.\n", tid, delaySlotInfo[tid].numInsts); + } +#endif + } + + if (offset >= cacheBlkSize) { + DPRINTF(Fetch, "[tid:%i]: Done fetching, reached the end of cache " + "block.\n", tid); + } else if (numInst >= fetchWidth) { + DPRINTF(Fetch, "[tid:%i]: Done fetching, reached fetch bandwidth " + "for this cycle.\n", tid); + } else if (predicted_branch && delaySlotInfo[tid].numInsts <= 0) { + DPRINTF(Fetch, "[tid:%i]: Done fetching, predicted branch " + "instruction encountered.\n", tid); } } @@ -1068,18 +1205,26 @@ DefaultFetch<Impl>::fetch(bool &status_change) // Now that fetching is completed, update the PC to signify what the next // cycle will be. if (fault == NoFault) { - DPRINTF(Fetch, "[tid:%i]: Setting PC to %08p.\n",tid, next_PC); - #if THE_ISA == ALPHA_ISA + DPRINTF(Fetch, "[tid:%i]: Setting PC to %08p.\n",tid, next_PC); PC[tid] = next_PC; nextPC[tid] = next_PC + instSize; #else - PC[tid] = next_PC; - nextPC[tid] = next_PC + instSize; - nextPC[tid] = next_PC + instSize; + if (delaySlotInfo[tid].targetReady && + delaySlotInfo[tid].numInsts == 0) { + // Set PC to target + PC[tid] = delaySlotInfo[tid].targetAddr; //next_PC + nextPC[tid] = next_PC + instSize; //next_NPC + nextNPC[tid] = next_PC + (2 * instSize); + + delaySlotInfo[tid].targetReady = false; + } else { + PC[tid] = next_PC; + nextPC[tid] = next_NPC; + nextNPC[tid] = next_NPC + instSize; + } - thread->setNextPC(thread->readNextNPC()); - thread->setNextNPC(thread->readNextNPC() + sizeof(MachInst)); + DPRINTF(Fetch, "[tid:%i]: Setting PC to %08p.\n", tid, PC[tid]); #endif } else { // We shouldn't be in an icache miss and also have a fault (an ITB @@ -1123,9 +1268,9 @@ DefaultFetch<Impl>::fetch(bool &status_change) fetchStatus[tid] = TrapPending; status_change = true; - warn("cycle %lli: fault (%d) detected @ PC %08p", curTick, fault, PC[tid]); + warn("cycle %lli: fault (%s) detected @ PC %08p", curTick, fault->name(), PC[tid]); #else // !FULL_SYSTEM - warn("cycle %lli: fault (%d) detected @ PC %08p", curTick, fault, PC[tid]); + warn("cycle %lli: fault (%s) detected @ PC %08p", curTick, fault->name(), PC[tid]); #endif // FULL_SYSTEM } } @@ -1202,8 +1347,8 @@ template<class Impl> int DefaultFetch<Impl>::roundRobin() { - list<unsigned>::iterator pri_iter = priorityList.begin(); - list<unsigned>::iterator end = priorityList.end(); + std::list<unsigned>::iterator pri_iter = priorityList.begin(); + std::list<unsigned>::iterator end = priorityList.end(); int high_pri; @@ -1232,9 +1377,9 @@ template<class Impl> int DefaultFetch<Impl>::iqCount() { - priority_queue<unsigned> PQ; + std::priority_queue<unsigned> PQ; - list<unsigned>::iterator threads = (*activeThreads).begin(); + std::list<unsigned>::iterator threads = (*activeThreads).begin(); while (threads != (*activeThreads).end()) { unsigned tid = *threads++; @@ -1262,10 +1407,10 @@ template<class Impl> int DefaultFetch<Impl>::lsqCount() { - priority_queue<unsigned> PQ; + std::priority_queue<unsigned> PQ; - list<unsigned>::iterator threads = (*activeThreads).begin(); + std::list<unsigned>::iterator threads = (*activeThreads).begin(); while (threads != (*activeThreads).end()) { unsigned tid = *threads++; @@ -1293,7 +1438,7 @@ template<class Impl> int DefaultFetch<Impl>::branchCount() { - list<unsigned>::iterator threads = (*activeThreads).begin(); + std::list<unsigned>::iterator threads = (*activeThreads).begin(); panic("Branch Count Fetch policy unimplemented\n"); return *threads; } |