diff options
-rw-r--r-- src/cpu/o3/fetch.hh | 61
-rw-r--r-- src/cpu/o3/fetch_impl.hh | 180
2 files changed, 209 insertions, 32 deletions
diff --git a/src/cpu/o3/fetch.hh b/src/cpu/o3/fetch.hh index 90fe5334a..7b9be7b67 100644 --- a/src/cpu/o3/fetch.hh +++ b/src/cpu/o3/fetch.hh @@ -150,6 +150,45 @@ class DefaultFetch } }; + private: + /* Event to delay delivery of a fetch translation result in case of + * a fault and the nop to carry the fault cannot be generated + * immediately */ + class FinishTranslationEvent : public Event + { + private: + DefaultFetch<Impl> *fetch; + Fault fault; + RequestPtr req; + + public: + FinishTranslationEvent(DefaultFetch<Impl> *_fetch) + : fetch(_fetch) + {} + + void setFault(Fault _fault) + { + fault = _fault; + } + + void setReq(RequestPtr _req) + { + req = _req; + } + + /** Process the delayed finish translation */ + void process() + { + assert(fetch->numInst < fetch->fetchWidth); + fetch->finishTranslation(fault, req); + } + + const char *description() const + { + return "FullO3CPU FetchFinishTranslation"; + } + }; + public: /** Overall fetch status. Used to determine if the CPU can * deschedule itsef due to a lack of activity. @@ -363,6 +402,12 @@ class DefaultFetch * policy. */ ThreadID branchCount(); + /** Pipeline the next I-cache access to the current one. */ + void pipelineIcacheAccesses(ThreadID tid); + + /** Profile the reasons of fetch stall. */ + void profileStall(ThreadID tid); + private: /** Pointer to the O3CPU. */ O3CPU *cpu; @@ -497,6 +542,12 @@ class DefaultFetch /** Records if fetch is switched out. */ bool switchedOut; + /** Set to true if a pipelined I-cache request should be issued. */ + bool issuePipelinedIfetch[Impl::MaxThreads]; + + /** Event used to delay fault generation of translation faults */ + FinishTranslationEvent finishTranslationEvent; + // @todo: Consider making these vectors and tracking on a per thread basis. /** Stat for total number of cycles stalled due to an icache miss. 
*/ Stats::Scalar icacheStallCycles; @@ -520,6 +571,16 @@ class DefaultFetch Stats::Scalar fetchBlockedCycles; /** Total number of cycles spent in any other state. */ Stats::Scalar fetchMiscStallCycles; + /** Total number of cycles spent in waiting for drains. */ + Stats::Scalar fetchPendingDrainCycles; + /** Total number of stall cycles caused by no active threads to run. */ + Stats::Scalar fetchNoActiveThreadStallCycles; + /** Total number of stall cycles caused by pending traps. */ + Stats::Scalar fetchPendingTrapStallCycles; + /** Total number of stall cycles caused by pending quiesce instructions. */ + Stats::Scalar fetchPendingQuiesceStallCycles; + /** Total number of stall cycles caused by I-cache wait retrys. */ + Stats::Scalar fetchIcacheWaitRetryStallCycles; /** Stat for total number of fetched cache lines. */ Stats::Scalar fetchedCacheLines; /** Total number of outstanding icache accesses that were dropped diff --git a/src/cpu/o3/fetch_impl.hh b/src/cpu/o3/fetch_impl.hh index 118f132ca..c58892e84 100644 --- a/src/cpu/o3/fetch_impl.hh +++ b/src/cpu/o3/fetch_impl.hh @@ -49,6 +49,7 @@ #include "base/types.hh" #include "config/the_isa.hh" #include "config/use_checker.hh" +#include "cpu/base.hh" #include "cpu/checker/cpu.hh" #include "cpu/o3/fetch.hh" #include "cpu/exetrace.hh" @@ -59,6 +60,7 @@ #include "params/DerivO3CPU.hh" #include "sim/byteswap.hh" #include "sim/core.hh" +#include "sim/eventq.hh" #if FULL_SYSTEM #include "arch/tlb.hh" @@ -135,6 +137,7 @@ DefaultFetch<Impl>::DefaultFetch(O3CPU *_cpu, DerivO3CPUParams *params) : cpu(_cpu), branchPred(params), predecoder(NULL), + numInst(0), decodeToFetchDelay(params->decodeToFetchDelay), renameToFetchDelay(params->renameToFetchDelay), iewToFetchDelay(params->iewToFetchDelay), @@ -147,7 +150,8 @@ DefaultFetch<Impl>::DefaultFetch(O3CPU *_cpu, DerivO3CPUParams *params) numFetchingThreads(params->smtNumFetchingThreads), interruptPending(false), drainPending(false), - switchedOut(false) + switchedOut(false), + 
finishTranslationEvent(this) { if (numThreads > Impl::MaxThreads) fatal("numThreads (%d) is larger than compiled limit (%d),\n" @@ -268,6 +272,31 @@ DefaultFetch<Impl>::regStats() "bad addresses, or out of MSHRs") .prereq(fetchMiscStallCycles); + fetchPendingDrainCycles + .name(name() + ".PendingDrainCycles") + .desc("Number of cycles fetch has spent waiting on pipes to drain") + .prereq(fetchPendingDrainCycles); + + fetchNoActiveThreadStallCycles + .name(name() + ".NoActiveThreadStallCycles") + .desc("Number of stall cycles due to no active thread to fetch from") + .prereq(fetchNoActiveThreadStallCycles); + + fetchPendingTrapStallCycles + .name(name() + ".PendingTrapStallCycles") + .desc("Number of stall cycles due to pending traps") + .prereq(fetchPendingTrapStallCycles); + + fetchPendingQuiesceStallCycles + .name(name() + ".PendingQuiesceStallCycles") + .desc("Number of stall cycles due to pending quiesce instructions") + .prereq(fetchPendingQuiesceStallCycles); + + fetchIcacheWaitRetryStallCycles + .name(name() + ".IcacheWaitRetryStallCycles") + .desc("Number of stall cycles due to full MSHR") + .prereq(fetchIcacheWaitRetryStallCycles); + fetchIcacheSquashes .name(name() + ".IcacheSquashes") .desc("Number of outstanding Icache misses that were squashed") @@ -675,8 +704,15 @@ DefaultFetch<Impl>::finishTranslation(Fault fault, RequestPtr mem_req) fetchStatus[tid] = IcacheWaitResponse; } } else { + if (!(numInst < fetchWidth)) { + assert(!finishTranslationEvent.scheduled()); + finishTranslationEvent.setFault(fault); + finishTranslationEvent.setReq(mem_req); + cpu->schedule(finishTranslationEvent, cpu->nextCycle(curTick() + cpu->ticks(1))); + return; + } DPRINTF(Fetch, "[tid:%i] Got back req with addr %#x but expected %#x\n", - mem_req->getVaddr(), memReq[tid]->getVaddr()); + tid, mem_req->getVaddr(), memReq[tid]->getVaddr()); // Translation faulted, icache request won't be sent. 
delete mem_req; memReq[tid] = NULL; @@ -851,6 +887,10 @@ DefaultFetch<Impl>::tick() wroteToTimeBuffer = false; + for (ThreadID i = 0; i < Impl::MaxThreads; ++i) { + issuePipelinedIfetch[i] = false; + } + while (threads != end) { ThreadID tid = *threads++; @@ -862,10 +902,7 @@ DefaultFetch<Impl>::tick() DPRINTF(Fetch, "Running stage.\n"); - // Reset the number of the instruction we're fetching. - numInst = 0; - -#if FULL_SYSTEM + #if FULL_SYSTEM if (fromCommit->commitInfo[0].interruptPending) { interruptPending = true; } @@ -895,6 +932,16 @@ DefaultFetch<Impl>::tick() cpu->activityThisCycle(); } + + // Issue the next I-cache request if possible. + for (ThreadID i = 0; i < Impl::MaxThreads; ++i) { + if (issuePipelinedIfetch[i]) { + pipelineIcacheAccesses(i); + } + } + + // Reset the number of the instruction we've fetched. + numInst = 0; } template <class Impl> @@ -1099,10 +1146,13 @@ DefaultFetch<Impl>::fetch(bool &status_change) ThreadID tid = getFetchingThread(fetchPolicy); if (tid == InvalidThreadID || drainPending) { - DPRINTF(Fetch,"There are no more threads available to fetch from.\n"); - // Breaks looping condition in tick() threadFetched = numFetchingThreads; + + if (numThreads == 1) { // @todo Per-thread stats + profileStall(0); + } + return; } @@ -1157,32 +1207,9 @@ DefaultFetch<Impl>::fetch(bool &status_change) if (fetchStatus[tid] == Idle) { ++fetchIdleCycles; DPRINTF(Fetch, "[tid:%i]: Fetch is idle!\n", tid); - } else if (fetchStatus[tid] == Blocked) { - ++fetchBlockedCycles; - DPRINTF(Fetch, "[tid:%i]: Fetch is blocked!\n", tid); - } else if (fetchStatus[tid] == Squashing) { - ++fetchSquashCycles; - DPRINTF(Fetch, "[tid:%i]: Fetch is squashing!\n", tid); - } else if (fetchStatus[tid] == IcacheWaitResponse) { - ++icacheStallCycles; - DPRINTF(Fetch, "[tid:%i]: Fetch is waiting cache response!\n", - tid); - } else if (fetchStatus[tid] == ItlbWait) { - DPRINTF(Fetch, "[tid:%i]: Fetch is waiting ITLB walk to " - "finish! 
\n", tid); - ++fetchTlbCycles; - } else if (fetchStatus[tid] == TrapPending) { - DPRINTF(Fetch, "[tid:%i]: Fetch is waiting for a pending trap\n", - tid); - } else if (fetchStatus[tid] == NoGoodAddr) { - DPRINTF(Fetch, "[tid:%i]: Fetch predicted non-executable address\n", - tid); } - - - // Status is Idle, Squashing, Blocked, ItlbWait or IcacheWaitResponse - // so fetch should do nothing. + // Status is Idle, so fetch should do nothing. return; } @@ -1329,6 +1356,17 @@ DefaultFetch<Impl>::fetch(bool &status_change) } pc[tid] = thisPC; + + // pipeline a fetch if we're crossing a cache boundary and not in + // a state that would preclude fetching + fetchAddr = (thisPC.instAddr() + pcOffset) & BaseCPU::PCMask; + Addr block_PC = icacheBlockAlignPC(fetchAddr); + issuePipelinedIfetch[tid] = block_PC != cacheDataPC[tid] && + fetchStatus[tid] != IcacheWaitResponse && + fetchStatus[tid] != ItlbWait && + fetchStatus[tid] != IcacheWaitRetry && + fetchStatus[tid] != QuiescePending && + !curMacroop; } template<class Impl> @@ -1511,3 +1549,81 @@ DefaultFetch<Impl>::branchCount() panic("Branch Count Fetch policy unimplemented\n"); return InvalidThreadID; } + +template<class Impl> +void +DefaultFetch<Impl>::pipelineIcacheAccesses(ThreadID tid) +{ + if (!issuePipelinedIfetch[tid]) { + return; + } + + // The next PC to access. + TheISA::PCState thisPC = pc[tid]; + + if (isRomMicroPC(thisPC.microPC())) { + return; + } + + Addr pcOffset = fetchOffset[tid]; + Addr fetchAddr = (thisPC.instAddr() + pcOffset) & BaseCPU::PCMask; + + // Align the fetch PC so its at the start of a cache block. + Addr block_PC = icacheBlockAlignPC(fetchAddr); + + // Unless buffer already got the block, fetch it from icache. 
+ if (!(cacheDataValid[tid] && block_PC == cacheDataPC[tid])) { + DPRINTF(Fetch, "[tid:%i]: Issuing a pipelined I-cache access, " + "starting at PC %s.\n", tid, thisPC); + + fetchCacheLine(fetchAddr, tid, thisPC.instAddr()); + } +} + +template<class Impl> +void +DefaultFetch<Impl>::profileStall(ThreadID tid) { + DPRINTF(Fetch,"There are no more threads available to fetch from.\n"); + + // @todo Per-thread stats + + if (drainPending) { + ++fetchPendingDrainCycles; + DPRINTF(Fetch, "Fetch is waiting for a drain!\n"); + } else if (activeThreads->empty()) { + ++fetchNoActiveThreadStallCycles; + DPRINTF(Fetch, "Fetch has no active thread!\n"); + } else if (fetchStatus[tid] == Blocked) { + ++fetchBlockedCycles; + DPRINTF(Fetch, "[tid:%i]: Fetch is blocked!\n", tid); + } else if (fetchStatus[tid] == Squashing) { + ++fetchSquashCycles; + DPRINTF(Fetch, "[tid:%i]: Fetch is squashing!\n", tid); + } else if (fetchStatus[tid] == IcacheWaitResponse) { + ++icacheStallCycles; + DPRINTF(Fetch, "[tid:%i]: Fetch is waiting cache response!\n", + tid); + } else if (fetchStatus[tid] == ItlbWait) { + ++fetchTlbCycles; + DPRINTF(Fetch, "[tid:%i]: Fetch is waiting ITLB walk to " + "finish!\n", tid); + } else if (fetchStatus[tid] == TrapPending) { + ++fetchPendingTrapStallCycles; + DPRINTF(Fetch, "[tid:%i]: Fetch is waiting for a pending trap!\n", + tid); + } else if (fetchStatus[tid] == QuiescePending) { + ++fetchPendingQuiesceStallCycles; + DPRINTF(Fetch, "[tid:%i]: Fetch is waiting for a pending quiesce " + "instruction!\n", tid); + } else if (fetchStatus[tid] == IcacheWaitRetry) { + ++fetchIcacheWaitRetryStallCycles; + DPRINTF(Fetch, "[tid:%i]: Fetch is waiting for an I-cache retry!\n", + tid); + } else if (fetchStatus[tid] == NoGoodAddr) { + DPRINTF(Fetch, "[tid:%i]: Fetch predicted non-executable address\n", + tid); + } else { + DPRINTF(Fetch, "[tid:%i]: Unexpected fetch stall reason (Status: %i).\n", + tid, fetchStatus[tid]); + } +} |