-rw-r--r--  src/cpu/o3/O3CPU.py      |  1
-rw-r--r--  src/cpu/o3/fetch.hh      | 14
-rw-r--r--  src/cpu/o3/fetch_impl.hh | 61
3 files changed, 55 insertions, 21 deletions
diff --git a/src/cpu/o3/O3CPU.py b/src/cpu/o3/O3CPU.py
index fb5b5de2b..c70a12f1d 100644
--- a/src/cpu/o3/O3CPU.py
+++ b/src/cpu/o3/O3CPU.py
@@ -61,6 +61,7 @@ class DerivO3CPU(BaseCPU):
     commitToFetchDelay = Param.Cycles(1, "Commit to fetch delay")
     fetchWidth = Param.Unsigned(8, "Fetch width")
     fetchBufferSize = Param.Unsigned(64, "Fetch buffer size in bytes")
+    fetchQueueSize = Param.Unsigned(32, "Fetch queue size in micro-ops")
 
     renameToDecodeDelay = Param.Cycles(1, "Rename to decode delay")
     iewToDecodeDelay = Param.Cycles(1, "Issue/Execute/Writeback to decode "
diff --git a/src/cpu/o3/fetch.hh b/src/cpu/o3/fetch.hh
index 0c1b81d86..2e9428ef1 100644
--- a/src/cpu/o3/fetch.hh
+++ b/src/cpu/o3/fetch.hh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2010-2012 ARM Limited
+ * Copyright (c) 2010-2012, 2014 ARM Limited
  * All rights reserved
  *
  * The license below extends only to copyright in the software and shall
@@ -401,9 +401,6 @@ class DefaultFetch
     /** Wire to get commit's information from backwards time buffer. */
     typename TimeBuffer<TimeStruct>::wire fromCommit;
 
-    /** Internal fetch instruction queue. */
-    TimeBuffer<FetchStruct> *fetchQueue;
-
     //Might be annoying how this name is different than the queue.
     /** Wire used to write any information heading to decode. */
     typename TimeBuffer<FetchStruct>::wire toDecode;
@@ -455,6 +452,9 @@ class DefaultFetch
     /** The width of fetch in instructions. */
     unsigned fetchWidth;
 
+    /** The width of decode in instructions. */
+    unsigned decodeWidth;
+
     /** Is the cache blocked?  If so no threads can access it. */
     bool cacheBlocked;
 
@@ -481,6 +481,12 @@ class DefaultFetch
     /** The PC of the first instruction loaded into the fetch buffer. */
     Addr fetchBufferPC[Impl::MaxThreads];
 
+    /** The size of the fetch queue in micro-ops */
+    unsigned fetchQueueSize;
+
+    /** Queue of fetched instructions */
+    std::deque<DynInstPtr> fetchQueue;
+
     /** Whether or not the fetch buffer data is valid. */
     bool fetchBufferValid[Impl::MaxThreads];
 
diff --git a/src/cpu/o3/fetch_impl.hh b/src/cpu/o3/fetch_impl.hh
index 637e39957..219444ace 100644
--- a/src/cpu/o3/fetch_impl.hh
+++ b/src/cpu/o3/fetch_impl.hh
@@ -82,11 +82,13 @@ DefaultFetch<Impl>::DefaultFetch(O3CPU *_cpu, DerivO3CPUParams *params)
       iewToFetchDelay(params->iewToFetchDelay),
       commitToFetchDelay(params->commitToFetchDelay),
       fetchWidth(params->fetchWidth),
+      decodeWidth(params->decodeWidth),
       retryPkt(NULL),
       retryTid(InvalidThreadID),
       cacheBlkSize(cpu->cacheLineSize()),
       fetchBufferSize(params->fetchBufferSize),
       fetchBufferMask(fetchBufferSize - 1),
+      fetchQueueSize(params->fetchQueueSize),
       numThreads(params->numThreads),
       numFetchingThreads(params->smtNumFetchingThreads),
       finishTranslationEvent(this)
@@ -313,12 +315,10 @@ DefaultFetch<Impl>::setActiveThreads(std::list<ThreadID> *at_ptr)
 
 template<class Impl>
 void
-DefaultFetch<Impl>::setFetchQueue(TimeBuffer<FetchStruct> *fq_ptr)
+DefaultFetch<Impl>::setFetchQueue(TimeBuffer<FetchStruct> *ftb_ptr)
 {
-    fetchQueue = fq_ptr;
-
-    // Create wire to write information to proper place in fetch queue.
-    toDecode = fetchQueue->getWire(0);
+    // Create wire to write information to proper place in fetch time buf.
+    toDecode = ftb_ptr->getWire(0);
 }
 
 template<class Impl>
@@ -342,6 +342,7 @@ DefaultFetch<Impl>::resetStage()
     cacheBlocked = false;
 
     priorityList.clear();
+    fetchQueue.clear();
 
     // Setup PC and nextPC with initial state.
     for (ThreadID tid = 0; tid < numThreads; ++tid) {
@@ -454,6 +455,10 @@ DefaultFetch<Impl>::isDrained() const
             return false;
     }
 
+    // Not drained if fetch queue contains entries
+    if (!fetchQueue.empty())
+        return false;
+
     /* The pipeline might start up again in the middle of the drain
      * cycle if the finish translation event is scheduled, so make
      * sure that's not the case.
@@ -673,11 +678,8 @@ DefaultFetch<Impl>::finishTranslation(Fault fault, RequestPtr mem_req)
             fetchStatus[tid] = IcacheWaitResponse;
         }
     } else {
-        // Don't send an instruction to decode if it can't handle it.
-        // Asynchronous nature of this function's calling means we have to
-        // check 2 signals to see if decode is stalled.
-        if (!(numInst < fetchWidth) || stalls[tid].decode ||
-            fromDecode->decodeBlock[tid]) {
+        // Don't send an instruction to decode if we can't handle it.
+        if (!(numInst < fetchWidth) || !(fetchQueue.size() < fetchQueueSize)) {
             assert(!finishTranslationEvent.scheduled());
             finishTranslationEvent.setFault(fault);
             finishTranslationEvent.setReq(mem_req);
@@ -758,6 +760,15 @@ DefaultFetch<Impl>::doSquash(const TheISA::PCState &newPC,
 
     fetchStatus[tid] = Squashing;
 
+    // Empty fetch queue
+    auto inst_itr = fetchQueue.begin();
+    while (inst_itr != fetchQueue.end()) {
+        if ((*inst_itr)->threadNumber == tid)
+            inst_itr = fetchQueue.erase(inst_itr);
+        else
+            ++inst_itr;
+    }
+
     // microops are being squashed, it is not known wheather the
     // youngest non-squashed microop was marked delayed commit
     // or not. Setting the flag to true ensures that the
@@ -796,9 +807,6 @@ DefaultFetch<Impl>::checkStall(ThreadID tid) const
         assert(cpu->isDraining());
        DPRINTF(Fetch,"[tid:%i]: Drain stall detected.\n",tid);
         ret_val = true;
-    } else if (stalls[tid].decode) {
-        DPRINTF(Fetch,"[tid:%i]: Stall from Decode stage detected.\n",tid);
-        ret_val = true;
     }
 
     return ret_val;
@@ -921,6 +929,21 @@ DefaultFetch<Impl>::tick()
         }
     }
 
+    // Send instructions enqueued into the fetch queue to decode.
+    // Limit rate by fetchWidth.  Stall if decode is stalled.
+    unsigned instsToDecode = 0;
+    while(!fetchQueue.empty() &&
+          instsToDecode < decodeWidth &&
+          !stalls[fetchQueue.front()->threadNumber].decode) {
+        auto inst = fetchQueue.front();
+        toDecode->insts[toDecode->size++] = inst;
+        DPRINTF(Fetch, "[tid:%i][sn:%i]: Sending instruction to decode from "
+                "fetch queue. Fetch queue size: %i.\n",
+                inst->threadNumber, inst->seqNum, fetchQueue.size());
+        fetchQueue.pop_front();
+        instsToDecode++;
+    }
+
     // Reset the number of the instruction we've fetched.
     numInst = 0;
 }
@@ -1072,7 +1095,11 @@ DefaultFetch<Impl>::buildInst(ThreadID tid, StaticInstPtr staticInst,
     // Write the instruction to the first slot in the queue
     // that heads to decode.
     assert(numInst < fetchWidth);
-    toDecode->insts[toDecode->size++] = instruction;
+    fetchQueue.push_back(instruction);
+    assert(fetchQueue.size() <= fetchQueueSize);
+    DPRINTF(Fetch, "[tid:%i]: Fetch queue entry created (%i/%i).\n",
+            tid, fetchQueue.size(), fetchQueueSize);
+    //toDecode->insts[toDecode->size++] = instruction;
 
     // Keep track of if we can take an interrupt at this boundary
     delayedCommit[tid] = instruction->isDelayedCommit();
@@ -1186,8 +1213,8 @@ DefaultFetch<Impl>::fetch(bool &status_change)
     // Loop through instruction memory from the cache.
     // Keep issuing while fetchWidth is available and branch is not
     // predicted taken
-    while (numInst < fetchWidth && !predictedBranch) {
-
+    while (numInst < fetchWidth && fetchQueue.size() < fetchQueueSize
+           && !predictedBranch) {
         // We need to process more memory if we aren't going to get a
         // StaticInst from the rom, the current macroop, or what's already
         // in the decoder.
@@ -1310,7 +1337,7 @@ DefaultFetch<Impl>::fetch(bool &status_change)
                 break;
             }
         } while ((curMacroop || decoder[tid]->instReady()) &&
-                 numInst < fetchWidth);
+                 numInst < fetchWidth && fetchQueue.size() < fetchQueueSize);
     }
 
     if (predictedBranch) {
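The sketch below is a minimal standalone model (not gem5 code) of the buffering scheme this patch introduces: fetch deposits micro-ops into a bounded std::deque, each tick hands at most decodeWidth of them to decode unless decode is stalled, and a squash removes only the offending thread's entries. The FetchedOp struct, the decodeStalled flag, and the numeric parameters are illustrative assumptions, not gem5 identifiers.

// Toy model of the bounded fetch queue added by this patch.
#include <cstddef>
#include <cstdint>
#include <deque>
#include <iostream>
#include <vector>

struct FetchedOp {            // stand-in for a DynInstPtr
    uint64_t seqNum;
    int threadId;
};

class FetchStage {
  public:
    FetchStage(unsigned fetchWidth, unsigned decodeWidth,
               unsigned fetchQueueSize)
        : fetchWidth(fetchWidth), decodeWidth(decodeWidth),
          fetchQueueSize(fetchQueueSize) {}

    // Fetch loop: keep fetching while both the per-cycle fetch width and the
    // fetch queue have room (mirrors the new loop bound
    // "numInst < fetchWidth && fetchQueue.size() < fetchQueueSize").
    void fetchCycle(int tid) {
        unsigned numInst = 0;
        while (numInst < fetchWidth && fetchQueue.size() < fetchQueueSize) {
            fetchQueue.push_back({nextSeqNum++, tid});
            ++numInst;
        }
    }

    // Model of the new code in tick(): hand at most decodeWidth queued ops to
    // decode, stopping early if decode is stalled for the thread at the head.
    std::vector<FetchedOp> tick(const std::vector<bool> &decodeStalled) {
        std::vector<FetchedOp> toDecode;
        while (!fetchQueue.empty() && toDecode.size() < decodeWidth &&
               !decodeStalled[fetchQueue.front().threadId]) {
            toDecode.push_back(fetchQueue.front());
            fetchQueue.pop_front();
        }
        return toDecode;
    }

    // Squash path: drop only the squashed thread's entries, as in doSquash().
    void squashThread(int tid) {
        for (auto it = fetchQueue.begin(); it != fetchQueue.end();) {
            if (it->threadId == tid)
                it = fetchQueue.erase(it);
            else
                ++it;
        }
    }

    std::size_t queueOccupancy() const { return fetchQueue.size(); }

  private:
    unsigned fetchWidth, decodeWidth, fetchQueueSize;
    uint64_t nextSeqNum = 1;
    std::deque<FetchedOp> fetchQueue;
};

int main() {
    FetchStage fetch(8 /*fetchWidth*/, 4 /*decodeWidth*/, 32 /*fetchQueueSize*/);
    std::vector<bool> decodeStalled = {false};

    fetch.fetchCycle(0);                    // enqueue up to 8 micro-ops
    auto sent = fetch.tick(decodeStalled);  // drain up to 4 to decode
    std::cout << "sent " << sent.size() << ", queued "
              << fetch.queueOccupancy() << "\n";      // sent 4, queued 4

    decodeStalled[0] = true;                // decode stalls: queue absorbs fetch
    fetch.fetchCycle(0);
    std::cout << "queued while stalled: " << fetch.queueOccupancy() << "\n";
    fetch.squashThread(0);                  // mispredict: flush thread 0
    std::cout << "after squash: " << fetch.queueOccupancy() << "\n";
    return 0;
}

Decoupling fetch from decode through this queue lets fetch run ahead while decode is briefly stalled, and it replaces the two decode back-pressure signals finishTranslation() previously had to check with a single occupancy test (fetchQueue.size() < fetchQueueSize).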