path: root/src/cpu/o3/fetch_impl.hh
author    Mitch Hayenga <mitch.hayenga@arm.com>    2014-09-03 07:42:35 -0400
committer Mitch Hayenga <mitch.hayenga@arm.com>    2014-09-03 07:42:35 -0400
commit    ecd53009712da59a98ad3c13ed20aaa8e8cd7e29 (patch)
tree      b23bf72ad68c2dce684d35ff1e6dda6c4a573150 /src/cpu/o3/fetch_impl.hh
parent    1716749c8cec6f9c9f10a0aeaff981be759bb4e5 (diff)
download  gem5-ecd53009712da59a98ad3c13ed20aaa8e8cd7e29.tar.xz
cpu: Add a fetch queue to the o3 cpu
This patch adds a fetch queue, sitting between fetch and decode, to the o3 cpu. This effectively decouples fetch from decode stalls, allowing it to be more aggressive and run further ahead in the instruction stream.
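The decoupling is easier to see spelled out on its own. Below is a minimal standalone C++ sketch, not gem5 code: FetchQueueModel, Inst, and the chosen widths are illustrative only. It models a bounded queue in which fetch keeps producing as long as there is room, while decode drains up to its own width per cycle unless it is stalled.

// Standalone sketch of the fetch/decode decoupling idea (not gem5 code).
// Names (FetchQueueModel, Inst) and the width values are illustrative.
#include <cstdio>
#include <deque>

struct Inst { unsigned seqNum; };

struct FetchQueueModel {
    std::deque<Inst> fetchQueue;       // buffer between fetch and decode
    const size_t fetchQueueSize = 32;  // cap, analogous to params->fetchQueueSize
    const unsigned fetchWidth = 8;     // insts fetched per cycle
    const unsigned decodeWidth = 8;    // insts handed to decode per cycle
    unsigned nextSeq = 0;

    // Fetch keeps producing while the queue has room, even if decode
    // is stalled this cycle.
    void fetchCycle() {
        unsigned numInst = 0;
        while (numInst < fetchWidth && fetchQueue.size() < fetchQueueSize) {
            fetchQueue.push_back(Inst{nextSeq++});
            ++numInst;
        }
    }

    // Decode drains up to decodeWidth entries per cycle unless stalled.
    void decodeCycle(bool decodeStalled) {
        unsigned sent = 0;
        while (!fetchQueue.empty() && sent < decodeWidth && !decodeStalled) {
            fetchQueue.pop_front();
            ++sent;
        }
    }
};

int main() {
    FetchQueueModel m;
    // Decode stalls for the first three cycles; fetch still runs ahead
    // and fills the queue instead of idling.
    for (int cycle = 0; cycle < 6; ++cycle) {
        m.fetchCycle();
        m.decodeCycle(/*decodeStalled=*/cycle < 3);
        std::printf("cycle %d: queue depth %zu\n", cycle, m.fetchQueue.size());
    }
}

In the patch below, the same bound appears as fetchQueue.size() < fetchQueueSize in the fetch loops, and the drain loop added to tick() plays the role of decodeCycle() above.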
Diffstat (limited to 'src/cpu/o3/fetch_impl.hh')
-rw-r--r--  src/cpu/o3/fetch_impl.hh  61
1 file changed, 44 insertions(+), 17 deletions(-)
diff --git a/src/cpu/o3/fetch_impl.hh b/src/cpu/o3/fetch_impl.hh
index 637e39957..219444ace 100644
--- a/src/cpu/o3/fetch_impl.hh
+++ b/src/cpu/o3/fetch_impl.hh
@@ -82,11 +82,13 @@ DefaultFetch<Impl>::DefaultFetch(O3CPU *_cpu, DerivO3CPUParams *params)
iewToFetchDelay(params->iewToFetchDelay),
commitToFetchDelay(params->commitToFetchDelay),
fetchWidth(params->fetchWidth),
+ decodeWidth(params->decodeWidth),
retryPkt(NULL),
retryTid(InvalidThreadID),
cacheBlkSize(cpu->cacheLineSize()),
fetchBufferSize(params->fetchBufferSize),
fetchBufferMask(fetchBufferSize - 1),
+ fetchQueueSize(params->fetchQueueSize),
numThreads(params->numThreads),
numFetchingThreads(params->smtNumFetchingThreads),
finishTranslationEvent(this)
@@ -313,12 +315,10 @@ DefaultFetch<Impl>::setActiveThreads(std::list<ThreadID> *at_ptr)
template<class Impl>
void
-DefaultFetch<Impl>::setFetchQueue(TimeBuffer<FetchStruct> *fq_ptr)
+DefaultFetch<Impl>::setFetchQueue(TimeBuffer<FetchStruct> *ftb_ptr)
{
- fetchQueue = fq_ptr;
-
- // Create wire to write information to proper place in fetch queue.
- toDecode = fetchQueue->getWire(0);
+ // Create wire to write information to proper place in fetch time buf.
+ toDecode = ftb_ptr->getWire(0);
}
template<class Impl>
@@ -342,6 +342,7 @@ DefaultFetch<Impl>::resetStage()
cacheBlocked = false;
priorityList.clear();
+ fetchQueue.clear();
// Setup PC and nextPC with initial state.
for (ThreadID tid = 0; tid < numThreads; ++tid) {
@@ -454,6 +455,10 @@ DefaultFetch<Impl>::isDrained() const
return false;
}
+ // Not drained if fetch queue contains entries
+ if (!fetchQueue.empty())
+ return false;
+
/* The pipeline might start up again in the middle of the drain
* cycle if the finish translation event is scheduled, so make
* sure that's not the case.
@@ -673,11 +678,8 @@ DefaultFetch<Impl>::finishTranslation(Fault fault, RequestPtr mem_req)
fetchStatus[tid] = IcacheWaitResponse;
}
} else {
- // Don't send an instruction to decode if it can't handle it.
- // Asynchronous nature of this function's calling means we have to
- // check 2 signals to see if decode is stalled.
- if (!(numInst < fetchWidth) || stalls[tid].decode ||
- fromDecode->decodeBlock[tid]) {
+ // Don't send an instruction to decode if we can't handle it.
+ if (!(numInst < fetchWidth) || !(fetchQueue.size() < fetchQueueSize)) {
assert(!finishTranslationEvent.scheduled());
finishTranslationEvent.setFault(fault);
finishTranslationEvent.setReq(mem_req);
@@ -758,6 +760,15 @@ DefaultFetch<Impl>::doSquash(const TheISA::PCState &newPC,
fetchStatus[tid] = Squashing;
+ // Empty fetch queue
+ auto inst_itr = fetchQueue.begin();
+ while (inst_itr != fetchQueue.end()) {
+ if ((*inst_itr)->threadNumber == tid)
+ inst_itr = fetchQueue.erase(inst_itr);
+ else
+ ++inst_itr;
+ }
+
// microops are being squashed, it is not known whether the
// youngest non-squashed microop was marked delayed commit
// or not. Setting the flag to true ensures that the
@@ -796,9 +807,6 @@ DefaultFetch<Impl>::checkStall(ThreadID tid) const
assert(cpu->isDraining());
DPRINTF(Fetch,"[tid:%i]: Drain stall detected.\n",tid);
ret_val = true;
- } else if (stalls[tid].decode) {
- DPRINTF(Fetch,"[tid:%i]: Stall from Decode stage detected.\n",tid);
- ret_val = true;
}
return ret_val;
@@ -921,6 +929,21 @@ DefaultFetch<Impl>::tick()
}
}
+ // Send instructions enqueued into the fetch queue to decode.
+ // Limit rate by decodeWidth. Stall if decode is stalled.
+ unsigned instsToDecode = 0;
+ while(!fetchQueue.empty() &&
+ instsToDecode < decodeWidth &&
+ !stalls[fetchQueue.front()->threadNumber].decode) {
+ auto inst = fetchQueue.front();
+ toDecode->insts[toDecode->size++] = inst;
+ DPRINTF(Fetch, "[tid:%i][sn:%i]: Sending instruction to decode from "
+ "fetch queue. Fetch queue size: %i.\n",
+ inst->threadNumber, inst->seqNum, fetchQueue.size());
+ fetchQueue.pop_front();
+ instsToDecode++;
+ }
+
// Reset the number of the instruction we've fetched.
numInst = 0;
}
@@ -1072,7 +1095,11 @@ DefaultFetch<Impl>::buildInst(ThreadID tid, StaticInstPtr staticInst,
// Write the instruction to the first slot in the queue
// that heads to decode.
assert(numInst < fetchWidth);
- toDecode->insts[toDecode->size++] = instruction;
+ fetchQueue.push_back(instruction);
+ assert(fetchQueue.size() <= fetchQueueSize);
+ DPRINTF(Fetch, "[tid:%i]: Fetch queue entry created (%i/%i).\n",
+ tid, fetchQueue.size(), fetchQueueSize);
+ //toDecode->insts[toDecode->size++] = instruction;
// Keep track of if we can take an interrupt at this boundary
delayedCommit[tid] = instruction->isDelayedCommit();
@@ -1186,8 +1213,8 @@ DefaultFetch<Impl>::fetch(bool &status_change)
// Loop through instruction memory from the cache.
// Keep issuing while fetchWidth is available and branch is not
// predicted taken
- while (numInst < fetchWidth && !predictedBranch) {
-
+ while (numInst < fetchWidth && fetchQueue.size() < fetchQueueSize
+ && !predictedBranch) {
// We need to process more memory if we aren't going to get a
// StaticInst from the rom, the current macroop, or what's already
// in the decoder.
@@ -1310,7 +1337,7 @@ DefaultFetch<Impl>::fetch(bool &status_change)
break;
}
} while ((curMacroop || decoder[tid]->instReady()) &&
- numInst < fetchWidth);
+ numInst < fetchWidth && fetchQueue.size() < fetchQueueSize);
}
if (predictedBranch) {