summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- src/cpu/o3/O3CPU.py 1
-rw-r--r-- src/cpu/o3/fetch.hh 14
-rw-r--r-- src/cpu/o3/fetch_impl.hh 61
3 files changed, 55 insertions, 21 deletions
diff --git a/src/cpu/o3/O3CPU.py b/src/cpu/o3/O3CPU.py
index fb5b5de2b..c70a12f1d 100644
--- a/src/cpu/o3/O3CPU.py
+++ b/src/cpu/o3/O3CPU.py
@@ -61,6 +61,7 @@ class DerivO3CPU(BaseCPU):
commitToFetchDelay = Param.Cycles(1, "Commit to fetch delay")
fetchWidth = Param.Unsigned(8, "Fetch width")
fetchBufferSize = Param.Unsigned(64, "Fetch buffer size in bytes")
+ fetchQueueSize = Param.Unsigned(32, "Fetch queue size in micro-ops")
renameToDecodeDelay = Param.Cycles(1, "Rename to decode delay")
iewToDecodeDelay = Param.Cycles(1, "Issue/Execute/Writeback to decode "
diff --git a/src/cpu/o3/fetch.hh b/src/cpu/o3/fetch.hh
index 0c1b81d86..2e9428ef1 100644
--- a/src/cpu/o3/fetch.hh
+++ b/src/cpu/o3/fetch.hh
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2010-2012 ARM Limited
+ * Copyright (c) 2010-2012, 2014 ARM Limited
* All rights reserved
*
* The license below extends only to copyright in the software and shall
@@ -401,9 +401,6 @@ class DefaultFetch
/** Wire to get commit's information from backwards time buffer. */
typename TimeBuffer<TimeStruct>::wire fromCommit;
- /** Internal fetch instruction queue. */
- TimeBuffer<FetchStruct> *fetchQueue;
-
//Might be annoying how this name is different than the queue.
/** Wire used to write any information heading to decode. */
typename TimeBuffer<FetchStruct>::wire toDecode;
@@ -455,6 +452,9 @@ class DefaultFetch
/** The width of fetch in instructions. */
unsigned fetchWidth;
+ /** The width of decode in instructions. */
+ unsigned decodeWidth;
+
/** Is the cache blocked? If so no threads can access it. */
bool cacheBlocked;
@@ -481,6 +481,12 @@ class DefaultFetch
/** The PC of the first instruction loaded into the fetch buffer. */
Addr fetchBufferPC[Impl::MaxThreads];
+ /** The size of the fetch queue in micro-ops */
+ unsigned fetchQueueSize;
+
+ /** Queue of fetched instructions */
+ std::deque<DynInstPtr> fetchQueue;
+
/** Whether or not the fetch buffer data is valid. */
bool fetchBufferValid[Impl::MaxThreads];
diff --git a/src/cpu/o3/fetch_impl.hh b/src/cpu/o3/fetch_impl.hh
index 637e39957..219444ace 100644
--- a/src/cpu/o3/fetch_impl.hh
+++ b/src/cpu/o3/fetch_impl.hh
@@ -82,11 +82,13 @@ DefaultFetch<Impl>::DefaultFetch(O3CPU *_cpu, DerivO3CPUParams *params)
iewToFetchDelay(params->iewToFetchDelay),
commitToFetchDelay(params->commitToFetchDelay),
fetchWidth(params->fetchWidth),
+ decodeWidth(params->decodeWidth),
retryPkt(NULL),
retryTid(InvalidThreadID),
cacheBlkSize(cpu->cacheLineSize()),
fetchBufferSize(params->fetchBufferSize),
fetchBufferMask(fetchBufferSize - 1),
+ fetchQueueSize(params->fetchQueueSize),
numThreads(params->numThreads),
numFetchingThreads(params->smtNumFetchingThreads),
finishTranslationEvent(this)
@@ -313,12 +315,10 @@ DefaultFetch<Impl>::setActiveThreads(std::list<ThreadID> *at_ptr)
template<class Impl>
void
-DefaultFetch<Impl>::setFetchQueue(TimeBuffer<FetchStruct> *fq_ptr)
+DefaultFetch<Impl>::setFetchQueue(TimeBuffer<FetchStruct> *ftb_ptr)
{
- fetchQueue = fq_ptr;
-
- // Create wire to write information to proper place in fetch queue.
- toDecode = fetchQueue->getWire(0);
+ // Create wire to write information to proper place in fetch time buf.
+ toDecode = ftb_ptr->getWire(0);
}
template<class Impl>
@@ -342,6 +342,7 @@ DefaultFetch<Impl>::resetStage()
cacheBlocked = false;
priorityList.clear();
+ fetchQueue.clear();
// Setup PC and nextPC with initial state.
for (ThreadID tid = 0; tid < numThreads; ++tid) {
@@ -454,6 +455,10 @@ DefaultFetch<Impl>::isDrained() const
return false;
}
+ // Not drained if fetch queue contains entries
+ if (!fetchQueue.empty())
+ return false;
+
/* The pipeline might start up again in the middle of the drain
* cycle if the finish translation event is scheduled, so make
* sure that's not the case.
@@ -673,11 +678,8 @@ DefaultFetch<Impl>::finishTranslation(Fault fault, RequestPtr mem_req)
fetchStatus[tid] = IcacheWaitResponse;
}
} else {
- // Don't send an instruction to decode if it can't handle it.
- // Asynchronous nature of this function's calling means we have to
- // check 2 signals to see if decode is stalled.
- if (!(numInst < fetchWidth) || stalls[tid].decode ||
- fromDecode->decodeBlock[tid]) {
+ // Defer if the fetch width is used up or the fetch queue is full.
+ if (!(numInst < fetchWidth) || !(fetchQueue.size() < fetchQueueSize)) {
assert(!finishTranslationEvent.scheduled());
finishTranslationEvent.setFault(fault);
finishTranslationEvent.setReq(mem_req);
@@ -758,6 +760,15 @@ DefaultFetch<Impl>::doSquash(const TheISA::PCState &newPC,
fetchStatus[tid] = Squashing;
+ // Empty fetch queue
+ auto inst_itr = fetchQueue.begin();
+ while (inst_itr != fetchQueue.end()) {
+ if ((*inst_itr)->threadNumber == tid)
+ inst_itr = fetchQueue.erase(inst_itr);
+ else
+ ++inst_itr;
+ }
+
// microops are being squashed, it is not known whether the
// youngest non-squashed microop was marked delayed commit
// or not. Setting the flag to true ensures that the
@@ -796,9 +807,6 @@ DefaultFetch<Impl>::checkStall(ThreadID tid) const
assert(cpu->isDraining());
DPRINTF(Fetch,"[tid:%i]: Drain stall detected.\n",tid);
ret_val = true;
- } else if (stalls[tid].decode) {
- DPRINTF(Fetch,"[tid:%i]: Stall from Decode stage detected.\n",tid);
- ret_val = true;
}
return ret_val;
@@ -921,6 +929,21 @@ DefaultFetch<Impl>::tick()
}
}
+ // Send instructions enqueued into the fetch queue to decode.
+ // Limit rate by decodeWidth. Stall if decode is stalled.
+ unsigned instsToDecode = 0;
+ while(!fetchQueue.empty() &&
+ instsToDecode < decodeWidth &&
+ !stalls[fetchQueue.front()->threadNumber].decode) {
+ auto inst = fetchQueue.front();
+ toDecode->insts[toDecode->size++] = inst;
+ DPRINTF(Fetch, "[tid:%i][sn:%i]: Sending instruction to decode from "
+ "fetch queue. Fetch queue size: %i.\n",
+ inst->threadNumber, inst->seqNum, fetchQueue.size());
+ fetchQueue.pop_front();
+ instsToDecode++;
+ }
+
// Reset the number of the instruction we've fetched.
numInst = 0;
}
@@ -1072,7 +1095,11 @@ DefaultFetch<Impl>::buildInst(ThreadID tid, StaticInstPtr staticInst,
// Write the instruction to the first slot in the queue
// that heads to decode.
assert(numInst < fetchWidth);
- toDecode->insts[toDecode->size++] = instruction;
+ fetchQueue.push_back(instruction);
+ assert(fetchQueue.size() <= fetchQueueSize);
+ DPRINTF(Fetch, "[tid:%i]: Fetch queue entry created (%i/%i).\n",
+ tid, fetchQueue.size(), fetchQueueSize);
+ //toDecode->insts[toDecode->size++] = instruction;
// Keep track of if we can take an interrupt at this boundary
delayedCommit[tid] = instruction->isDelayedCommit();
@@ -1186,8 +1213,8 @@ DefaultFetch<Impl>::fetch(bool &status_change)
// Loop through instruction memory from the cache.
// Keep issuing while fetchWidth is available and branch is not
// predicted taken
- while (numInst < fetchWidth && !predictedBranch) {
-
+ while (numInst < fetchWidth && fetchQueue.size() < fetchQueueSize
+ && !predictedBranch) {
// We need to process more memory if we aren't going to get a
// StaticInst from the rom, the current macroop, or what's already
// in the decoder.
@@ -1310,7 +1337,7 @@ DefaultFetch<Impl>::fetch(bool &status_change)
break;
}
} while ((curMacroop || decoder[tid]->instReady()) &&
- numInst < fetchWidth);
+ numInst < fetchWidth && fetchQueue.size() < fetchQueueSize);
}
if (predictedBranch) {