-rw-r--r--  src/cpu/o3/O3CPU.py          3
-rw-r--r--  src/cpu/o3/commit.hh         5
-rw-r--r--  src/cpu/o3/commit_impl.hh   15
-rw-r--r--  src/cpu/o3/cpu.cc            5
-rw-r--r--  src/cpu/o3/fetch.hh          6
-rw-r--r--  src/cpu/o3/fetch_impl.hh   109
6 files changed, 99 insertions, 44 deletions
diff --git a/src/cpu/o3/O3CPU.py b/src/cpu/o3/O3CPU.py
index c70a12f1d..4d215328e 100644
--- a/src/cpu/o3/O3CPU.py
+++ b/src/cpu/o3/O3CPU.py
@@ -61,7 +61,8 @@ class DerivO3CPU(BaseCPU):
commitToFetchDelay = Param.Cycles(1, "Commit to fetch delay")
fetchWidth = Param.Unsigned(8, "Fetch width")
fetchBufferSize = Param.Unsigned(64, "Fetch buffer size in bytes")
- fetchQueueSize = Param.Unsigned(32, "Fetch queue size in micro-ops")
+ fetchQueueSize = Param.Unsigned(32, "Fetch queue size in micro-ops "
+ "per-thread")
renameToDecodeDelay = Param.Cycles(1, "Rename to decode delay")
iewToDecodeDelay = Param.Cycles(1, "Issue/Execute/Writeback to decode "
diff --git a/src/cpu/o3/commit.hh b/src/cpu/o3/commit.hh
index 473e5e51d..4c9ccf1eb 100644
--- a/src/cpu/o3/commit.hh
+++ b/src/cpu/o3/commit.hh
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2010-2012 ARM Limited
+ * Copyright (c) 2010-2012, 2014 ARM Limited
* All rights reserved.
*
* The license below extends only to copyright in the software and shall
@@ -218,6 +218,9 @@ class DefaultCommit
/** Takes over from another CPU's thread. */
void takeOverFrom();
+ /** Deschedules a thread from the commit priority list. */
+ void deactivateThread(ThreadID tid);
+
/** Ticks the commit stage, which tries to commit instructions. */
void tick();
diff --git a/src/cpu/o3/commit_impl.hh b/src/cpu/o3/commit_impl.hh
index 45c231adb..347b23359 100644
--- a/src/cpu/o3/commit_impl.hh
+++ b/src/cpu/o3/commit_impl.hh
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2010-2013 ARM Limited
+ * Copyright (c) 2010-2014 ARM Limited
* All rights reserved
*
* The license below extends only to copyright in the software and shall
@@ -463,6 +463,19 @@ DefaultCommit<Impl>::takeOverFrom()
template <class Impl>
void
+DefaultCommit<Impl>::deactivateThread(ThreadID tid)
+{
+ list<ThreadID>::iterator thread_it = std::find(priority_list.begin(),
+ priority_list.end(), tid);
+
+ if (thread_it != priority_list.end()) {
+ priority_list.erase(thread_it);
+ }
+}
+
+
+template <class Impl>
+void
DefaultCommit<Impl>::updateStatus()
{
// reset ROB changed variable
diff --git a/src/cpu/o3/cpu.cc b/src/cpu/o3/cpu.cc
index 4f48e29d9..2055d63b6 100644
--- a/src/cpu/o3/cpu.cc
+++ b/src/cpu/o3/cpu.cc
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2011-2012 ARM Limited
+ * Copyright (c) 2011-2012, 2014 ARM Limited
* Copyright (c) 2013 Advanced Micro Devices, Inc.
* All rights reserved
*
@@ -728,6 +728,9 @@ FullO3CPU<Impl>::deactivateThread(ThreadID tid)
tid);
activeThreads.erase(thread_it);
}
+
+ fetch.deactivateThread(tid);
+ commit.deactivateThread(tid);
}
template <class Impl>
diff --git a/src/cpu/o3/fetch.hh b/src/cpu/o3/fetch.hh
index 2e9428ef1..4d01610d9 100644
--- a/src/cpu/o3/fetch.hh
+++ b/src/cpu/o3/fetch.hh
@@ -255,6 +255,8 @@ class DefaultFetch
/** Tells fetch to wake up from a quiesce instruction. */
void wakeFromQuiesce();
+ /** For priority-based fetch policies, the priorityList must be kept updated */
+ void deactivateThread(ThreadID tid);
private:
/** Reset this pipeline stage */
void resetStage();
@@ -484,8 +486,8 @@ class DefaultFetch
/** The size of the fetch queue in micro-ops */
unsigned fetchQueueSize;
- /** Queue of fetched instructions */
- std::deque<DynInstPtr> fetchQueue;
+ /** Queue of fetched instructions. Per-thread to prevent head-of-line (HoL) blocking. */
+ std::deque<DynInstPtr> fetchQueue[Impl::MaxThreads];
/** Whether or not the fetch buffer data is valid. */
bool fetchBufferValid[Impl::MaxThreads];
diff --git a/src/cpu/o3/fetch_impl.hh b/src/cpu/o3/fetch_impl.hh
index 219444ace..d583ae7b6 100644
--- a/src/cpu/o3/fetch_impl.hh
+++ b/src/cpu/o3/fetch_impl.hh
@@ -54,6 +54,7 @@
#include "arch/tlb.hh"
#include "arch/utility.hh"
#include "arch/vtophys.hh"
+#include "base/random.hh"
#include "base/types.hh"
#include "config/the_isa.hh"
#include "cpu/base.hh"
@@ -342,7 +343,6 @@ DefaultFetch<Impl>::resetStage()
cacheBlocked = false;
priorityList.clear();
- fetchQueue.clear();
// Setup PC and nextPC with initial state.
for (ThreadID tid = 0; tid < numThreads; ++tid) {
@@ -360,6 +360,8 @@ DefaultFetch<Impl>::resetStage()
fetchBufferPC[tid] = 0;
fetchBufferValid[tid] = false;
+ fetchQueue[tid].clear();
+
priorityList.push_back(tid);
}
@@ -450,14 +452,18 @@ DefaultFetch<Impl>::isDrained() const
* drain other components).
*/
for (ThreadID i = 0; i < numThreads; ++i) {
- if (!(fetchStatus[i] == Idle ||
- (fetchStatus[i] == Blocked && stalls[i].drain)))
+ // Verify fetch queues are drained
+ if (!fetchQueue[i].empty())
return false;
- }
- // Not drained if fetch queue contains entries
- if (!fetchQueue.empty())
- return false;
+ // Return false if not idle or drain stalled
+ if (fetchStatus[i] != Idle) {
+ if (fetchStatus[i] == Blocked && stalls[i].drain)
+ continue;
+ else
+ return false;
+ }
+ }
/* The pipeline might start up again in the middle of the drain
* cycle if the finish translation event is scheduled, so make
@@ -522,6 +528,17 @@ DefaultFetch<Impl>::switchToInactive()
}
template <class Impl>
+void
+DefaultFetch<Impl>::deactivateThread(ThreadID tid)
+{
+ // Update priority list
+ auto thread_it = std::find(priorityList.begin(), priorityList.end(), tid);
+ if (thread_it != priorityList.end()) {
+ priorityList.erase(thread_it);
+ }
+}
+
+template <class Impl>
bool
DefaultFetch<Impl>::lookupAndUpdateNextPC(
DynInstPtr &inst, TheISA::PCState &nextPC)
@@ -679,7 +696,7 @@ DefaultFetch<Impl>::finishTranslation(Fault fault, RequestPtr mem_req)
}
} else {
// Don't send an instruction to decode if we can't handle it.
- if (!(numInst < fetchWidth) || !(fetchQueue.size() < fetchQueueSize)) {
+ if (!(numInst < fetchWidth) || !(fetchQueue[tid].size() < fetchQueueSize)) {
assert(!finishTranslationEvent.scheduled());
finishTranslationEvent.setFault(fault);
finishTranslationEvent.setReq(mem_req);
@@ -761,13 +778,7 @@ DefaultFetch<Impl>::doSquash(const TheISA::PCState &newPC,
fetchStatus[tid] = Squashing;
// Empty fetch queue
- auto inst_itr = fetchQueue.begin();
- while (inst_itr != fetchQueue.end()) {
- if ((*inst_itr)->threadNumber == tid)
- inst_itr = fetchQueue.erase(inst_itr);
- else
- ++inst_itr;
- }
+ fetchQueue[tid].clear();
// microops are being squashed, it is not known whether the
// youngest non-squashed microop was marked delayed commit
@@ -915,13 +926,6 @@ DefaultFetch<Impl>::tick()
_status = updateFetchStatus();
}
- // If there was activity this cycle, inform the CPU of it.
- if (wroteToTimeBuffer || cpu->contextSwitch) {
- DPRINTF(Activity, "Activity this cycle.\n");
-
- cpu->activityThisCycle();
- }
-
// Issue the next I-cache request if possible.
for (ThreadID i = 0; i < Impl::MaxThreads; ++i) {
if (issuePipelinedIfetch[i]) {
@@ -931,17 +935,45 @@ DefaultFetch<Impl>::tick()
// Send instructions enqueued into the fetch queue to decode.
// Limit rate by fetchWidth. Stall if decode is stalled.
- unsigned instsToDecode = 0;
- while(!fetchQueue.empty() &&
- instsToDecode < decodeWidth &&
- !stalls[fetchQueue.front()->threadNumber].decode) {
- auto inst = fetchQueue.front();
- toDecode->insts[toDecode->size++] = inst;
- DPRINTF(Fetch, "[tid:%i][sn:%i]: Sending instruction to decode from "
- "fetch queue. Fetch queue size: %i.\n",
- inst->threadNumber, inst->seqNum, fetchQueue.size());
- fetchQueue.pop_front();
- instsToDecode++;
+ unsigned insts_to_decode = 0;
+ unsigned available_insts = 0;
+
+ for (auto tid : *activeThreads) {
+ if (!stalls[tid].decode) {
+ available_insts += fetchQueue[tid].size();
+ }
+ }
+
+ // Pick a random thread to start trying to grab instructions from
+ auto tid_itr = activeThreads->begin();
+ std::advance(tid_itr, random_mt.random<uint8_t>(0, activeThreads->size() - 1));
+
+ while (available_insts != 0 && insts_to_decode < decodeWidth) {
+ ThreadID tid = *tid_itr;
+ if (!stalls[tid].decode && !fetchQueue[tid].empty()) {
+ auto inst = fetchQueue[tid].front();
+ toDecode->insts[toDecode->size++] = inst;
+ DPRINTF(Fetch, "[tid:%i][sn:%i]: Sending instruction to decode from "
+ "fetch queue. Fetch queue size: %i.\n",
+ tid, inst->seqNum, fetchQueue[tid].size());
+
+ wroteToTimeBuffer = true;
+ fetchQueue[tid].pop_front();
+ insts_to_decode++;
+ available_insts--;
+ }
+
+ tid_itr++;
+ // Wrap around if at end of active threads list
+ if (tid_itr == activeThreads->end())
+ tid_itr = activeThreads->begin();
+ }
+
+ // If there was activity this cycle, inform the CPU of it.
+ if (wroteToTimeBuffer || cpu->contextSwitch) {
+ DPRINTF(Activity, "Activity this cycle.\n");
+
+ cpu->activityThisCycle();
}
// Reset the number of the instruction we've fetched.
@@ -1095,10 +1127,10 @@ DefaultFetch<Impl>::buildInst(ThreadID tid, StaticInstPtr staticInst,
// Write the instruction to the first slot in the queue
// that heads to decode.
assert(numInst < fetchWidth);
- fetchQueue.push_back(instruction);
- assert(fetchQueue.size() <= fetchQueueSize);
+ fetchQueue[tid].push_back(instruction);
+ assert(fetchQueue[tid].size() <= fetchQueueSize);
DPRINTF(Fetch, "[tid:%i]: Fetch queue entry created (%i/%i).\n",
- tid, fetchQueue.size(), fetchQueueSize);
+ tid, fetchQueue[tid].size(), fetchQueueSize);
//toDecode->insts[toDecode->size++] = instruction;
// Keep track of if we can take an interrupt at this boundary
@@ -1213,7 +1245,7 @@ DefaultFetch<Impl>::fetch(bool &status_change)
// Loop through instruction memory from the cache.
// Keep issuing while fetchWidth is available and branch is not
// predicted taken
- while (numInst < fetchWidth && fetchQueue.size() < fetchQueueSize
+ while (numInst < fetchWidth && fetchQueue[tid].size() < fetchQueueSize
&& !predictedBranch) {
// We need to process more memory if we aren't going to get a
// StaticInst from the rom, the current macroop, or what's already
@@ -1337,7 +1369,8 @@ DefaultFetch<Impl>::fetch(bool &status_change)
break;
}
} while ((curMacroop || decoder[tid]->instReady()) &&
- numInst < fetchWidth && fetchQueue.size() < fetchQueueSize);
+ numInst < fetchWidth &&
+ fetchQueue[tid].size() < fetchQueueSize);
}
if (predictedBranch) {