diff options
Diffstat (limited to 'src/cpu')
-rw-r--r-- | src/cpu/o3/O3CPU.py | 3 | ||||
-rw-r--r-- | src/cpu/o3/commit.hh | 5 | ||||
-rw-r--r-- | src/cpu/o3/commit_impl.hh | 15 | ||||
-rw-r--r-- | src/cpu/o3/cpu.cc | 5 | ||||
-rw-r--r-- | src/cpu/o3/fetch.hh | 6 | ||||
-rw-r--r-- | src/cpu/o3/fetch_impl.hh | 109 |
6 files changed, 99 insertions, 44 deletions
diff --git a/src/cpu/o3/O3CPU.py b/src/cpu/o3/O3CPU.py index c70a12f1d..4d215328e 100644 --- a/src/cpu/o3/O3CPU.py +++ b/src/cpu/o3/O3CPU.py @@ -61,7 +61,8 @@ class DerivO3CPU(BaseCPU): commitToFetchDelay = Param.Cycles(1, "Commit to fetch delay") fetchWidth = Param.Unsigned(8, "Fetch width") fetchBufferSize = Param.Unsigned(64, "Fetch buffer size in bytes") - fetchQueueSize = Param.Unsigned(32, "Fetch queue size in micro-ops") + fetchQueueSize = Param.Unsigned(32, "Fetch queue size in micro-ops " + "per-thread") renameToDecodeDelay = Param.Cycles(1, "Rename to decode delay") iewToDecodeDelay = Param.Cycles(1, "Issue/Execute/Writeback to decode " diff --git a/src/cpu/o3/commit.hh b/src/cpu/o3/commit.hh index 473e5e51d..4c9ccf1eb 100644 --- a/src/cpu/o3/commit.hh +++ b/src/cpu/o3/commit.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010-2012 ARM Limited + * Copyright (c) 2010-2012, 2014 ARM Limited * All rights reserved. * * The license below extends only to copyright in the software and shall @@ -218,6 +218,9 @@ class DefaultCommit /** Takes over from another CPU's thread. */ void takeOverFrom(); + /** Deschedules a thread from scheduling */ + void deactivateThread(ThreadID tid); + /** Ticks the commit stage, which tries to commit instructions. */ void tick(); diff --git a/src/cpu/o3/commit_impl.hh b/src/cpu/o3/commit_impl.hh index 45c231adb..347b23359 100644 --- a/src/cpu/o3/commit_impl.hh +++ b/src/cpu/o3/commit_impl.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010-2013 ARM Limited + * Copyright (c) 2010-2014 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -463,6 +463,19 @@ DefaultCommit<Impl>::takeOverFrom() template <class Impl> void +DefaultCommit<Impl>::deactivateThread(ThreadID tid) +{ + list<ThreadID>::iterator thread_it = std::find(priority_list.begin(), + priority_list.end(), tid); + + if (thread_it != priority_list.end()) { + priority_list.erase(thread_it); + } +} + + +template <class Impl> +void DefaultCommit<Impl>::updateStatus() { // reset ROB changed variable diff --git a/src/cpu/o3/cpu.cc b/src/cpu/o3/cpu.cc index 4f48e29d9..2055d63b6 100644 --- a/src/cpu/o3/cpu.cc +++ b/src/cpu/o3/cpu.cc @@ -1,5 +1,5 @@ /* - * Copyright (c) 2011-2012 ARM Limited + * Copyright (c) 2011-2012, 2014 ARM Limited * Copyright (c) 2013 Advanced Micro Devices, Inc. * All rights reserved * @@ -728,6 +728,9 @@ FullO3CPU<Impl>::deactivateThread(ThreadID tid) tid); activeThreads.erase(thread_it); } + + fetch.deactivateThread(tid); + commit.deactivateThread(tid); } template <class Impl> diff --git a/src/cpu/o3/fetch.hh b/src/cpu/o3/fetch.hh index 2e9428ef1..4d01610d9 100644 --- a/src/cpu/o3/fetch.hh +++ b/src/cpu/o3/fetch.hh @@ -255,6 +255,8 @@ class DefaultFetch /** Tells fetch to wake up from a quiesce instruction. */ void wakeFromQuiesce(); + /** For priority-based fetch policies, need to keep update priorityList */ + void deactivateThread(ThreadID tid); private: /** Reset this pipeline stage */ void resetStage(); @@ -484,8 +486,8 @@ class DefaultFetch /** The size of the fetch queue in micro-ops */ unsigned fetchQueueSize; - /** Queue of fetched instructions */ - std::deque<DynInstPtr> fetchQueue; + /** Queue of fetched instructions. Per-thread to prevent HoL blocking. */ + std::deque<DynInstPtr> fetchQueue[Impl::MaxThreads]; /** Whether or not the fetch buffer data is valid. */ bool fetchBufferValid[Impl::MaxThreads]; diff --git a/src/cpu/o3/fetch_impl.hh b/src/cpu/o3/fetch_impl.hh index 219444ace..d583ae7b6 100644 --- a/src/cpu/o3/fetch_impl.hh +++ b/src/cpu/o3/fetch_impl.hh @@ -54,6 +54,7 @@ #include "arch/tlb.hh" #include "arch/utility.hh" #include "arch/vtophys.hh" +#include "base/random.hh" #include "base/types.hh" #include "config/the_isa.hh" #include "cpu/base.hh" @@ -342,7 +343,6 @@ DefaultFetch<Impl>::resetStage() cacheBlocked = false; priorityList.clear(); - fetchQueue.clear(); // Setup PC and nextPC with initial state. for (ThreadID tid = 0; tid < numThreads; ++tid) { @@ -360,6 +360,8 @@ DefaultFetch<Impl>::resetStage() fetchBufferPC[tid] = 0; fetchBufferValid[tid] = false; + fetchQueue[tid].clear(); + priorityList.push_back(tid); } @@ -450,14 +452,18 @@ DefaultFetch<Impl>::isDrained() const * drain other components). */ for (ThreadID i = 0; i < numThreads; ++i) { - if (!(fetchStatus[i] == Idle || - (fetchStatus[i] == Blocked && stalls[i].drain))) + // Verify fetch queues are drained + if (!fetchQueue[i].empty()) return false; - } - // Not drained if fetch queue contains entries - if (!fetchQueue.empty()) - return false; + // Return false if not idle or drain stalled + if (fetchStatus[i] != Idle) { + if (fetchStatus[i] == Blocked && stalls[i].drain) + continue; + else + return false; + } + } /* The pipeline might start up again in the middle of the drain * cycle if the finish translation event is scheduled, so make @@ -522,6 +528,17 @@ DefaultFetch<Impl>::switchToInactive() } template <class Impl> +void +DefaultFetch<Impl>::deactivateThread(ThreadID tid) +{ + // Update priority list + auto thread_it = std::find(priorityList.begin(), priorityList.end(), tid); + if (thread_it != priorityList.end()) { + priorityList.erase(thread_it); + } +} + +template <class Impl> bool DefaultFetch<Impl>::lookupAndUpdateNextPC( DynInstPtr &inst, TheISA::PCState &nextPC) @@ -679,7 +696,7 @@ DefaultFetch<Impl>::finishTranslation(Fault fault, RequestPtr mem_req) } } else { // Don't send an instruction to decode if we can't handle it. - if (!(numInst < fetchWidth) || !(fetchQueue.size() < fetchQueueSize)) { + if (!(numInst < fetchWidth) || !(fetchQueue[tid].size() < fetchQueueSize)) { assert(!finishTranslationEvent.scheduled()); finishTranslationEvent.setFault(fault); finishTranslationEvent.setReq(mem_req); @@ -761,13 +778,7 @@ DefaultFetch<Impl>::doSquash(const TheISA::PCState &newPC, fetchStatus[tid] = Squashing; // Empty fetch queue - auto inst_itr = fetchQueue.begin(); - while (inst_itr != fetchQueue.end()) { - if ((*inst_itr)->threadNumber == tid) - inst_itr = fetchQueue.erase(inst_itr); - else - ++inst_itr; - } + fetchQueue[tid].clear(); // microops are being squashed, it is not known wheather the // youngest non-squashed microop was marked delayed commit @@ -915,13 +926,6 @@ DefaultFetch<Impl>::tick() _status = updateFetchStatus(); } - // If there was activity this cycle, inform the CPU of it. - if (wroteToTimeBuffer || cpu->contextSwitch) { - DPRINTF(Activity, "Activity this cycle.\n"); - - cpu->activityThisCycle(); - } - // Issue the next I-cache request if possible. for (ThreadID i = 0; i < Impl::MaxThreads; ++i) { if (issuePipelinedIfetch[i]) { @@ -931,17 +935,45 @@ DefaultFetch<Impl>::tick() // Send instructions enqueued into the fetch queue to decode. // Limit rate by fetchWidth. Stall if decode is stalled. - unsigned instsToDecode = 0; - while(!fetchQueue.empty() && - instsToDecode < decodeWidth && - !stalls[fetchQueue.front()->threadNumber].decode) { - auto inst = fetchQueue.front(); - toDecode->insts[toDecode->size++] = inst; - DPRINTF(Fetch, "[tid:%i][sn:%i]: Sending instruction to decode from " - "fetch queue. Fetch queue size: %i.\n", - inst->threadNumber, inst->seqNum, fetchQueue.size()); - fetchQueue.pop_front(); - instsToDecode++; + unsigned insts_to_decode = 0; + unsigned available_insts = 0; + + for (auto tid : *activeThreads) { + if (!stalls[tid].decode) { + available_insts += fetchQueue[tid].size(); + } + } + + // Pick a random thread to start trying to grab instructions from + auto tid_itr = activeThreads->begin(); + std::advance(tid_itr, random_mt.random<uint8_t>(0, activeThreads->size() - 1)); + + while (available_insts != 0 && insts_to_decode < decodeWidth) { + ThreadID tid = *tid_itr; + if (!stalls[tid].decode && !fetchQueue[tid].empty()) { + auto inst = fetchQueue[tid].front(); + toDecode->insts[toDecode->size++] = inst; + DPRINTF(Fetch, "[tid:%i][sn:%i]: Sending instruction to decode from " + "fetch queue. Fetch queue size: %i.\n", + tid, inst->seqNum, fetchQueue[tid].size()); + + wroteToTimeBuffer = true; + fetchQueue[tid].pop_front(); + insts_to_decode++; + available_insts--; + } + + tid_itr++; + // Wrap around if at end of active threads list + if (tid_itr == activeThreads->end()) + tid_itr = activeThreads->begin(); + } + + // If there was activity this cycle, inform the CPU of it. + if (wroteToTimeBuffer || cpu->contextSwitch) { + DPRINTF(Activity, "Activity this cycle.\n"); + + cpu->activityThisCycle(); } // Reset the number of the instruction we've fetched. @@ -1095,10 +1127,10 @@ DefaultFetch<Impl>::buildInst(ThreadID tid, StaticInstPtr staticInst, // Write the instruction to the first slot in the queue // that heads to decode. assert(numInst < fetchWidth); - fetchQueue.push_back(instruction); - assert(fetchQueue.size() <= fetchQueueSize); + fetchQueue[tid].push_back(instruction); + assert(fetchQueue[tid].size() <= fetchQueueSize); DPRINTF(Fetch, "[tid:%i]: Fetch queue entry created (%i/%i).\n", - tid, fetchQueue.size(), fetchQueueSize); + tid, fetchQueue[tid].size(), fetchQueueSize); //toDecode->insts[toDecode->size++] = instruction; // Keep track of if we can take an interrupt at this boundary @@ -1213,7 +1245,7 @@ DefaultFetch<Impl>::fetch(bool &status_change) // Loop through instruction memory from the cache. // Keep issuing while fetchWidth is available and branch is not // predicted taken - while (numInst < fetchWidth && fetchQueue.size() < fetchQueueSize + while (numInst < fetchWidth && fetchQueue[tid].size() < fetchQueueSize && !predictedBranch) { // We need to process more memory if we aren't going to get a // StaticInst from the rom, the current macroop, or what's already @@ -1337,7 +1369,8 @@ DefaultFetch<Impl>::fetch(bool &status_change) break; } } while ((curMacroop || decoder[tid]->instReady()) && - numInst < fetchWidth && fetchQueue.size() < fetchQueueSize); + numInst < fetchWidth && + fetchQueue[tid].size() < fetchQueueSize); } if (predictedBranch) { |