diff options
Diffstat (limited to 'src/cpu')
32 files changed, 356 insertions, 192 deletions
diff --git a/src/cpu/SConscript b/src/cpu/SConscript index 3dcc2f1ec..1d0a015b0 100644 --- a/src/cpu/SConscript +++ b/src/cpu/SConscript @@ -130,13 +130,13 @@ if need_simple_base: if 'FastCPU' in env['CPU_MODELS']: sources += Split('fast/cpu.cc') +need_bp_unit = False if 'O3CPU' in env['CPU_MODELS']: + need_bp_unit = True sources += SConscript('o3/SConscript', exports = 'env') sources += Split(''' - o3/2bit_local_pred.cc o3/base_dyn_inst.cc o3/bpred_unit.cc - o3/btb.cc o3/commit.cc o3/decode.cc o3/fetch.cc @@ -148,18 +148,17 @@ if 'O3CPU' in env['CPU_MODELS']: o3/lsq_unit.cc o3/lsq.cc o3/mem_dep_unit.cc - o3/ras.cc o3/rename.cc o3/rename_map.cc o3/rob.cc o3/scoreboard.cc o3/store_set.cc - o3/tournament_pred.cc ''') if env['USE_CHECKER']: sources += Split('o3/checker_builder.cc') if 'OzoneCPU' in env['CPU_MODELS']: + need_bp_unit = True sources += Split(''' ozone/base_dyn_inst.cc ozone/bpred_unit.cc @@ -174,6 +173,14 @@ if 'OzoneCPU' in env['CPU_MODELS']: if env['USE_CHECKER']: sources += Split('ozone/checker_builder.cc') +if need_bp_unit: + sources += Split(''' + o3/2bit_local_pred.cc + o3/btb.cc + o3/ras.cc + o3/tournament_pred.cc + ''') + if env['USE_CHECKER']: sources += Split('checker/cpu.cc') checker_supports = False diff --git a/src/cpu/base.cc b/src/cpu/base.cc index 40cec416b..0b9c80591 100644 --- a/src/cpu/base.cc +++ b/src/cpu/base.cc @@ -41,7 +41,6 @@ #include "cpu/cpuevent.hh" #include "cpu/thread_context.hh" #include "cpu/profile.hh" -#include "cpu/sampler/sampler.hh" #include "sim/param.hh" #include "sim/process.hh" #include "sim/sim_events.hh" diff --git a/src/cpu/base.hh b/src/cpu/base.hh index 51f3bb905..5256a411f 100644 --- a/src/cpu/base.hh +++ b/src/cpu/base.hh @@ -36,7 +36,6 @@ #include "base/statistics.hh" #include "config/full_system.hh" -#include "cpu/sampler/sampler.hh" #include "sim/eventq.hh" #include "sim/sim_object.hh" #include "arch/isa_traits.hh" diff --git a/src/cpu/checker/cpu.hh b/src/cpu/checker/cpu.hh index 785387e60..b520e1be0 100644 --- a/src/cpu/checker/cpu.hh +++ b/src/cpu/checker/cpu.hh @@ -66,7 +66,6 @@ class ThreadContext; class MemInterface; class Checkpoint; class Request; -class Sampler; /** * CheckerCPU class. Dynamically verifies instructions as they are @@ -374,7 +373,7 @@ class Checker : public CheckerCPU : CheckerCPU(p) { } - void switchOut(Sampler *s); + void switchOut(); void takeOverFrom(BaseCPU *oldCPU); void verify(DynInstPtr &inst); diff --git a/src/cpu/checker/cpu_impl.hh b/src/cpu/checker/cpu_impl.hh index 137e1c46d..81f97726c 100644 --- a/src/cpu/checker/cpu_impl.hh +++ b/src/cpu/checker/cpu_impl.hh @@ -236,9 +236,7 @@ Checker<DynInstPtr>::verify(DynInstPtr &completed_inst) willChangePC = true; newPC = thread->readPC(); DPRINTF(Checker, "Fault, PC is now %#x\n", newPC); -#else // !FULL_SYSTEM - fatal("fault (%d) detected @ PC 0x%08p", fault, thread->readPC()); -#endif // FULL_SYSTEM +#endif } else { #if THE_ISA != MIPS_ISA // go to the next instruction @@ -295,7 +293,7 @@ Checker<DynInstPtr>::verify(DynInstPtr &completed_inst) template <class DynInstPtr> void -Checker<DynInstPtr>::switchOut(Sampler *s) +Checker<DynInstPtr>::switchOut() { instList.clear(); } diff --git a/src/cpu/cpu_models.py b/src/cpu/cpu_models.py index 1add32745..5b0c6c4da 100644 --- a/src/cpu/cpu_models.py +++ b/src/cpu/cpu_models.py @@ -79,18 +79,6 @@ CpuModel('OzoneCPU', 'ozone_exec.cc', CpuModel('CheckerCPU', 'checker_cpu_exec.cc', '#include "cpu/checker/cpu.hh"', { 'CPU_exec_context': 'CheckerCPU' }) - -# Maybe there is a more clever way to determine ISA -# here but since the environment variable isnt passed through -# here the easiest way is this... -sub_template = 'not found' -for argument in sys.argv: - if 'ALPHA' in argument: - sub_template = 'AlphaDynInst<AlphaSimpleImpl>' - -if sub_template == 'not found': - sys.exit('NO CPU_exec_context substitution defined for this ISA') - CpuModel('O3CPU', 'o3_cpu_exec.cc', '#include "cpu/o3/isa_specific.hh"', - { 'CPU_exec_context': sub_template }) + { 'CPU_exec_context': 'O3DynInst' }) diff --git a/src/cpu/o3/alpha/cpu_builder.cc b/src/cpu/o3/alpha/cpu_builder.cc index 490305cbf..d61eee4b1 100644 --- a/src/cpu/o3/alpha/cpu_builder.cc +++ b/src/cpu/o3/alpha/cpu_builder.cc @@ -91,7 +91,10 @@ Param<unsigned> renameWidth; Param<unsigned> commitToIEWDelay; Param<unsigned> renameToIEWDelay; Param<unsigned> issueToExecuteDelay; +Param<unsigned> dispatchWidth; Param<unsigned> issueWidth; +Param<unsigned> wbWidth; +Param<unsigned> wbDepth; SimObjectParam<FUPool *> fuPool; Param<unsigned> iewToCommitDelay; @@ -207,7 +210,10 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(DerivO3CPU) "Issue/Execute/Writeback delay"), INIT_PARAM(issueToExecuteDelay, "Issue to execute delay (internal" "to the IEW stage)"), + INIT_PARAM(dispatchWidth, "Dispatch width"), INIT_PARAM(issueWidth, "Issue width"), + INIT_PARAM(wbWidth, "Writeback width"), + INIT_PARAM(wbDepth, "Writeback depth (number of cycles it can buffer)"), INIT_PARAM_DFLT(fuPool, "Functional unit pool", NULL), INIT_PARAM(iewToCommitDelay, "Issue/Execute/Writeback to commit " @@ -333,7 +339,10 @@ CREATE_SIM_OBJECT(DerivO3CPU) params->commitToIEWDelay = commitToIEWDelay; params->renameToIEWDelay = renameToIEWDelay; params->issueToExecuteDelay = issueToExecuteDelay; + params->dispatchWidth = dispatchWidth; params->issueWidth = issueWidth; + params->wbWidth = wbWidth; + params->wbDepth = wbDepth; params->fuPool = fuPool; params->iewToCommitDelay = iewToCommitDelay; diff --git a/src/cpu/o3/alpha/impl.hh b/src/cpu/o3/alpha/impl.hh index 8cd8692c6..b928ae654 100644 --- a/src/cpu/o3/alpha/impl.hh +++ b/src/cpu/o3/alpha/impl.hh @@ -36,6 +36,7 @@ #include "cpu/o3/alpha/params.hh" #include "cpu/o3/cpu_policy.hh" + // Forward declarations. template <class Impl> class AlphaDynInst; @@ -88,7 +89,4 @@ struct AlphaSimpleImpl /** The O3Impl to be used. */ typedef AlphaSimpleImpl O3CPUImpl; -/** The O3Impl to be used. */ -typedef DynInst O3DynInst; - #endif // __CPU_O3_ALPHA_IMPL_HH__ diff --git a/src/cpu/o3/alpha/params.hh b/src/cpu/o3/alpha/params.hh index b1f2a487d..c618cee08 100644 --- a/src/cpu/o3/alpha/params.hh +++ b/src/cpu/o3/alpha/params.hh @@ -54,16 +54,7 @@ class AlphaSimpleParams : public O3Params #if FULL_SYSTEM AlphaITB *itb; AlphaDTB *dtb; -#else - std::vector<Process *> workload; - Process *process; -#endif // FULL_SYSTEM - - MemObject *mem; - - BaseCPU *checker; - - unsigned decodeToFetchDelay; +#endif }; #endif // __CPU_O3_ALPHA_PARAMS_HH__ diff --git a/src/cpu/o3/commit.hh b/src/cpu/o3/commit.hh index 60b555269..49ff5cdad 100644 --- a/src/cpu/o3/commit.hh +++ b/src/cpu/o3/commit.hh @@ -187,11 +187,14 @@ class DefaultCommit /** Initializes stage by sending back the number of free entries. */ void initStage(); - /** Initializes the switching out of commit. */ - void switchOut(); + /** Initializes the draining of commit. */ + void drain(); + + /** Resumes execution after draining. */ + void resume(); /** Completes the switch out of commit. */ - void doSwitchOut(); + void switchOut(); /** Takes over from another CPU's thread. */ void takeOverFrom(); @@ -383,8 +386,8 @@ class DefaultCommit /** Number of Active Threads */ unsigned numThreads; - /** Is a switch out pending. */ - bool switchPending; + /** Is a drain pending. */ + bool drainPending; /** Is commit switched out. */ bool switchedOut; diff --git a/src/cpu/o3/commit_impl.hh b/src/cpu/o3/commit_impl.hh index 06b8e8a95..2eb05afac 100644 --- a/src/cpu/o3/commit_impl.hh +++ b/src/cpu/o3/commit_impl.hh @@ -80,7 +80,7 @@ DefaultCommit<Impl>::DefaultCommit(Params *params) renameWidth(params->renameWidth), commitWidth(params->commitWidth), numThreads(params->numberOfThreads), - switchPending(false), + drainPending(false), switchedOut(false), trapLatency(params->trapLatency), fetchTrapLatency(params->fetchTrapLatency) @@ -351,22 +351,28 @@ DefaultCommit<Impl>::initStage() template <class Impl> void -DefaultCommit<Impl>::switchOut() +DefaultCommit<Impl>::drain() { - switchPending = true; + drainPending = true; } template <class Impl> void -DefaultCommit<Impl>::doSwitchOut() +DefaultCommit<Impl>::switchOut() { switchedOut = true; - switchPending = false; + drainPending = false; rob->switchOut(); } template <class Impl> void +DefaultCommit<Impl>::resume() +{ +} + +template <class Impl> +void DefaultCommit<Impl>::takeOverFrom() { switchedOut = false; @@ -557,8 +563,9 @@ DefaultCommit<Impl>::tick() wroteToTimeBuffer = false; _nextStatus = Inactive; - if (switchPending && rob->isEmpty() && !iewStage->hasStoresToWB()) { - cpu->signalSwitched(); + if (drainPending && rob->isEmpty() && !iewStage->hasStoresToWB()) { + cpu->signalDrained(); + drainPending = false; return; } diff --git a/src/cpu/o3/cpu.cc b/src/cpu/o3/cpu.cc index feca4cdf2..b182d5ca7 100644 --- a/src/cpu/o3/cpu.cc +++ b/src/cpu/o3/cpu.cc @@ -158,7 +158,7 @@ FullO3CPU<Impl>::FullO3CPU(Params *params) physmem(system->physmem), #endif // FULL_SYSTEM mem(params->mem), - switchCount(0), + drainCount(0), deferRegistration(params->deferRegistration), numThreads(number_of_threads) { @@ -463,14 +463,13 @@ template <class Impl> void FullO3CPU<Impl>::insertThread(unsigned tid) { - DPRINTF(O3CPU,"[tid:%i] Initializing thread data"); + DPRINTF(O3CPU,"[tid:%i] Initializing thread into CPU"); // Will change now that the PC and thread state is internal to the CPU // and not in the ThreadContext. -#if 0 #if FULL_SYSTEM ThreadContext *src_tc = system->threadContexts[tid]; #else - ThreadContext *src_tc = thread[tid]; + ThreadContext *src_tc = tcBase(tid); #endif //Bind Int Regs to Rename Map @@ -490,11 +489,14 @@ FullO3CPU<Impl>::insertThread(unsigned tid) } //Copy Thread Data Into RegFile - this->copyFromTC(tid); + //this->copyFromTC(tid); - //Set PC/NPC - regFile.pc[tid] = src_tc->readPC(); - regFile.npc[tid] = src_tc->readNextPC(); + //Set PC/NPC/NNPC + setPC(src_tc->readPC(), tid); + setNextPC(src_tc->readNextPC(), tid); +#if THE_ISA != ALPHA_ISA + setNextNPC(src_tc->readNextNPC(), tid); +#endif src_tc->setStatus(ThreadContext::Active); @@ -503,16 +505,19 @@ FullO3CPU<Impl>::insertThread(unsigned tid) //Reset ROB/IQ/LSQ Entries commit.rob->resetEntries(); iew.resetEntries(); -#endif } template <class Impl> void FullO3CPU<Impl>::removeThread(unsigned tid) { - DPRINTF(O3CPU,"[tid:%i] Removing thread data"); -#if 0 - //Unbind Int Regs from Rename Map + DPRINTF(O3CPU,"[tid:%i] Removing thread from CPU."); + + // Copy Thread Data From RegFile + // If thread is suspended, it might be re-allocated + //this->copyToTC(tid); + + // Unbind Int Regs from Rename Map for (int ireg = 0; ireg < TheISA::NumIntRegs; ireg++) { PhysRegIndex phys_reg = renameMap[tid].lookup(ireg); @@ -520,7 +525,7 @@ FullO3CPU<Impl>::removeThread(unsigned tid) freeList.addReg(phys_reg); } - //Unbind Float Regs from Rename Map + // Unbind Float Regs from Rename Map for (int freg = 0; freg < TheISA::NumFloatRegs; freg++) { PhysRegIndex phys_reg = renameMap[tid].lookup(freg); @@ -528,27 +533,18 @@ FullO3CPU<Impl>::removeThread(unsigned tid) freeList.addReg(phys_reg); } - //Copy Thread Data From RegFile - /* Fix Me: - * Do we really need to do this if we are removing a thread - * in the sense that it's finished (exiting)? If the thread is just - * being suspended we might... - */ -// this->copyToTC(tid); - - //Squash Throughout Pipeline + // Squash Throughout Pipeline fetch.squash(0,tid); decode.squash(tid); rename.squash(tid); assert(iew.ldstQueue.getCount(tid) == 0); - //Reset ROB/IQ/LSQ Entries + // Reset ROB/IQ/LSQ Entries if (activeThreads.size() >= 1) { commit.rob->resetEntries(); iew.resetEntries(); } -#endif } @@ -656,7 +652,7 @@ template <class Impl> void FullO3CPU<Impl>::suspendContext(int tid) { - DPRINTF(O3CPU,"[tid: %i]: Suspended ...\n", tid); + DPRINTF(O3CPU,"[tid: %i]: Suspending Thread Context.\n", tid); unscheduleTickEvent(); _status = Idle; /* @@ -676,27 +672,26 @@ template <class Impl> void FullO3CPU<Impl>::deallocateContext(int tid) { - DPRINTF(O3CPU,"[tid:%i]: Deallocating ...", tid); -/* + DPRINTF(O3CPU,"[tid:%i]: Deallocating Thread Context", tid); + //Remove From Active List, if Active - list<unsigned>::iterator isActive = find( - activeThreads.begin(), activeThreads.end(), tid); + list<unsigned>::iterator thread_it = + find(activeThreads.begin(), activeThreads.end(), tid); - if (isActive != activeThreads.end()) { + if (thread_it != activeThreads.end()) { DPRINTF(O3CPU,"[tid:%i]: Removing from active threads list\n", tid); - activeThreads.erase(isActive); + activeThreads.erase(thread_it); removeThread(tid); } -*/ } template <class Impl> void FullO3CPU<Impl>::haltContext(int tid) { - DPRINTF(O3CPU,"[tid:%i]: Halted ...", tid); + DPRINTF(O3CPU,"[tid:%i]: Halting Thread Context", tid); /* //Remove From Active List, if Active list<unsigned>::iterator isActive = find( @@ -713,47 +708,72 @@ FullO3CPU<Impl>::haltContext(int tid) } template <class Impl> -void -FullO3CPU<Impl>::switchOut(Sampler *_sampler) +bool +FullO3CPU<Impl>::drain(Event *drain_event) { - sampler = _sampler; - switchCount = 0; - fetch.switchOut(); - decode.switchOut(); - rename.switchOut(); - iew.switchOut(); - commit.switchOut(); + drainCount = 0; + drainEvent = drain_event; + fetch.drain(); + decode.drain(); + rename.drain(); + iew.drain(); + commit.drain(); // Wake the CPU and record activity so everything can drain out if // the CPU is currently idle. wakeCPU(); activityRec.activity(); + + return false; } template <class Impl> void -FullO3CPU<Impl>::signalSwitched() -{ - if (++switchCount == NumStages) { - fetch.doSwitchOut(); - rename.doSwitchOut(); - commit.doSwitchOut(); - instList.clear(); - while (!removeList.empty()) { - removeList.pop(); - } +FullO3CPU<Impl>::resume() +{ + if (_status == SwitchedOut) + return; + fetch.resume(); + decode.resume(); + rename.resume(); + iew.resume(); + commit.resume(); -#if USE_CHECKER - if (checker) - checker->switchOut(sampler); -#endif + if (!tickEvent.scheduled()) + tickEvent.schedule(curTick); + _status = Running; +} +template <class Impl> +void +FullO3CPU<Impl>::signalDrained() +{ + if (++drainCount == NumStages) { if (tickEvent.scheduled()) tickEvent.squash(); - sampler->signalSwitched(); - _status = SwitchedOut; + _status = Drained; + drainEvent->process(); } - assert(switchCount <= 5); + assert(drainCount <= 5); +} + +template <class Impl> +void +FullO3CPU<Impl>::switchOut() +{ + fetch.switchOut(); + rename.switchOut(); + commit.switchOut(); + instList.clear(); + while (!removeList.empty()) { + removeList.pop(); + } + + _status = SwitchedOut; +#if USE_CHECKER + if (checker) + checker->switchOut(); +#endif } template <class Impl> diff --git a/src/cpu/o3/cpu.hh b/src/cpu/o3/cpu.hh index 1cff6142d..cf3747601 100644 --- a/src/cpu/o3/cpu.hh +++ b/src/cpu/o3/cpu.hh @@ -57,6 +57,8 @@ class Checker; class ThreadContext; template <class> class O3ThreadContext; + +class Checkpoint; class MemObject; class Process; @@ -109,6 +111,7 @@ class FullO3CPU : public BaseO3CPU Idle, Halted, Blocked, + Drained, SwitchedOut }; @@ -270,14 +273,21 @@ class FullO3CPU : public BaseO3CPU */ virtual void syscall(int tid) { panic("Unimplemented!"); } - /** Switches out this CPU. */ - void switchOut(Sampler *sampler); + /** Starts draining the CPU's pipeline of all instructions in + * order to stop all memory accesses. */ + virtual bool drain(Event *drain_event); + + /** Resumes execution after a drain. */ + virtual void resume(); /** Signals to this CPU that a stage has completed switching out. */ - void signalSwitched(); + void signalDrained(); + + /** Switches out this CPU. */ + virtual void switchOut(); /** Takes over from another CPU. */ - void takeOverFrom(BaseCPU *oldCPU); + virtual void takeOverFrom(BaseCPU *oldCPU); /** Get the current instruction sequence number, and increment it. */ InstSeqNum getAndIncrementInstSeq() @@ -550,11 +560,11 @@ class FullO3CPU : public BaseO3CPU /** Pointer to memory. */ MemObject *mem; - /** Pointer to the sampler */ - Sampler *sampler; + /** Event to call process() on once draining has completed. */ + Event *drainEvent; - /** Counter of how many stages have completed switching out. */ - int switchCount; + /** Counter of how many stages have completed draining. */ + int drainCount; /** Pointers to all of the threads in the CPU. */ std::vector<Thread *> thread; diff --git a/src/cpu/o3/decode.hh b/src/cpu/o3/decode.hh index 1edf3335d..1e96f1884 100644 --- a/src/cpu/o3/decode.hh +++ b/src/cpu/o3/decode.hh @@ -109,8 +109,14 @@ class DefaultDecode /** Sets pointer to list of active threads. */ void setActiveThreads(std::list<unsigned> *at_ptr); + /** Drains the decode stage. */ + void drain(); + + /** Resumes execution after a drain. */ + void resume() { } + /** Switches out the decode stage. */ - void switchOut(); + void switchOut() { } /** Takes over from another CPU's thread. */ void takeOverFrom(); diff --git a/src/cpu/o3/decode_impl.hh b/src/cpu/o3/decode_impl.hh index 16be01784..71637883b 100644 --- a/src/cpu/o3/decode_impl.hh +++ b/src/cpu/o3/decode_impl.hh @@ -166,10 +166,10 @@ DefaultDecode<Impl>::setActiveThreads(list<unsigned> *at_ptr) template <class Impl> void -DefaultDecode<Impl>::switchOut() +DefaultDecode<Impl>::drain() { - // Decode can immediately switch out. - cpu->signalSwitched(); + // Decode is done draining at any time. + cpu->signalDrained(); } template <class Impl> diff --git a/src/cpu/o3/dyn_inst.hh b/src/cpu/o3/dyn_inst.hh new file mode 100644 index 000000000..a2cdf2dba --- /dev/null +++ b/src/cpu/o3/dyn_inst.hh @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Korey Sewell + */ + +#ifndef __CPU_O3_DYN_INST_HH__ +#define __CPU_O3_DYN_INST_HH__ + +#include "arch/isa_specific.hh" + +#if THE_ISA == ALPHA_ISA +template <class Impl> +class AlphaDynInst; + +struct AlphaSimpleImpl; + +typedef AlphaDynInst<AlphaSimpleImpl> O3DynInst; +#endif + +#endif // __CPU_O3_DYN_INST_HH__ diff --git a/src/cpu/o3/fetch.hh b/src/cpu/o3/fetch.hh index 7fcd21b7d..9611f0455 100644 --- a/src/cpu/o3/fetch.hh +++ b/src/cpu/o3/fetch.hh @@ -40,8 +40,6 @@ #include "mem/port.hh" #include "sim/eventq.hh" -class Sampler; - /** * DefaultFetch class handles both single threaded and SMT fetch. Its * width is specified by the parameters; each cycle it tries to fetch @@ -182,11 +180,14 @@ class DefaultFetch /** Processes cache completion event. */ void processCacheCompletion(PacketPtr pkt); - /** Begins the switch out of the fetch stage. */ - void switchOut(); + /** Begins the drain of the fetch stage. */ + void drain(); - /** Completes the switch out of the fetch stage. */ - void doSwitchOut(); + /** Resumes execution after a drain. */ + void resume(); + + /** Tells fetch stage to prepare to be switched out. */ + void switchOut(); /** Takes over from another CPU's thread. */ void takeOverFrom(); @@ -423,6 +424,9 @@ class DefaultFetch */ bool interruptPending; + /** Is there a drain pending. */ + bool drainPending; + /** Records if fetch is switched out. */ bool switchedOut; diff --git a/src/cpu/o3/fetch_impl.hh b/src/cpu/o3/fetch_impl.hh index 60eb76d17..500b5304e 100644 --- a/src/cpu/o3/fetch_impl.hh +++ b/src/cpu/o3/fetch_impl.hh @@ -109,6 +109,7 @@ DefaultFetch<Impl>::DefaultFetch(Params *params) numThreads(params->numberOfThreads), numFetchingThreads(params->smtNumFetchingThreads), interruptPending(false), + drainPending(false), switchedOut(false) { if (numThreads > Impl::MaxThreads) @@ -353,7 +354,8 @@ DefaultFetch<Impl>::processCacheCompletion(PacketPtr pkt) // to return. if (fetchStatus[tid] != IcacheWaitResponse || pkt->req != memReq[tid] || - isSwitchedOut()) { + isSwitchedOut() || + drainPending) { ++fetchIcacheSquashes; delete pkt->req; delete pkt; @@ -384,17 +386,25 @@ DefaultFetch<Impl>::processCacheCompletion(PacketPtr pkt) template <class Impl> void -DefaultFetch<Impl>::switchOut() +DefaultFetch<Impl>::drain() { - // Fetch is ready to switch out at any time. - switchedOut = true; - cpu->signalSwitched(); + // Fetch is ready to drain at any time. + cpu->signalDrained(); + drainPending = true; } template <class Impl> void -DefaultFetch<Impl>::doSwitchOut() +DefaultFetch<Impl>::resume() { + drainPending = false; +} + +template <class Impl> +void +DefaultFetch<Impl>::switchOut() +{ + switchedOut = true; // Branch predictor needs to have its state cleared. branchPred.switchOut(); } @@ -498,7 +508,7 @@ DefaultFetch<Impl>::fetchCacheLine(Addr fetch_PC, Fault &ret_fault, unsigned tid unsigned flags = 0; #endif // FULL_SYSTEM - if (cacheBlocked || (interruptPending && flags == 0) || switchedOut) { + if (cacheBlocked || (interruptPending && flags == 0) || drainPending) { // Hold off fetch from getting new instructions when: // Cache is blocked, or // while an interrupt is pending and we're not in PAL mode, or diff --git a/src/cpu/o3/iew.hh b/src/cpu/o3/iew.hh index 2af68d8fc..774b6dcbd 100644 --- a/src/cpu/o3/iew.hh +++ b/src/cpu/o3/iew.hh @@ -143,11 +143,14 @@ class DefaultIEW /** Sets pointer to the scoreboard. */ void setScoreboard(Scoreboard *sb_ptr); - /** Starts switch out of IEW stage. */ - void switchOut(); + /** Drains IEW stage. */ + void drain(); + + /** Resumes execution after a drain. */ + void resume(); /** Completes switch out of IEW stage. */ - void doSwitchOut(); + void switchOut(); /** Takes over from another CPU's thread. */ void takeOverFrom(); @@ -204,6 +207,45 @@ class DefaultIEW /** Returns if the LSQ has any stores to writeback. */ bool hasStoresToWB() { return ldstQueue.hasStoresToWB(); } + void incrWb(InstSeqNum &sn) + { + if (++wbOutstanding == wbMax) + ableToIssue = false; + DPRINTF(IEW, "wbOutstanding: %i\n", wbOutstanding); +#if DEBUG + wbList.insert(sn); +#endif + } + + void decrWb(InstSeqNum &sn) + { + if (wbOutstanding-- == wbMax) + ableToIssue = true; + DPRINTF(IEW, "wbOutstanding: %i\n", wbOutstanding); +#if DEBUG + assert(wbList.find(sn) != wbList.end()); + wbList.erase(sn); +#endif + } + +#if DEBUG + std::set<InstSeqNum> wbList; + + void dumpWb() + { + std::set<InstSeqNum>::iterator wb_it = wbList.begin(); + while (wb_it != wbList.end()) { + cprintf("[sn:%lli]\n", + (*wb_it)); + wb_it++; + } + } +#endif + + bool canIssue() { return ableToIssue; } + + bool ableToIssue; + private: /** Sends commit proper information for a squash due to a branch * mispredict. @@ -384,11 +426,8 @@ class DefaultIEW */ unsigned issueToExecuteDelay; - /** Width of issue's read path, in instructions. The read path is both - * the skid buffer and the rename instruction queue. - * Note to self: is this really different than issueWidth? - */ - unsigned issueReadWidth; + /** Width of dispatch, in instructions. */ + unsigned dispatchWidth; /** Width of issue, in instructions. */ unsigned issueWidth; @@ -403,6 +442,17 @@ class DefaultIEW */ unsigned wbCycle; + /** Number of instructions in flight that will writeback. */ + unsigned wbOutstanding; + + /** Writeback width. */ + unsigned wbWidth; + + /** Writeback width * writeback depth, where writeback depth is + * the number of cycles of writing back instructions that can be + * buffered. */ + unsigned wbMax; + /** Number of active threads. */ unsigned numThreads; diff --git a/src/cpu/o3/iew_impl.hh b/src/cpu/o3/iew_impl.hh index 8e6fd46a1..c3aa748ae 100644 --- a/src/cpu/o3/iew_impl.hh +++ b/src/cpu/o3/iew_impl.hh @@ -50,8 +50,10 @@ DefaultIEW<Impl>::DefaultIEW(Params *params) commitToIEWDelay(params->commitToIEWDelay), renameToIEWDelay(params->renameToIEWDelay), issueToExecuteDelay(params->issueToExecuteDelay), - issueReadWidth(params->issueWidth), + dispatchWidth(params->dispatchWidth), issueWidth(params->issueWidth), + wbOutstanding(0), + wbWidth(params->wbWidth), numThreads(params->numberOfThreads), switchedOut(false) { @@ -74,8 +76,12 @@ DefaultIEW<Impl>::DefaultIEW(Params *params) fetchRedirect[i] = false; } + wbMax = wbWidth * params->wbDepth; + updateLSQNextCycle = false; + ableToIssue = true; + skidBufferMax = (3 * (renameToIEWDelay * params->renameWidth)) + issueWidth; } @@ -349,15 +355,21 @@ DefaultIEW<Impl>::setScoreboard(Scoreboard *sb_ptr) template <class Impl> void -DefaultIEW<Impl>::switchOut() +DefaultIEW<Impl>::drain() +{ + // IEW is ready to drain at any time. + cpu->signalDrained(); +} + +template <class Impl> +void +DefaultIEW<Impl>::resume() { - // IEW is ready to switch out at any time. - cpu->signalSwitched(); } template <class Impl> void -DefaultIEW<Impl>::doSwitchOut() +DefaultIEW<Impl>::switchOut() { // Clear any state. switchedOut = true; @@ -559,12 +571,12 @@ DefaultIEW<Impl>::instToCommit(DynInstPtr &inst) // free slot. while ((*iewQueue)[wbCycle].insts[wbNumInst]) { ++wbNumInst; - if (wbNumInst == issueWidth) { + if (wbNumInst == wbWidth) { ++wbCycle; wbNumInst = 0; } - assert(wbCycle < 5); + assert((wbCycle * wbWidth + wbNumInst) < wbMax); } // Add finished instruction to queue to commit. @@ -937,7 +949,7 @@ DefaultIEW<Impl>::dispatchInsts(unsigned tid) // Loop through the instructions, putting them in the instruction // queue. for ( ; dis_num_inst < insts_to_add && - dis_num_inst < issueReadWidth; + dis_num_inst < dispatchWidth; ++dis_num_inst) { inst = insts_to_dispatch.front(); @@ -1189,6 +1201,7 @@ DefaultIEW<Impl>::executeInsts() ++iewExecSquashedInsts; + decrWb(inst->seqNum); continue; } @@ -1351,6 +1364,8 @@ DefaultIEW<Impl>::writebackInsts() } writebackCount[tid]++; } + + decrWb(inst->seqNum); } } diff --git a/src/cpu/o3/inst_queue_impl.hh b/src/cpu/o3/inst_queue_impl.hh index b99bd0900..36e0842be 100644 --- a/src/cpu/o3/inst_queue_impl.hh +++ b/src/cpu/o3/inst_queue_impl.hh @@ -687,6 +687,7 @@ InstructionQueue<Impl>::scheduleReadyInsts() int total_issued = 0; while (total_issued < totalWidth && + iewStage->canIssue() && order_it != order_end_it) { OpClass op_class = (*order_it).queueType; @@ -784,6 +785,7 @@ InstructionQueue<Impl>::scheduleReadyInsts() listOrder.erase(order_it++); statIssuedInstType[tid][op_class]++; + iewStage->incrWb(issuing_inst->seqNum); } else { statFuBusy[op_class]++; fuBusy[tid]++; diff --git a/src/cpu/o3/lsq_unit_impl.hh b/src/cpu/o3/lsq_unit_impl.hh index 714acb2ef..bb3da7eec 100644 --- a/src/cpu/o3/lsq_unit_impl.hh +++ b/src/cpu/o3/lsq_unit_impl.hh @@ -77,6 +77,7 @@ LSQUnit<Impl>::completeDataAccess(PacketPtr pkt) //iewStage->ldstQueue.removeMSHR(inst->threadNumber,inst->seqNum); if (isSwitchedOut() || inst->isSquashed()) { + iewStage->decrWb(inst->seqNum); delete state; delete pkt; return; diff --git a/src/cpu/o3/params.hh b/src/cpu/o3/params.hh index 69a1bb937..ed53fa97a 100755 --- a/src/cpu/o3/params.hh +++ b/src/cpu/o3/params.hh @@ -47,6 +47,18 @@ class O3Params : public BaseO3CPU::Params unsigned activity; // + // Pointers to key objects + // +#if !FULL_SYSTEM + std::vector<Process *> workload; + Process *process; +#endif // FULL_SYSTEM + + MemObject *mem; + + BaseCPU *checker; + + // // Caches // // MemInterface *icacheInterface; @@ -86,7 +98,10 @@ class O3Params : public BaseO3CPU::Params unsigned commitToIEWDelay; unsigned renameToIEWDelay; unsigned issueToExecuteDelay; + unsigned dispatchWidth; unsigned issueWidth; + unsigned wbWidth; + unsigned wbDepth; FUPool *fuPool; // diff --git a/src/cpu/o3/rename.hh b/src/cpu/o3/rename.hh index 581fc8f81..538dd9bb4 100644 --- a/src/cpu/o3/rename.hh +++ b/src/cpu/o3/rename.hh @@ -157,12 +157,15 @@ class DefaultRename /** Sets pointer to the scoreboard. */ void setScoreboard(Scoreboard *_scoreboard); + /** Drains the rename stage. */ + void drain(); + + /** Resumes execution after a drain. */ + void resume() { } + /** Switches out the rename stage. */ void switchOut(); - /** Completes the switch out. */ - void doSwitchOut(); - /** Takes over from another CPU's thread. */ void takeOverFrom(); diff --git a/src/cpu/o3/rename_impl.hh b/src/cpu/o3/rename_impl.hh index df8b7f9da..fddbae3db 100644 --- a/src/cpu/o3/rename_impl.hh +++ b/src/cpu/o3/rename_impl.hh @@ -258,15 +258,15 @@ DefaultRename<Impl>::setScoreboard(Scoreboard *_scoreboard) template <class Impl> void -DefaultRename<Impl>::switchOut() +DefaultRename<Impl>::drain() { // Rename is ready to switch out at any time. - cpu->signalSwitched(); + cpu->signalDrained(); } template <class Impl> void -DefaultRename<Impl>::doSwitchOut() +DefaultRename<Impl>::switchOut() { // Clear any state, fix up the rename map. for (int i = 0; i < numThreads; i++) { diff --git a/src/cpu/ozone/cpu.hh b/src/cpu/ozone/cpu.hh index f726ac99b..8993781ea 100644 --- a/src/cpu/ozone/cpu.hh +++ b/src/cpu/ozone/cpu.hh @@ -55,7 +55,6 @@ class AlphaDTB; class PhysicalMemory; class MemoryController; -class Sampler; class RemoteGDB; class GDBListener; @@ -356,12 +355,10 @@ class OzoneCPU : public BaseCPU int cpuId; - void switchOut(Sampler *sampler); + void switchOut(); void signalSwitched(); void takeOverFrom(BaseCPU *oldCPU); - Sampler *sampler; - int switchCount; #if FULL_SYSTEM diff --git a/src/cpu/ozone/cpu_impl.hh b/src/cpu/ozone/cpu_impl.hh index 2cdc8a3da..ccb1c8418 100644 --- a/src/cpu/ozone/cpu_impl.hh +++ b/src/cpu/ozone/cpu_impl.hh @@ -244,9 +244,8 @@ OzoneCPU<Impl>::~OzoneCPU() template <class Impl> void -OzoneCPU<Impl>::switchOut(Sampler *_sampler) +OzoneCPU<Impl>::switchOut() { - sampler = _sampler; switchCount = 0; // Front end needs state from back end, so switch out the back end first. backEnd->switchOut(); @@ -262,13 +261,12 @@ OzoneCPU<Impl>::signalSwitched() frontEnd->doSwitchOut(); #if USE_CHECKER if (checker) - checker->switchOut(sampler); + checker->switchOut(); #endif _status = SwitchedOut; if (tickEvent.scheduled()) tickEvent.squash(); - sampler->signalSwitched(); } assert(switchCount <= 2); } diff --git a/src/cpu/simple/base.cc b/src/cpu/simple/base.cc index db5dd2acf..a50541189 100644 --- a/src/cpu/simple/base.cc +++ b/src/cpu/simple/base.cc @@ -41,7 +41,6 @@ #include "cpu/base.hh" #include "cpu/exetrace.hh" #include "cpu/profile.hh" -#include "cpu/sampler/sampler.hh" #include "cpu/simple/base.hh" #include "cpu/simple_thread.hh" #include "cpu/smt.hh" diff --git a/src/cpu/simple/base.hh b/src/cpu/simple/base.hh index 39bc86050..57cfa3c2c 100644 --- a/src/cpu/simple/base.hh +++ b/src/cpu/simple/base.hh @@ -38,7 +38,6 @@ #include "cpu/base.hh" #include "cpu/simple_thread.hh" #include "cpu/pc_event.hh" -#include "cpu/sampler/sampler.hh" #include "cpu/static_inst.hh" #include "mem/packet.hh" #include "mem/port.hh" @@ -128,11 +127,6 @@ class BaseSimpleCPU : public BaseCPU // Static data storage TheISA::IntReg dataReg; - // Pointer to the sampler that is telling us to switchover. - // Used to signal the completion of the pipe drain and schedule - // the next switchover - Sampler *sampler; - StaticInstPtr curStaticInst; void checkForInterrupts(); diff --git a/src/cpu/simple/timing.cc b/src/cpu/simple/timing.cc index d5bdcfa9b..ad04c8d3b 100644 --- a/src/cpu/simple/timing.cc +++ b/src/cpu/simple/timing.cc @@ -88,7 +88,7 @@ TimingSimpleCPU::TimingSimpleCPU(Params *p) { _status = Idle; ifetch_pkt = dcache_pkt = NULL; - quiesceEvent = NULL; + drainEvent = NULL; state = SimObject::Timing; } @@ -112,17 +112,16 @@ TimingSimpleCPU::unserialize(Checkpoint *cp, const string §ion) } bool -TimingSimpleCPU::quiesce(Event *quiesce_event) +TimingSimpleCPU::drain(Event *drain_event) { - // TimingSimpleCPU is ready to quiesce if it's not waiting for + // TimingSimpleCPU is ready to drain if it's not waiting for // an access to complete. if (status() == Idle || status() == Running || status() == SwitchedOut) { - DPRINTF(Config, "Ready to quiesce\n"); + changeState(SimObject::DrainedTiming); return false; } else { - DPRINTF(Config, "Waiting to quiesce\n"); - changeState(SimObject::Quiescing); - quiesceEvent = quiesce_event; + changeState(SimObject::Draining); + drainEvent = drain_event; return true; } } @@ -422,8 +421,8 @@ TimingSimpleCPU::completeIfetch(Packet *pkt) delete pkt->req; delete pkt; - if (getState() == SimObject::Quiescing) { - completeQuiesce(); + if (getState() == SimObject::Draining) { + completeDrain(); return; } @@ -479,8 +478,8 @@ TimingSimpleCPU::completeDataAccess(Packet *pkt) assert(_status == DcacheWaitResponse); _status = Running; - if (getState() == SimObject::Quiescing) { - completeQuiesce(); + if (getState() == SimObject::Draining) { + completeDrain(); delete pkt->req; delete pkt; @@ -499,11 +498,11 @@ TimingSimpleCPU::completeDataAccess(Packet *pkt) void -TimingSimpleCPU::completeQuiesce() +TimingSimpleCPU::completeDrain() { - DPRINTF(Config, "Done quiescing\n"); - changeState(SimObject::QuiescedTiming); - quiesceEvent->process(); + DPRINTF(Config, "Done draining\n"); + changeState(SimObject::DrainedTiming); + drainEvent->process(); } bool diff --git a/src/cpu/simple/timing.hh b/src/cpu/simple/timing.hh index d91144e4a..c360e553e 100644 --- a/src/cpu/simple/timing.hh +++ b/src/cpu/simple/timing.hh @@ -64,7 +64,7 @@ class TimingSimpleCPU : public BaseSimpleCPU Status status() const { return _status; } - Event *quiesceEvent; + Event *drainEvent; private: @@ -133,7 +133,7 @@ class TimingSimpleCPU : public BaseSimpleCPU virtual void serialize(std::ostream &os); virtual void unserialize(Checkpoint *cp, const std::string §ion); - virtual bool quiesce(Event *quiesce_event); + virtual bool drain(Event *drain_event); virtual void resume(); virtual void setMemoryMode(State new_mode); @@ -154,7 +154,7 @@ class TimingSimpleCPU : public BaseSimpleCPU void completeDataAccess(Packet *); void advanceInst(Fault fault); private: - void completeQuiesce(); + void completeDrain(); }; #endif // __CPU_SIMPLE_TIMING_HH__ diff --git a/src/cpu/static_inst.hh b/src/cpu/static_inst.hh index a98078634..ea1a65148 100644 --- a/src/cpu/static_inst.hh +++ b/src/cpu/static_inst.hh @@ -39,6 +39,7 @@ #include "base/misc.hh" #include "base/refcnt.hh" #include "cpu/op_class.hh" +#include "cpu/o3/dyn_inst.hh" #include "sim/host.hh" #include "arch/isa_traits.hh" @@ -51,9 +52,6 @@ class DynInst; class Packet; template <class Impl> -class AlphaDynInst; - -template <class Impl> class OzoneDynInst; class CheckerCPU; |