diff options
Diffstat (limited to 'src/cpu/o3')
56 files changed, 3272 insertions, 2123 deletions
diff --git a/src/cpu/o3/SConscript b/src/cpu/o3/SConscript new file mode 100755 index 000000000..e65d41411 --- /dev/null +++ b/src/cpu/o3/SConscript @@ -0,0 +1,79 @@ +# -*- mode:python -*- + +# Copyright (c) 2006 The Regents of The University of Michigan +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# Authors: Korey Sewell + +import os +import os.path +import sys + +# Import build environment variable from SConstruct. +Import('env') + + +################################################################# +# +# Include ISA-specific files for the O3 CPU-model +# +################################################################# + +sources = [] + +if env['TARGET_ISA'] == 'alpha': + sources += Split(''' + alpha/dyn_inst.cc + alpha/cpu.cc + alpha/thread_context.cc + alpha/cpu_builder.cc + ''') +elif env['TARGET_ISA'] == 'mips': + sys.exit('O3 CPU does not support MIPS') + #sources += Split(''' + # mips/dyn_inst.cc + # mips/cpu.cc + # mips/thread_context.cc + # mips/cpu_builder.cc + # ''') +elif env['TARGET_ISA'] == 'sparc': + sys.exit('O3 CPU does not support MIPS') + #sources += Split(''' + # sparc/dyn_inst.cc + # sparc/cpu.cc + # sparc/thread_context.cc + # sparc/cpu_builder.cc + # ''') +else: + sys.exit('O3 CPU does not support the \'%s\' ISA' % env['TARGET_ISA']) + + +# Convert file names to SCons File objects. This takes care of the +# path relative to the top of the directory tree. +sources = [File(s) for s in sources] + +Return('sources') + diff --git a/src/cpu/o3/alpha_cpu.cc b/src/cpu/o3/alpha/cpu.cc index 39cae696b..ed10b2fd1 100644 --- a/src/cpu/o3/alpha_cpu.cc +++ b/src/cpu/o3/alpha/cpu.cc @@ -28,11 +28,11 @@ * Authors: Kevin Lim */ -#include "cpu/o3/alpha_impl.hh" -#include "cpu/o3/alpha_cpu_impl.hh" -#include "cpu/o3/alpha_dyn_inst.hh" +#include "cpu/o3/alpha/impl.hh" +#include "cpu/o3/alpha/cpu_impl.hh" +#include "cpu/o3/alpha/dyn_inst.hh" -// Force instantiation of AlphaFullCPU for all the implemntations that are +// Force instantiation of AlphaO3CPU for all the implemntations that are // needed. Consider merging this and alpha_dyn_inst.cc, and maybe all // classes that depend on a certain impl, into one file (alpha_impl.cc?). -template class AlphaFullCPU<AlphaSimpleImpl>; +template class AlphaO3CPU<AlphaSimpleImpl>; diff --git a/src/cpu/o3/alpha/cpu.hh b/src/cpu/o3/alpha/cpu.hh new file mode 100644 index 000000000..b961341d5 --- /dev/null +++ b/src/cpu/o3/alpha/cpu.hh @@ -0,0 +1,204 @@ +/* + * Copyright (c) 2004-2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Kevin Lim + */ + +#ifndef __CPU_O3_ALPHA_CPU_HH__ +#define __CPU_O3_ALPHA_CPU_HH__ + +#include "arch/isa_traits.hh" +#include "cpu/thread_context.hh" +#include "cpu/o3/cpu.hh" +#include "sim/byteswap.hh" + +class EndQuiesceEvent; +namespace Kernel { + class Statistics; +}; + +class TranslatingPort; + +/** + * AlphaO3CPU class. Derives from the FullO3CPU class, and + * implements all ISA and implementation specific functions of the + * CPU. This is the CPU class that is used for the SimObjects, and is + * what is given to the DynInsts. Most of its state exists in the + * FullO3CPU; the state is has is mainly for ISA specific + * functionality. + */ +template <class Impl> +class AlphaO3CPU : public FullO3CPU<Impl> +{ + protected: + typedef TheISA::IntReg IntReg; + typedef TheISA::FloatReg FloatReg; + typedef TheISA::FloatRegBits FloatRegBits; + typedef TheISA::MiscReg MiscReg; + typedef TheISA::RegFile RegFile; + typedef TheISA::MiscRegFile MiscRegFile; + + public: + typedef O3ThreadState<Impl> ImplState; + typedef O3ThreadState<Impl> Thread; + typedef typename Impl::Params Params; + + /** Constructs an AlphaO3CPU with the given parameters. */ + AlphaO3CPU(Params *params); + +#if FULL_SYSTEM + /** ITB pointer. */ + AlphaITB *itb; + /** DTB pointer. */ + AlphaDTB *dtb; +#endif + + /** Registers statistics. */ + void regStats(); + +#if FULL_SYSTEM + /** Translates instruction requestion. */ + Fault translateInstReq(RequestPtr &req, Thread *thread) + { + return itb->translate(req, thread->getTC()); + } + + /** Translates data read request. */ + Fault translateDataReadReq(RequestPtr &req, Thread *thread) + { + return dtb->translate(req, thread->getTC(), false); + } + + /** Translates data write request. */ + Fault translateDataWriteReq(RequestPtr &req, Thread *thread) + { + return dtb->translate(req, thread->getTC(), true); + } + +#else + /** Translates instruction requestion in syscall emulation mode. */ + Fault translateInstReq(RequestPtr &req, Thread *thread) + { + return thread->getProcessPtr()->pTable->translate(req); + } + + /** Translates data read request in syscall emulation mode. */ + Fault translateDataReadReq(RequestPtr &req, Thread *thread) + { + return thread->getProcessPtr()->pTable->translate(req); + } + + /** Translates data write request in syscall emulation mode. */ + Fault translateDataWriteReq(RequestPtr &req, Thread *thread) + { + return thread->getProcessPtr()->pTable->translate(req); + } + +#endif + /** Reads a miscellaneous register. */ + MiscReg readMiscReg(int misc_reg, unsigned tid); + + /** Reads a misc. register, including any side effects the read + * might have as defined by the architecture. + */ + MiscReg readMiscRegWithEffect(int misc_reg, Fault &fault, unsigned tid); + + /** Sets a miscellaneous register. */ + Fault setMiscReg(int misc_reg, const MiscReg &val, unsigned tid); + + /** Sets a misc. register, including any side effects the write + * might have as defined by the architecture. + */ + Fault setMiscRegWithEffect(int misc_reg, const MiscReg &val, unsigned tid); + + /** Initiates a squash of all in-flight instructions for a given + * thread. The source of the squash is an external update of + * state through the TC. + */ + void squashFromTC(unsigned tid); + +#if FULL_SYSTEM + /** Posts an interrupt. */ + void post_interrupt(int int_num, int index); + /** Reads the interrupt flag. */ + int readIntrFlag(); + /** Sets the interrupt flags. */ + void setIntrFlag(int val); + /** HW return from error interrupt. */ + Fault hwrei(unsigned tid); + /** Returns if a specific PC is a PAL mode PC. */ + bool inPalMode(uint64_t PC) + { return AlphaISA::PcPAL(PC); } + + bool simPalCheck(int palFunc, unsigned tid); + + /** Processes any interrupts. */ + void processInterrupts(); + + /** Halts the CPU. */ + void halt() { panic("Halt not implemented!\n"); } +#endif + + /** Traps to handle given fault. */ + void trap(Fault fault, unsigned tid); + +#if !FULL_SYSTEM + /** Executes a syscall. + * @todo: Determine if this needs to be virtual. + */ + void syscall(int64_t callnum, int tid); + /** Gets a syscall argument. */ + IntReg getSyscallArg(int i, int tid); + + /** Used to shift args for indirect syscall. */ + void setSyscallArg(int i, IntReg val, int tid); + + /** Sets the return value of a syscall. */ + void setSyscallReturn(SyscallReturn return_value, int tid); +#endif + + /** CPU read function, forwards read to LSQ. */ + template <class T> + Fault read(RequestPtr &req, T &data, int load_idx) + { + return this->iew.ldstQueue.read(req, data, load_idx); + } + + /** CPU write function, forwards write to LSQ. */ + template <class T> + Fault write(RequestPtr &req, T &data, int store_idx) + { + return this->iew.ldstQueue.write(req, data, store_idx); + } + + Addr lockAddr; + + /** Temporary fix for the lock flag, works in the UP case. */ + bool lockFlag; +}; + +#endif // __CPU_O3_ALPHA_CPU_HH__ diff --git a/src/cpu/o3/alpha_cpu_builder.cc b/src/cpu/o3/alpha/cpu_builder.cc index 828977ccb..5e767655d 100644 --- a/src/cpu/o3/alpha_cpu_builder.cc +++ b/src/cpu/o3/alpha/cpu_builder.cc @@ -31,21 +31,21 @@ #include <string> #include "cpu/base.hh" -#include "cpu/o3/alpha_cpu.hh" -#include "cpu/o3/alpha_impl.hh" -#include "cpu/o3/alpha_params.hh" +#include "cpu/o3/alpha/cpu.hh" +#include "cpu/o3/alpha/impl.hh" +#include "cpu/o3/alpha/params.hh" #include "cpu/o3/fu_pool.hh" #include "sim/builder.hh" -class DerivAlphaFullCPU : public AlphaFullCPU<AlphaSimpleImpl> +class DerivO3CPU : public AlphaO3CPU<AlphaSimpleImpl> { public: - DerivAlphaFullCPU(AlphaSimpleParams *p) - : AlphaFullCPU<AlphaSimpleImpl>(p) + DerivO3CPU(AlphaSimpleParams *p) + : AlphaO3CPU<AlphaSimpleImpl>(p) { } }; -BEGIN_DECLARE_SIM_OBJECT_PARAMS(DerivAlphaFullCPU) +BEGIN_DECLARE_SIM_OBJECT_PARAMS(DerivO3CPU) Param<int> clock; Param<int> numThreads; @@ -91,12 +91,10 @@ Param<unsigned> renameWidth; Param<unsigned> commitToIEWDelay; Param<unsigned> renameToIEWDelay; Param<unsigned> issueToExecuteDelay; +Param<unsigned> dispatchWidth; Param<unsigned> issueWidth; -Param<unsigned> executeWidth; -Param<unsigned> executeIntWidth; -Param<unsigned> executeFloatWidth; -Param<unsigned> executeBranchWidth; -Param<unsigned> executeMemoryWidth; +Param<unsigned> wbWidth; +Param<unsigned> wbDepth; SimObjectParam<FUPool *> fuPool; Param<unsigned> iewToCommitDelay; @@ -104,7 +102,9 @@ Param<unsigned> renameToROBDelay; Param<unsigned> commitWidth; Param<unsigned> squashWidth; Param<Tick> trapLatency; -Param<Tick> fetchTrapLatency; + +Param<unsigned> backComSize; +Param<unsigned> forwardComSize; Param<std::string> predType; Param<unsigned> localPredictorSize; @@ -149,9 +149,9 @@ Param<bool> defer_registration; Param<bool> function_trace; Param<Tick> function_trace_start; -END_DECLARE_SIM_OBJECT_PARAMS(DerivAlphaFullCPU) +END_DECLARE_SIM_OBJECT_PARAMS(DerivO3CPU) -BEGIN_INIT_SIM_OBJECT_PARAMS(DerivAlphaFullCPU) +BEGIN_INIT_SIM_OBJECT_PARAMS(DerivO3CPU) INIT_PARAM(clock, "clock speed"), INIT_PARAM(numThreads, "number of HW thread contexts"), @@ -212,12 +212,10 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(DerivAlphaFullCPU) "Issue/Execute/Writeback delay"), INIT_PARAM(issueToExecuteDelay, "Issue to execute delay (internal" "to the IEW stage)"), + INIT_PARAM(dispatchWidth, "Dispatch width"), INIT_PARAM(issueWidth, "Issue width"), - INIT_PARAM(executeWidth, "Execute width"), - INIT_PARAM(executeIntWidth, "Integer execute width"), - INIT_PARAM(executeFloatWidth, "Floating point execute width"), - INIT_PARAM(executeBranchWidth, "Branch execute width"), - INIT_PARAM(executeMemoryWidth, "Memory execute width"), + INIT_PARAM(wbWidth, "Writeback width"), + INIT_PARAM(wbDepth, "Writeback depth (number of cycles it can buffer)"), INIT_PARAM_DFLT(fuPool, "Functional unit pool", NULL), INIT_PARAM(iewToCommitDelay, "Issue/Execute/Writeback to commit " @@ -226,7 +224,9 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(DerivAlphaFullCPU) INIT_PARAM(commitWidth, "Commit width"), INIT_PARAM(squashWidth, "Squash width"), INIT_PARAM_DFLT(trapLatency, "Number of cycles before the trap is handled", 6), - INIT_PARAM_DFLT(fetchTrapLatency, "Number of cycles before the fetch trap is handled", 12), + + INIT_PARAM(backComSize, "Time buffer size for backwards communication"), + INIT_PARAM(forwardComSize, "Time buffer size for forward communication"), INIT_PARAM(predType, "Type of branch predictor ('local', 'tournament')"), INIT_PARAM(localPredictorSize, "Size of local predictor"), @@ -271,11 +271,11 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(DerivAlphaFullCPU) INIT_PARAM(function_trace, "Enable function trace"), INIT_PARAM(function_trace_start, "Cycle to start function trace") -END_INIT_SIM_OBJECT_PARAMS(DerivAlphaFullCPU) +END_INIT_SIM_OBJECT_PARAMS(DerivO3CPU) -CREATE_SIM_OBJECT(DerivAlphaFullCPU) +CREATE_SIM_OBJECT(DerivO3CPU) { - DerivAlphaFullCPU *cpu; + DerivO3CPU *cpu; #if FULL_SYSTEM // Full-system only supports a single thread for the moment. @@ -284,12 +284,12 @@ CREATE_SIM_OBJECT(DerivAlphaFullCPU) // In non-full-system mode, we infer the number of threads from // the workload if it's not explicitly specified. int actual_num_threads = - numThreads.isValid() ? numThreads : workload.size(); + (numThreads.isValid() && numThreads >= workload.size()) ? + numThreads : workload.size(); if (workload.size() == 0) { fatal("Must specify at least one workload!"); } - #endif AlphaSimpleParams *params = new AlphaSimpleParams; @@ -343,12 +343,10 @@ CREATE_SIM_OBJECT(DerivAlphaFullCPU) params->commitToIEWDelay = commitToIEWDelay; params->renameToIEWDelay = renameToIEWDelay; params->issueToExecuteDelay = issueToExecuteDelay; + params->dispatchWidth = dispatchWidth; params->issueWidth = issueWidth; - params->executeWidth = executeWidth; - params->executeIntWidth = executeIntWidth; - params->executeFloatWidth = executeFloatWidth; - params->executeBranchWidth = executeBranchWidth; - params->executeMemoryWidth = executeMemoryWidth; + params->wbWidth = wbWidth; + params->wbDepth = wbDepth; params->fuPool = fuPool; params->iewToCommitDelay = iewToCommitDelay; @@ -356,7 +354,9 @@ CREATE_SIM_OBJECT(DerivAlphaFullCPU) params->commitWidth = commitWidth; params->squashWidth = squashWidth; params->trapLatency = trapLatency; - params->fetchTrapLatency = fetchTrapLatency; + + params->backComSize = backComSize; + params->forwardComSize = forwardComSize; params->predType = predType; params->localPredictorSize = localPredictorSize; @@ -386,7 +386,16 @@ CREATE_SIM_OBJECT(DerivAlphaFullCPU) params->numROBEntries = numROBEntries; params->smtNumFetchingThreads = smtNumFetchingThreads; - params->smtFetchPolicy = smtFetchPolicy; + + // Default smtFetchPolicy to "RoundRobin", if necessary. + std::string round_robin_policy = "RoundRobin"; + std::string single_thread = "SingleThread"; + + if (actual_num_threads > 1 && single_thread.compare(smtFetchPolicy) == 0) + params->smtFetchPolicy = round_robin_policy; + else + params->smtFetchPolicy = smtFetchPolicy; + params->smtIQPolicy = smtIQPolicy; params->smtLSQPolicy = smtLSQPolicy; params->smtLSQThreshold = smtLSQThreshold; @@ -401,10 +410,10 @@ CREATE_SIM_OBJECT(DerivAlphaFullCPU) params->functionTrace = function_trace; params->functionTraceStart = function_trace_start; - cpu = new DerivAlphaFullCPU(params); + cpu = new DerivO3CPU(params); return cpu; } -REGISTER_SIM_OBJECT("DerivAlphaFullCPU", DerivAlphaFullCPU) +REGISTER_SIM_OBJECT("DerivO3CPU", DerivO3CPU) diff --git a/src/cpu/o3/alpha/cpu_impl.hh b/src/cpu/o3/alpha/cpu_impl.hh new file mode 100644 index 000000000..0473e60c2 --- /dev/null +++ b/src/cpu/o3/alpha/cpu_impl.hh @@ -0,0 +1,422 @@ +/* + * Copyright (c) 2004-2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Kevin Lim + */ + +#include "config/use_checker.hh" + +#include "arch/alpha/faults.hh" +#include "base/cprintf.hh" +#include "base/statistics.hh" +#include "base/timebuf.hh" +#include "cpu/checker/thread_context.hh" +#include "sim/sim_events.hh" +#include "sim/stats.hh" + +#include "cpu/o3/alpha/cpu.hh" +#include "cpu/o3/alpha/params.hh" +#include "cpu/o3/alpha/thread_context.hh" +#include "cpu/o3/comm.hh" +#include "cpu/o3/thread_state.hh" + +#if FULL_SYSTEM +#include "arch/alpha/osfpal.hh" +#include "arch/isa_traits.hh" +#include "cpu/quiesce_event.hh" +#include "kern/kernel_stats.hh" +#include "sim/sim_exit.hh" +#include "sim/system.hh" +#endif + +using namespace TheISA; + +template <class Impl> +AlphaO3CPU<Impl>::AlphaO3CPU(Params *params) +#if FULL_SYSTEM + : FullO3CPU<Impl>(params), itb(params->itb), dtb(params->dtb) +#else + : FullO3CPU<Impl>(params) +#endif +{ + DPRINTF(O3CPU, "Creating AlphaO3CPU object.\n"); + + // Setup any thread state. + this->thread.resize(this->numThreads); + + for (int i = 0; i < this->numThreads; ++i) { +#if FULL_SYSTEM + // SMT is not supported in FS mode yet. + assert(this->numThreads == 1); + this->thread[i] = new Thread(this, 0); + this->thread[i]->setStatus(ThreadContext::Suspended); +#else + if (i < params->workload.size()) { + DPRINTF(O3CPU, "Workload[%i] process is %#x", + i, this->thread[i]); + this->thread[i] = new Thread(this, i, params->workload[i], + i, params->mem); + + this->thread[i]->setStatus(ThreadContext::Suspended); + +#if !FULL_SYSTEM + /* Use this port to for syscall emulation writes to memory. */ + Port *mem_port; + TranslatingPort *trans_port; + trans_port = new TranslatingPort(csprintf("%s-%d-funcport", + name(), i), + params->workload[i]->pTable, + false); + mem_port = params->mem->getPort("functional"); + mem_port->setPeer(trans_port); + trans_port->setPeer(mem_port); + this->thread[i]->setMemPort(trans_port); +#endif + //usedTids[i] = true; + //threadMap[i] = i; + } else { + //Allocate Empty thread so M5 can use later + //when scheduling threads to CPU + Process* dummy_proc = NULL; + + this->thread[i] = new Thread(this, i, dummy_proc, i, params->mem); + //usedTids[i] = false; + } +#endif // !FULL_SYSTEM + + ThreadContext *tc; + + // Setup the TC that will serve as the interface to the threads/CPU. + AlphaTC<Impl> *alpha_tc = + new AlphaTC<Impl>; + + tc = alpha_tc; + + // If we're using a checker, then the TC should be the + // CheckerThreadContext. +#if USE_CHECKER + if (params->checker) { + tc = new CheckerThreadContext<AlphaTC<Impl> >( + alpha_tc, this->checker); + } +#endif + + alpha_tc->cpu = this; + alpha_tc->thread = this->thread[i]; + +#if FULL_SYSTEM + // Setup quiesce event. + this->thread[i]->quiesceEvent = new EndQuiesceEvent(tc); + + Port *mem_port; + FunctionalPort *phys_port; + VirtualPort *virt_port; + phys_port = new FunctionalPort(csprintf("%s-%d-funcport", + name(), i)); + mem_port = this->system->physmem->getPort("functional"); + mem_port->setPeer(phys_port); + phys_port->setPeer(mem_port); + + virt_port = new VirtualPort(csprintf("%s-%d-vport", + name(), i)); + mem_port = this->system->physmem->getPort("functional"); + mem_port->setPeer(virt_port); + virt_port->setPeer(mem_port); + + this->thread[i]->setPhysPort(phys_port); + this->thread[i]->setVirtPort(virt_port); +#endif + // Give the thread the TC. + this->thread[i]->tc = tc; + + // Add the TC to the CPU's list of TC's. + this->threadContexts.push_back(tc); + } + + for (int i=0; i < this->numThreads; i++) { + this->thread[i]->setFuncExeInst(0); + } + + // Sets CPU pointers. These must be set at this level because the CPU + // pointers are defined to be the highest level of CPU class. + this->fetch.setCPU(this); + this->decode.setCPU(this); + this->rename.setCPU(this); + this->iew.setCPU(this); + this->commit.setCPU(this); + + this->rob.setCPU(this); + this->regFile.setCPU(this); + + lockAddr = 0; + lockFlag = false; +} + +template <class Impl> +void +AlphaO3CPU<Impl>::regStats() +{ + // Register stats for everything that has stats. + this->fullCPURegStats(); + this->fetch.regStats(); + this->decode.regStats(); + this->rename.regStats(); + this->iew.regStats(); + this->commit.regStats(); +} + + +template <class Impl> +MiscReg +AlphaO3CPU<Impl>::readMiscReg(int misc_reg, unsigned tid) +{ + return this->regFile.readMiscReg(misc_reg, tid); +} + +template <class Impl> +MiscReg +AlphaO3CPU<Impl>::readMiscRegWithEffect(int misc_reg, Fault &fault, + unsigned tid) +{ + return this->regFile.readMiscRegWithEffect(misc_reg, fault, tid); +} + +template <class Impl> +Fault +AlphaO3CPU<Impl>::setMiscReg(int misc_reg, const MiscReg &val, unsigned tid) +{ + return this->regFile.setMiscReg(misc_reg, val, tid); +} + +template <class Impl> +Fault +AlphaO3CPU<Impl>::setMiscRegWithEffect(int misc_reg, const MiscReg &val, + unsigned tid) +{ + return this->regFile.setMiscRegWithEffect(misc_reg, val, tid); +} + +template <class Impl> +void +AlphaO3CPU<Impl>::squashFromTC(unsigned tid) +{ + this->thread[tid]->inSyscall = true; + this->commit.generateTCEvent(tid); +} + +#if FULL_SYSTEM + +template <class Impl> +void +AlphaO3CPU<Impl>::post_interrupt(int int_num, int index) +{ + BaseCPU::post_interrupt(int_num, index); + + if (this->thread[0]->status() == ThreadContext::Suspended) { + DPRINTF(IPI,"Suspended Processor awoke\n"); + this->threadContexts[0]->activate(); + } +} + +template <class Impl> +int +AlphaO3CPU<Impl>::readIntrFlag() +{ + return this->regFile.readIntrFlag(); +} + +template <class Impl> +void +AlphaO3CPU<Impl>::setIntrFlag(int val) +{ + this->regFile.setIntrFlag(val); +} + +template <class Impl> +Fault +AlphaO3CPU<Impl>::hwrei(unsigned tid) +{ + // Need to clear the lock flag upon returning from an interrupt. + this->lockFlag = false; + + this->thread[tid]->kernelStats->hwrei(); + + this->checkInterrupts = true; + + // FIXME: XXX check for interrupts? XXX + return NoFault; +} + +template <class Impl> +bool +AlphaO3CPU<Impl>::simPalCheck(int palFunc, unsigned tid) +{ + if (this->thread[tid]->kernelStats) + this->thread[tid]->kernelStats->callpal(palFunc, + this->threadContexts[tid]); + + switch (palFunc) { + case PAL::halt: + halt(); + if (--System::numSystemsRunning == 0) + exitSimLoop("all cpus halted"); + break; + + case PAL::bpt: + case PAL::bugchk: + if (this->system->breakpoint()) + return false; + break; + } + + return true; +} + +template <class Impl> +void +AlphaO3CPU<Impl>::processInterrupts() +{ + // Check for interrupts here. For now can copy the code that + // exists within isa_fullsys_traits.hh. Also assume that thread 0 + // is the one that handles the interrupts. + // @todo: Possibly consolidate the interrupt checking code. + // @todo: Allow other threads to handle interrupts. + + // Check if there are any outstanding interrupts + //Handle the interrupts + int ipl = 0; + int summary = 0; + + this->checkInterrupts = false; + + if (this->readMiscReg(IPR_ASTRR, 0)) + panic("asynchronous traps not implemented\n"); + + if (this->readMiscReg(IPR_SIRR, 0)) { + for (int i = INTLEVEL_SOFTWARE_MIN; + i < INTLEVEL_SOFTWARE_MAX; i++) { + if (this->readMiscReg(IPR_SIRR, 0) & (ULL(1) << i)) { + // See table 4-19 of the 21164 hardware reference + ipl = (i - INTLEVEL_SOFTWARE_MIN) + 1; + summary |= (ULL(1) << i); + } + } + } + + uint64_t interrupts = this->intr_status(); + + if (interrupts) { + for (int i = INTLEVEL_EXTERNAL_MIN; + i < INTLEVEL_EXTERNAL_MAX; i++) { + if (interrupts & (ULL(1) << i)) { + // See table 4-19 of the 21164 hardware reference + ipl = i; + summary |= (ULL(1) << i); + } + } + } + + if (ipl && ipl > this->readMiscReg(IPR_IPLR, 0)) { + this->setMiscReg(IPR_ISR, summary, 0); + this->setMiscReg(IPR_INTID, ipl, 0); + // Checker needs to know these two registers were updated. +#if USE_CHECKER + if (this->checker) { + this->checker->threadBase()->setMiscReg(IPR_ISR, summary); + this->checker->threadBase()->setMiscReg(IPR_INTID, ipl); + } +#endif + this->trap(Fault(new InterruptFault), 0); + DPRINTF(Flow, "Interrupt! IPLR=%d ipl=%d summary=%x\n", + this->readMiscReg(IPR_IPLR, 0), ipl, summary); + } +} + +#endif // FULL_SYSTEM + +template <class Impl> +void +AlphaO3CPU<Impl>::trap(Fault fault, unsigned tid) +{ + // Pass the thread's TC into the invoke method. + fault->invoke(this->threadContexts[tid]); +} + +#if !FULL_SYSTEM + +template <class Impl> +void +AlphaO3CPU<Impl>::syscall(int64_t callnum, int tid) +{ + DPRINTF(O3CPU, "[tid:%i] Executing syscall().\n\n", tid); + + DPRINTF(Activity,"Activity: syscall() called.\n"); + + // Temporarily increase this by one to account for the syscall + // instruction. + ++(this->thread[tid]->funcExeInst); + + // Execute the actual syscall. + this->thread[tid]->syscall(callnum); + + // Decrease funcExeInst by one as the normal commit will handle + // incrementing it. + --(this->thread[tid]->funcExeInst); +} + +template <class Impl> +TheISA::IntReg +AlphaO3CPU<Impl>::getSyscallArg(int i, int tid) +{ + return this->readArchIntReg(AlphaISA::ArgumentReg0 + i, tid); +} + +template <class Impl> +void +AlphaO3CPU<Impl>::setSyscallArg(int i, IntReg val, int tid) +{ + this->setArchIntReg(AlphaISA::ArgumentReg0 + i, val, tid); +} + +template <class Impl> +void +AlphaO3CPU<Impl>::setSyscallReturn(SyscallReturn return_value, int tid) +{ + // check for error condition. Alpha syscall convention is to + // indicate success/failure in reg a3 (r19) and put the + // return value itself in the standard return value reg (v0). + if (return_value.successful()) { + // no error + this->setArchIntReg(SyscallSuccessReg, 0, tid); + this->setArchIntReg(ReturnValueReg, return_value.value(), tid); + } else { + // got an error, return details + this->setArchIntReg(SyscallSuccessReg, (IntReg) -1, tid); + this->setArchIntReg(ReturnValueReg, -return_value.value(), tid); + } +} +#endif diff --git a/src/cpu/o3/alpha_dyn_inst.cc b/src/cpu/o3/alpha/dyn_inst.cc index 0c1723eec..97d2f3d08 100644 --- a/src/cpu/o3/alpha_dyn_inst.cc +++ b/src/cpu/o3/alpha/dyn_inst.cc @@ -28,8 +28,8 @@ * Authors: Kevin Lim */ -#include "cpu/o3/alpha_dyn_inst_impl.hh" -#include "cpu/o3/alpha_impl.hh" +#include "cpu/o3/alpha/dyn_inst_impl.hh" +#include "cpu/o3/alpha/impl.hh" // Force instantiation of AlphaDynInst for all the implementations that // are needed. diff --git a/src/cpu/o3/alpha_dyn_inst.hh b/src/cpu/o3/alpha/dyn_inst.hh index 36a08c4a7..9dee610b6 100644 --- a/src/cpu/o3/alpha_dyn_inst.hh +++ b/src/cpu/o3/alpha/dyn_inst.hh @@ -34,8 +34,8 @@ #include "arch/isa_traits.hh" #include "cpu/base_dyn_inst.hh" #include "cpu/inst_seq.hh" -#include "cpu/o3/alpha_cpu.hh" -#include "cpu/o3/alpha_impl.hh" +#include "cpu/o3/alpha/cpu.hh" +#include "cpu/o3/alpha/impl.hh" class Packet; @@ -51,7 +51,7 @@ class AlphaDynInst : public BaseDynInst<Impl> { public: /** Typedef for the CPU. */ - typedef typename Impl::FullCPU FullCPU; + typedef typename Impl::O3CPU O3CPU; /** Binary machine instruction type. */ typedef TheISA::MachInst MachInst; @@ -74,7 +74,7 @@ class AlphaDynInst : public BaseDynInst<Impl> public: /** BaseDynInst constructor given a binary instruction. */ AlphaDynInst(ExtMachInst inst, Addr PC, Addr Pred_PC, InstSeqNum seq_num, - FullCPU *cpu); + O3CPU *cpu); /** BaseDynInst constructor given a static inst pointer. */ AlphaDynInst(StaticInstPtr &_staticInst); diff --git a/src/cpu/o3/alpha_dyn_inst_impl.hh b/src/cpu/o3/alpha/dyn_inst_impl.hh index a73cf4a7d..2d1b4b309 100644 --- a/src/cpu/o3/alpha_dyn_inst_impl.hh +++ b/src/cpu/o3/alpha/dyn_inst_impl.hh @@ -28,11 +28,11 @@ * Authors: Kevin Lim */ -#include "cpu/o3/alpha_dyn_inst.hh" +#include "cpu/o3/alpha/dyn_inst.hh" template <class Impl> AlphaDynInst<Impl>::AlphaDynInst(ExtMachInst inst, Addr PC, Addr Pred_PC, - InstSeqNum seq_num, FullCPU *cpu) + InstSeqNum seq_num, O3CPU *cpu) : BaseDynInst<Impl>(inst, PC, Pred_PC, seq_num, cpu) { initVars(); @@ -102,15 +102,7 @@ template <class Impl> Fault AlphaDynInst<Impl>::completeAcc(Packet *pkt) { - if (this->isLoad()) { - this->fault = this->staticInst->completeAcc(pkt, this, - this->traceData); - } else if (this->isStore()) { - this->fault = this->staticInst->completeAcc(pkt, this, - this->traceData); - } else { - panic("Unknown type!"); - } + this->fault = this->staticInst->completeAcc(pkt, this, this->traceData); return this->fault; } diff --git a/src/cpu/o3/alpha_impl.hh b/src/cpu/o3/alpha/impl.hh index 52f7c2394..b928ae654 100644 --- a/src/cpu/o3/alpha_impl.hh +++ b/src/cpu/o3/alpha/impl.hh @@ -33,20 +33,21 @@ #include "arch/alpha/isa_traits.hh" -#include "cpu/o3/alpha_params.hh" +#include "cpu/o3/alpha/params.hh" #include "cpu/o3/cpu_policy.hh" + // Forward declarations. template <class Impl> class AlphaDynInst; template <class Impl> -class AlphaFullCPU; +class AlphaO3CPU; /** Implementation specific struct that defines several key types to the * CPU, the stages within the CPU, the time buffers, and the DynInst. * The struct defines the ISA, the CPU policy, the specific DynInst, the - * specific FullCPU, and all of the structs from the time buffers to do + * specific O3CPU, and all of the structs from the time buffers to do * communication. * This is one of the key things that must be defined for each hardware * specific CPU implementation. @@ -67,8 +68,14 @@ struct AlphaSimpleImpl */ typedef RefCountingPtr<DynInst> DynInstPtr; - /** The FullCPU type to be used. */ - typedef AlphaFullCPU<AlphaSimpleImpl> FullCPU; + /** The O3CPU type to be used. */ + typedef AlphaO3CPU<AlphaSimpleImpl> O3CPU; + + /** Same typedef, but for CPUType. BaseDynInst may not always use + * an O3 CPU, so it's clearer to call it CPUType instead in that + * case. + */ + typedef O3CPU CPUType; /** The Params to be passed to each stage. */ typedef AlphaSimpleParams Params; @@ -79,4 +86,7 @@ struct AlphaSimpleImpl }; }; +/** The O3Impl to be used. */ +typedef AlphaSimpleImpl O3CPUImpl; + #endif // __CPU_O3_ALPHA_IMPL_HH__ diff --git a/src/cpu/o3/alpha/params.hh b/src/cpu/o3/alpha/params.hh new file mode 100644 index 000000000..c618cee08 --- /dev/null +++ b/src/cpu/o3/alpha/params.hh @@ -0,0 +1,60 @@ +/* + * Copyright (c) 2004-2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Kevin Lim + */ + +#ifndef __CPU_O3_ALPHA_PARAMS_HH__ +#define __CPU_O3_ALPHA_PARAMS_HH__ + +#include "cpu/o3/cpu.hh" +#include "cpu/o3/params.hh" + +//Forward declarations +class AlphaDTB; +class AlphaITB; +class MemObject; +class Process; +class System; + +/** + * This file defines the parameters that will be used for the AlphaO3CPU. + * This must be defined externally so that the Impl can have a params class + * defined that it can pass to all of the individual stages. + */ + +class AlphaSimpleParams : public O3Params +{ + public: + +#if FULL_SYSTEM + AlphaITB *itb; + AlphaDTB *dtb; +#endif +}; + +#endif // __CPU_O3_ALPHA_PARAMS_HH__ diff --git a/src/cpu/o3/alpha/thread_context.cc b/src/cpu/o3/alpha/thread_context.cc new file mode 100755 index 000000000..4a02715bc --- /dev/null +++ b/src/cpu/o3/alpha/thread_context.cc @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2004-2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Kevin Lim + * Korey Sewell + */ + +#include "cpu/o3/thread_context.hh" +#include "cpu/o3/thread_context_impl.hh" + +template class O3ThreadContext<AlphaSimpleImpl>; + diff --git a/src/cpu/o3/alpha/thread_context.hh b/src/cpu/o3/alpha/thread_context.hh new file mode 100644 index 000000000..ad52b0d2e --- /dev/null +++ b/src/cpu/o3/alpha/thread_context.hh @@ -0,0 +1,88 @@ +/* + * Copyright (c) 2004-2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Kevin Lim + * Korey Sewell + */ + +#include "cpu/o3/thread_context.hh" + +template <class Impl> +class AlphaTC : public O3ThreadContext<Impl> +{ + public: +#if FULL_SYSTEM + /** Returns a pointer to the ITB. */ + virtual AlphaITB *getITBPtr() { return this->cpu->itb; } + + /** Returns a pointer to the DTB. */ + virtual AlphaDTB *getDTBPtr() { return this->cpu->dtb; } + + /** Returns pointer to the quiesce event. */ + virtual EndQuiesceEvent *getQuiesceEvent() + { + return this->thread->quiesceEvent; + } + + /** Returns if the thread is currently in PAL mode, based on + * the PC's value. */ + virtual bool inPalMode() + { return TheISA::PcPAL(this->cpu->readPC(this->thread->readTid())); } +#endif + + virtual uint64_t readNextNPC() + { + panic("Alpha has no NextNPC!"); + return 0; + } + + virtual void setNextNPC(uint64_t val) + { + panic("Alpha has no NextNPC!"); + } + + virtual void changeRegFileContext(TheISA::RegFile::ContextParam param, + TheISA::RegFile::ContextVal val) + { panic("Not supported on Alpha!"); } + + + /** This function exits the thread context in the CPU and returns + * 1 if the CPU has no more active threads (meaning it's OK to exit); + * Used in syscall-emulation mode when a thread executes the 'exit' + * syscall. + */ + virtual int exit() + { + this->deallocate(); + + // If there are still threads executing in the system + if (this->cpu->numActiveThreads()) + return 0; // don't exit simulation + else + return 1; // exit simulation + } +}; diff --git a/src/cpu/o3/alpha_cpu.hh b/src/cpu/o3/alpha_cpu.hh deleted file mode 100644 index f81837f3c..000000000 --- a/src/cpu/o3/alpha_cpu.hh +++ /dev/null @@ -1,434 +0,0 @@ -/* - * Copyright (c) 2004-2006 The Regents of The University of Michigan - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer; - * redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution; - * neither the name of the copyright holders nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Authors: Kevin Lim - */ - -#ifndef __CPU_O3_ALPHA_FULL_CPU_HH__ -#define __CPU_O3_ALPHA_FULL_CPU_HH__ - -#include "arch/isa_traits.hh" -#include "cpu/thread_context.hh" -#include "cpu/o3/cpu.hh" -#include "sim/byteswap.hh" - -class EndQuiesceEvent; -namespace Kernel { - class Statistics; -}; - -class TranslatingPort; - -/** - * AlphaFullCPU class. Derives from the FullO3CPU class, and - * implements all ISA and implementation specific functions of the - * CPU. This is the CPU class that is used for the SimObjects, and is - * what is given to the DynInsts. Most of its state exists in the - * FullO3CPU; the state is has is mainly for ISA specific - * functionality. - */ -template <class Impl> -class AlphaFullCPU : public FullO3CPU<Impl> -{ - protected: - typedef TheISA::IntReg IntReg; - typedef TheISA::FloatReg FloatReg; - typedef TheISA::FloatRegBits FloatRegBits; - typedef TheISA::MiscReg MiscReg; - typedef TheISA::RegFile RegFile; - typedef TheISA::MiscRegFile MiscRegFile; - - public: - typedef O3ThreadState<Impl> ImplState; - typedef O3ThreadState<Impl> Thread; - typedef typename Impl::Params Params; - - /** Constructs an AlphaFullCPU with the given parameters. */ - AlphaFullCPU(Params *params); - - /** - * Derived ThreadContext class for use with the AlphaFullCPU. It - * provides the interface for any external objects to access a - * single thread's state and some general CPU state. Any time - * external objects try to update state through this interface, - * the CPU will create an event to squash all in-flight - * instructions in order to ensure state is maintained correctly. - * It must be defined specifically for the AlphaFullCPU because - * not all architectural state is located within the O3ThreadState - * (such as the commit PC, and registers), and specific actions - * must be taken when using this interface (such as squashing all - * in-flight instructions when doing a write to this interface). - */ - class AlphaTC : public ThreadContext - { - public: - /** Pointer to the CPU. */ - AlphaFullCPU<Impl> *cpu; - - /** Pointer to the thread state that this TC corrseponds to. */ - O3ThreadState<Impl> *thread; - - /** Returns a pointer to this CPU. */ - virtual BaseCPU *getCpuPtr() { return cpu; } - - /** Sets this CPU's ID. */ - virtual void setCpuId(int id) { cpu->cpu_id = id; } - - /** Reads this CPU's ID. */ - virtual int readCpuId() { return cpu->cpu_id; } - -#if FULL_SYSTEM - /** Returns a pointer to the system. */ - virtual System *getSystemPtr() { return cpu->system; } - - /** Returns a pointer to physical memory. */ - virtual PhysicalMemory *getPhysMemPtr() { return cpu->physmem; } - - /** Returns a pointer to the ITB. */ - virtual AlphaITB *getITBPtr() { return cpu->itb; } - - /** Returns a pointer to the DTB. */ - virtual AlphaDTB *getDTBPtr() { return cpu->dtb; } - - /** Returns a pointer to this thread's kernel statistics. */ - virtual Kernel::Statistics *getKernelStats() - { return thread->kernelStats; } - - virtual FunctionalPort *getPhysPort() { return thread->getPhysPort(); } - - virtual VirtualPort *getVirtPort(ThreadContext *src_tc = NULL); - - void delVirtPort(VirtualPort *vp); -#else - virtual TranslatingPort *getMemPort() { return thread->getMemPort(); } - - /** Returns a pointer to this thread's process. */ - virtual Process *getProcessPtr() { return thread->getProcessPtr(); } -#endif - /** Returns this thread's status. */ - virtual Status status() const { return thread->status(); } - - /** Sets this thread's status. */ - virtual void setStatus(Status new_status) - { thread->setStatus(new_status); } - - /** Set the status to Active. Optional delay indicates number of - * cycles to wait before beginning execution. */ - virtual void activate(int delay = 1); - - /** Set the status to Suspended. */ - virtual void suspend(); - - /** Set the status to Unallocated. */ - virtual void deallocate(); - - /** Set the status to Halted. */ - virtual void halt(); - -#if FULL_SYSTEM - /** Dumps the function profiling information. - * @todo: Implement. - */ - virtual void dumpFuncProfile(); -#endif - /** Takes over execution of a thread from another CPU. */ - virtual void takeOverFrom(ThreadContext *old_context); - - /** Registers statistics associated with this TC. */ - virtual void regStats(const std::string &name); - - /** Serializes state. */ - virtual void serialize(std::ostream &os); - /** Unserializes state. */ - virtual void unserialize(Checkpoint *cp, const std::string §ion); - -#if FULL_SYSTEM - /** Returns pointer to the quiesce event. */ - virtual EndQuiesceEvent *getQuiesceEvent(); - - /** Reads the last tick that this thread was activated on. */ - virtual Tick readLastActivate(); - /** Reads the last tick that this thread was suspended on. */ - virtual Tick readLastSuspend(); - - /** Clears the function profiling information. */ - virtual void profileClear(); - /** Samples the function profiling information. */ - virtual void profileSample(); -#endif - /** Returns this thread's ID number. */ - virtual int getThreadNum() { return thread->readTid(); } - - /** Returns the instruction this thread is currently committing. - * Only used when an instruction faults. - */ - virtual TheISA::MachInst getInst(); - - /** Copies the architectural registers from another TC into this TC. */ - virtual void copyArchRegs(ThreadContext *tc); - - /** Resets all architectural registers to 0. */ - virtual void clearArchRegs(); - - /** Reads an integer register. */ - virtual uint64_t readIntReg(int reg_idx); - - virtual FloatReg readFloatReg(int reg_idx, int width); - - virtual FloatReg readFloatReg(int reg_idx); - - virtual FloatRegBits readFloatRegBits(int reg_idx, int width); - - virtual FloatRegBits readFloatRegBits(int reg_idx); - - /** Sets an integer register to a value. */ - virtual void setIntReg(int reg_idx, uint64_t val); - - virtual void setFloatReg(int reg_idx, FloatReg val, int width); - - virtual void setFloatReg(int reg_idx, FloatReg val); - - virtual void setFloatRegBits(int reg_idx, FloatRegBits val, int width); - - virtual void setFloatRegBits(int reg_idx, FloatRegBits val); - - /** Reads this thread's PC. */ - virtual uint64_t readPC() - { return cpu->readPC(thread->readTid()); } - - /** Sets this thread's PC. */ - virtual void setPC(uint64_t val); - - /** Reads this thread's next PC. */ - virtual uint64_t readNextPC() - { return cpu->readNextPC(thread->readTid()); } - - /** Sets this thread's next PC. */ - virtual void setNextPC(uint64_t val); - - virtual uint64_t readNextNPC() - { - panic("Alpha has no NextNPC!"); - return 0; - } - - virtual void setNextNPC(uint64_t val) - { } - - /** Reads a miscellaneous register. */ - virtual MiscReg readMiscReg(int misc_reg) - { return cpu->readMiscReg(misc_reg, thread->readTid()); } - - /** Reads a misc. register, including any side-effects the - * read might have as defined by the architecture. */ - virtual MiscReg readMiscRegWithEffect(int misc_reg, Fault &fault) - { return cpu->readMiscRegWithEffect(misc_reg, fault, thread->readTid()); } - - /** Sets a misc. register. */ - virtual Fault setMiscReg(int misc_reg, const MiscReg &val); - - /** Sets a misc. register, including any side-effects the - * write might have as defined by the architecture. */ - virtual Fault setMiscRegWithEffect(int misc_reg, const MiscReg &val); - - /** Returns the number of consecutive store conditional failures. */ - // @todo: Figure out where these store cond failures should go. - virtual unsigned readStCondFailures() - { return thread->storeCondFailures; } - - /** Sets the number of consecutive store conditional failures. */ - virtual void setStCondFailures(unsigned sc_failures) - { thread->storeCondFailures = sc_failures; } - -#if FULL_SYSTEM - /** Returns if the thread is currently in PAL mode, based on - * the PC's value. */ - virtual bool inPalMode() - { return TheISA::PcPAL(cpu->readPC(thread->readTid())); } -#endif - // Only really makes sense for old CPU model. Lots of code - // outside the CPU still checks this function, so it will - // always return false to keep everything working. - /** Checks if the thread is misspeculating. Because it is - * very difficult to determine if the thread is - * misspeculating, this is set as false. */ - virtual bool misspeculating() { return false; } - -#if !FULL_SYSTEM - /** Gets a syscall argument by index. */ - virtual IntReg getSyscallArg(int i); - - /** Sets a syscall argument. */ - virtual void setSyscallArg(int i, IntReg val); - - /** Sets the syscall return value. */ - virtual void setSyscallReturn(SyscallReturn return_value); - - /** Executes a syscall in SE mode. */ - virtual void syscall(int64_t callnum) - { return cpu->syscall(callnum, thread->readTid()); } - - /** Reads the funcExeInst counter. */ - virtual Counter readFuncExeInst() { return thread->funcExeInst; } -#endif - virtual void changeRegFileContext(TheISA::RegFile::ContextParam param, - TheISA::RegFile::ContextVal val) - { panic("Not supported on Alpha!"); } - }; - -#if FULL_SYSTEM - /** ITB pointer. */ - AlphaITB *itb; - /** DTB pointer. */ - AlphaDTB *dtb; -#endif - - /** Registers statistics. */ - void regStats(); - -#if FULL_SYSTEM - /** Translates instruction requestion. */ - Fault translateInstReq(RequestPtr &req, Thread *thread) - { - return itb->translate(req, thread->getTC()); - } - - /** Translates data read request. */ - Fault translateDataReadReq(RequestPtr &req, Thread *thread) - { - return dtb->translate(req, thread->getTC(), false); - } - - /** Translates data write request. */ - Fault translateDataWriteReq(RequestPtr &req, Thread *thread) - { - return dtb->translate(req, thread->getTC(), true); - } - -#else - /** Translates instruction requestion in syscall emulation mode. */ - Fault translateInstReq(RequestPtr &req, Thread *thread) - { - return thread->getProcessPtr()->pTable->translate(req); - } - - /** Translates data read request in syscall emulation mode. */ - Fault translateDataReadReq(RequestPtr &req, Thread *thread) - { - return thread->getProcessPtr()->pTable->translate(req); - } - - /** Translates data write request in syscall emulation mode. */ - Fault translateDataWriteReq(RequestPtr &req, Thread *thread) - { - return thread->getProcessPtr()->pTable->translate(req); - } - -#endif - /** Reads a miscellaneous register. */ - MiscReg readMiscReg(int misc_reg, unsigned tid); - - /** Reads a misc. register, including any side effects the read - * might have as defined by the architecture. - */ - MiscReg readMiscRegWithEffect(int misc_reg, Fault &fault, unsigned tid); - - /** Sets a miscellaneous register. */ - Fault setMiscReg(int misc_reg, const MiscReg &val, unsigned tid); - - /** Sets a misc. register, including any side effects the write - * might have as defined by the architecture. - */ - Fault setMiscRegWithEffect(int misc_reg, const MiscReg &val, unsigned tid); - - /** Initiates a squash of all in-flight instructions for a given - * thread. The source of the squash is an external update of - * state through the TC. - */ - void squashFromTC(unsigned tid); - -#if FULL_SYSTEM - /** Posts an interrupt. */ - void post_interrupt(int int_num, int index); - /** Reads the interrupt flag. */ - int readIntrFlag(); - /** Sets the interrupt flags. */ - void setIntrFlag(int val); - /** HW return from error interrupt. */ - Fault hwrei(unsigned tid); - /** Returns if a specific PC is a PAL mode PC. */ - bool inPalMode(uint64_t PC) - { return AlphaISA::PcPAL(PC); } - - /** Traps to handle given fault. */ - void trap(Fault fault, unsigned tid); - bool simPalCheck(int palFunc, unsigned tid); - - /** Processes any interrupts. */ - void processInterrupts(); - - /** Halts the CPU. */ - void halt() { panic("Halt not implemented!\n"); } -#endif - - -#if !FULL_SYSTEM - /** Executes a syscall. - * @todo: Determine if this needs to be virtual. - */ - void syscall(int64_t callnum, int tid); - /** Gets a syscall argument. */ - IntReg getSyscallArg(int i, int tid); - - /** Used to shift args for indirect syscall. */ - void setSyscallArg(int i, IntReg val, int tid); - - /** Sets the return value of a syscall. */ - void setSyscallReturn(SyscallReturn return_value, int tid); -#endif - - /** CPU read function, forwards read to LSQ. */ - template <class T> - Fault read(RequestPtr &req, T &data, int load_idx) - { - return this->iew.ldstQueue.read(req, data, load_idx); - } - - /** CPU write function, forwards write to LSQ. */ - template <class T> - Fault write(RequestPtr &req, T &data, int store_idx) - { - return this->iew.ldstQueue.write(req, data, store_idx); - } - - Addr lockAddr; - - /** Temporary fix for the lock flag, works in the UP case. */ - bool lockFlag; -}; - -#endif // __CPU_O3_ALPHA_FULL_CPU_HH__ diff --git a/src/cpu/o3/alpha_cpu_impl.hh b/src/cpu/o3/alpha_cpu_impl.hh deleted file mode 100644 index 98290e57f..000000000 --- a/src/cpu/o3/alpha_cpu_impl.hh +++ /dev/null @@ -1,872 +0,0 @@ -/* - * Copyright (c) 2004-2006 The Regents of The University of Michigan - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer; - * redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution; - * neither the name of the copyright holders nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Authors: Kevin Lim - */ - -#include "arch/alpha/faults.hh" -#include "base/cprintf.hh" -#include "base/statistics.hh" -#include "base/timebuf.hh" -#include "cpu/checker/thread_context.hh" -#include "sim/sim_events.hh" -#include "sim/stats.hh" - -#include "cpu/o3/alpha_cpu.hh" -#include "cpu/o3/alpha_params.hh" -#include "cpu/o3/comm.hh" -#include "cpu/o3/thread_state.hh" - -#if FULL_SYSTEM -#include "arch/alpha/osfpal.hh" -#include "arch/isa_traits.hh" -#include "cpu/quiesce_event.hh" -#include "kern/kernel_stats.hh" -#include "sim/system.hh" -#endif - -using namespace TheISA; - -template <class Impl> -AlphaFullCPU<Impl>::AlphaFullCPU(Params *params) -#if FULL_SYSTEM - : FullO3CPU<Impl>(params), itb(params->itb), dtb(params->dtb) -#else - : FullO3CPU<Impl>(params) -#endif -{ - DPRINTF(FullCPU, "AlphaFullCPU: Creating AlphaFullCPU object.\n"); - - // Setup any thread state. - this->thread.resize(this->numThreads); - - for (int i = 0; i < this->numThreads; ++i) { -#if FULL_SYSTEM - // SMT is not supported in FS mode yet. - assert(this->numThreads == 1); - this->thread[i] = new Thread(this, 0); - this->thread[i]->setStatus(ThreadContext::Suspended); -#else - if (i < params->workload.size()) { - DPRINTF(FullCPU, "FullCPU: Workload[%i] process is %#x", - i, this->thread[i]); - this->thread[i] = new Thread(this, i, params->workload[i], - i, params->mem); - - this->thread[i]->setStatus(ThreadContext::Suspended); - -#if !FULL_SYSTEM - /* Use this port to for syscall emulation writes to memory. */ - Port *mem_port; - TranslatingPort *trans_port; - trans_port = new TranslatingPort(csprintf("%s-%d-funcport", - name(), i), - params->workload[i]->pTable, - false); - mem_port = params->mem->getPort("functional"); - mem_port->setPeer(trans_port); - trans_port->setPeer(mem_port); - this->thread[i]->setMemPort(trans_port); -#endif - //usedTids[i] = true; - //threadMap[i] = i; - } else { - //Allocate Empty thread so M5 can use later - //when scheduling threads to CPU - Process* dummy_proc = NULL; - - this->thread[i] = new Thread(this, i, dummy_proc, i, params->mem); - //usedTids[i] = false; - } -#endif // !FULL_SYSTEM - - ThreadContext *tc; - - // Setup the TC that will serve as the interface to the threads/CPU. - AlphaTC *alpha_tc = new AlphaTC; - - // If we're using a checker, then the TC should be the - // CheckerThreadContext. - if (params->checker) { - tc = new CheckerThreadContext<AlphaTC>( - alpha_tc, this->checker); - } else { - tc = alpha_tc; - } - - alpha_tc->cpu = this; - alpha_tc->thread = this->thread[i]; - -#if FULL_SYSTEM - // Setup quiesce event. - this->thread[i]->quiesceEvent = new EndQuiesceEvent(tc); - - Port *mem_port; - FunctionalPort *phys_port; - VirtualPort *virt_port; - phys_port = new FunctionalPort(csprintf("%s-%d-funcport", - name(), i)); - mem_port = this->system->physmem->getPort("functional"); - mem_port->setPeer(phys_port); - phys_port->setPeer(mem_port); - - virt_port = new VirtualPort(csprintf("%s-%d-vport", - name(), i)); - mem_port = this->system->physmem->getPort("functional"); - mem_port->setPeer(virt_port); - virt_port->setPeer(mem_port); - - this->thread[i]->setPhysPort(phys_port); - this->thread[i]->setVirtPort(virt_port); -#endif - // Give the thread the TC. - this->thread[i]->tc = tc; - - // Add the TC to the CPU's list of TC's. - this->threadContexts.push_back(tc); - } - - for (int i=0; i < this->numThreads; i++) { - this->thread[i]->setFuncExeInst(0); - } - - // Sets CPU pointers. These must be set at this level because the CPU - // pointers are defined to be the highest level of CPU class. - this->fetch.setCPU(this); - this->decode.setCPU(this); - this->rename.setCPU(this); - this->iew.setCPU(this); - this->commit.setCPU(this); - - this->rob.setCPU(this); - this->regFile.setCPU(this); - - lockAddr = 0; - lockFlag = false; -} - -template <class Impl> -void -AlphaFullCPU<Impl>::regStats() -{ - // Register stats for everything that has stats. - this->fullCPURegStats(); - this->fetch.regStats(); - this->decode.regStats(); - this->rename.regStats(); - this->iew.regStats(); - this->commit.regStats(); -} - -#if FULL_SYSTEM -template <class Impl> -VirtualPort * -AlphaFullCPU<Impl>::AlphaTC::getVirtPort(ThreadContext *src_tc) -{ - if (!src_tc) - return thread->getVirtPort(); - - VirtualPort *vp; - Port *mem_port; - - vp = new VirtualPort("tc-vport", src_tc); - mem_port = cpu->system->physmem->getPort("functional"); - mem_port->setPeer(vp); - vp->setPeer(mem_port); - return vp; -} - -template <class Impl> -void -AlphaFullCPU<Impl>::AlphaTC::dumpFuncProfile() -{ - // Currently not supported -} -#endif - -template <class Impl> -void -AlphaFullCPU<Impl>::AlphaTC::takeOverFrom(ThreadContext *old_context) -{ - // some things should already be set up -#if FULL_SYSTEM - assert(getSystemPtr() == old_context->getSystemPtr()); -#else - assert(getProcessPtr() == old_context->getProcessPtr()); -#endif - - // copy over functional state - setStatus(old_context->status()); - copyArchRegs(old_context); - setCpuId(old_context->readCpuId()); - -#if !FULL_SYSTEM - thread->funcExeInst = old_context->readFuncExeInst(); -#else - EndQuiesceEvent *other_quiesce = old_context->getQuiesceEvent(); - if (other_quiesce) { - // Point the quiesce event's TC at this TC so that it wakes up - // the proper CPU. - other_quiesce->tc = this; - } - if (thread->quiesceEvent) { - thread->quiesceEvent->tc = this; - } - - // Transfer kernel stats from one CPU to the other. - thread->kernelStats = old_context->getKernelStats(); -// storeCondFailures = 0; - cpu->lockFlag = false; -#endif - - old_context->setStatus(ThreadContext::Unallocated); - - thread->inSyscall = false; - thread->trapPending = false; -} - -#if FULL_SYSTEM -template <class Impl> -void -AlphaFullCPU<Impl>::AlphaTC::delVirtPort(VirtualPort *vp) -{ - delete vp->getPeer(); - delete vp; -} -#endif - -template <class Impl> -void -AlphaFullCPU<Impl>::AlphaTC::activate(int delay) -{ - DPRINTF(FullCPU, "Calling activate on AlphaTC\n"); - - if (thread->status() == ThreadContext::Active) - return; - -#if FULL_SYSTEM - thread->lastActivate = curTick; -#endif - - if (thread->status() == ThreadContext::Unallocated) { - cpu->activateWhenReady(thread->readTid()); - return; - } - - thread->setStatus(ThreadContext::Active); - - // status() == Suspended - cpu->activateContext(thread->readTid(), delay); -} - -template <class Impl> -void -AlphaFullCPU<Impl>::AlphaTC::suspend() -{ - DPRINTF(FullCPU, "Calling suspend on AlphaTC\n"); - - if (thread->status() == ThreadContext::Suspended) - return; - -#if FULL_SYSTEM - thread->lastActivate = curTick; - thread->lastSuspend = curTick; -#endif -/* -#if FULL_SYSTEM - // Don't change the status from active if there are pending interrupts - if (cpu->check_interrupts()) { - assert(status() == ThreadContext::Active); - return; - } -#endif -*/ - thread->setStatus(ThreadContext::Suspended); - cpu->suspendContext(thread->readTid()); -} - -template <class Impl> -void -AlphaFullCPU<Impl>::AlphaTC::deallocate() -{ - DPRINTF(FullCPU, "Calling deallocate on AlphaTC\n"); - - if (thread->status() == ThreadContext::Unallocated) - return; - - thread->setStatus(ThreadContext::Unallocated); - cpu->deallocateContext(thread->readTid()); -} - -template <class Impl> -void -AlphaFullCPU<Impl>::AlphaTC::halt() -{ - DPRINTF(FullCPU, "Calling halt on AlphaTC\n"); - - if (thread->status() == ThreadContext::Halted) - return; - - thread->setStatus(ThreadContext::Halted); - cpu->haltContext(thread->readTid()); -} - -template <class Impl> -void -AlphaFullCPU<Impl>::AlphaTC::regStats(const std::string &name) -{ -#if FULL_SYSTEM - thread->kernelStats = new Kernel::Statistics(cpu->system); - thread->kernelStats->regStats(name + ".kern"); -#endif -} - -template <class Impl> -void -AlphaFullCPU<Impl>::AlphaTC::serialize(std::ostream &os) -{ -#if FULL_SYSTEM - if (thread->kernelStats) - thread->kernelStats->serialize(os); -#endif - -} - -template <class Impl> -void -AlphaFullCPU<Impl>::AlphaTC::unserialize(Checkpoint *cp, const std::string §ion) -{ -#if FULL_SYSTEM - if (thread->kernelStats) - thread->kernelStats->unserialize(cp, section); -#endif - -} - -#if FULL_SYSTEM -template <class Impl> -EndQuiesceEvent * -AlphaFullCPU<Impl>::AlphaTC::getQuiesceEvent() -{ - return thread->quiesceEvent; -} - -template <class Impl> -Tick -AlphaFullCPU<Impl>::AlphaTC::readLastActivate() -{ - return thread->lastActivate; -} - -template <class Impl> -Tick -AlphaFullCPU<Impl>::AlphaTC::readLastSuspend() -{ - return thread->lastSuspend; -} - -template <class Impl> -void -AlphaFullCPU<Impl>::AlphaTC::profileClear() -{} - -template <class Impl> -void -AlphaFullCPU<Impl>::AlphaTC::profileSample() -{} -#endif - -template <class Impl> -TheISA::MachInst -AlphaFullCPU<Impl>::AlphaTC:: getInst() -{ - return thread->getInst(); -} - -template <class Impl> -void -AlphaFullCPU<Impl>::AlphaTC::copyArchRegs(ThreadContext *tc) -{ - // This function will mess things up unless the ROB is empty and - // there are no instructions in the pipeline. - unsigned tid = thread->readTid(); - PhysRegIndex renamed_reg; - - // First loop through the integer registers. - for (int i = 0; i < AlphaISA::NumIntRegs; ++i) { - renamed_reg = cpu->renameMap[tid].lookup(i); - - DPRINTF(FullCPU, "FullCPU: Copying over register %i, had data %lli, " - "now has data %lli.\n", - renamed_reg, cpu->readIntReg(renamed_reg), - tc->readIntReg(i)); - - cpu->setIntReg(renamed_reg, tc->readIntReg(i)); - } - - // Then loop through the floating point registers. - for (int i = 0; i < AlphaISA::NumFloatRegs; ++i) { - renamed_reg = cpu->renameMap[tid].lookup(i + AlphaISA::FP_Base_DepTag); - cpu->setFloatRegBits(renamed_reg, - tc->readFloatRegBits(i)); - } - - // Copy the misc regs. - copyMiscRegs(tc, this); - - // Then finally set the PC and the next PC. - cpu->setPC(tc->readPC(), tid); - cpu->setNextPC(tc->readNextPC(), tid); -#if !FULL_SYSTEM - this->thread->funcExeInst = tc->readFuncExeInst(); -#endif -} - -template <class Impl> -void -AlphaFullCPU<Impl>::AlphaTC::clearArchRegs() -{} - -template <class Impl> -uint64_t -AlphaFullCPU<Impl>::AlphaTC::readIntReg(int reg_idx) -{ - return cpu->readArchIntReg(reg_idx, thread->readTid()); -} - -template <class Impl> -FloatReg -AlphaFullCPU<Impl>::AlphaTC::readFloatReg(int reg_idx, int width) -{ - switch(width) { - case 32: - return cpu->readArchFloatRegSingle(reg_idx, thread->readTid()); - case 64: - return cpu->readArchFloatRegDouble(reg_idx, thread->readTid()); - default: - panic("Unsupported width!"); - return 0; - } -} - -template <class Impl> -FloatReg -AlphaFullCPU<Impl>::AlphaTC::readFloatReg(int reg_idx) -{ - return cpu->readArchFloatRegSingle(reg_idx, thread->readTid()); -} - -template <class Impl> -FloatRegBits -AlphaFullCPU<Impl>::AlphaTC::readFloatRegBits(int reg_idx, int width) -{ - DPRINTF(Fault, "Reading floatint register through the TC!\n"); - return cpu->readArchFloatRegInt(reg_idx, thread->readTid()); -} - -template <class Impl> -FloatRegBits -AlphaFullCPU<Impl>::AlphaTC::readFloatRegBits(int reg_idx) -{ - return cpu->readArchFloatRegInt(reg_idx, thread->readTid()); -} - -template <class Impl> -void -AlphaFullCPU<Impl>::AlphaTC::setIntReg(int reg_idx, uint64_t val) -{ - cpu->setArchIntReg(reg_idx, val, thread->readTid()); - - // Squash if we're not already in a state update mode. - if (!thread->trapPending && !thread->inSyscall) { - cpu->squashFromTC(thread->readTid()); - } -} - -template <class Impl> -void -AlphaFullCPU<Impl>::AlphaTC::setFloatReg(int reg_idx, FloatReg val, int width) -{ - switch(width) { - case 32: - cpu->setArchFloatRegSingle(reg_idx, val, thread->readTid()); - break; - case 64: - cpu->setArchFloatRegDouble(reg_idx, val, thread->readTid()); - break; - } - - // Squash if we're not already in a state update mode. - if (!thread->trapPending && !thread->inSyscall) { - cpu->squashFromTC(thread->readTid()); - } -} - -template <class Impl> -void -AlphaFullCPU<Impl>::AlphaTC::setFloatReg(int reg_idx, FloatReg val) -{ - cpu->setArchFloatRegSingle(reg_idx, val, thread->readTid()); - - if (!thread->trapPending && !thread->inSyscall) { - cpu->squashFromTC(thread->readTid()); - } -} - -template <class Impl> -void -AlphaFullCPU<Impl>::AlphaTC::setFloatRegBits(int reg_idx, FloatRegBits val, - int width) -{ - DPRINTF(Fault, "Setting floatint register through the TC!\n"); - cpu->setArchFloatRegInt(reg_idx, val, thread->readTid()); - - // Squash if we're not already in a state update mode. - if (!thread->trapPending && !thread->inSyscall) { - cpu->squashFromTC(thread->readTid()); - } -} - -template <class Impl> -void -AlphaFullCPU<Impl>::AlphaTC::setFloatRegBits(int reg_idx, FloatRegBits val) -{ - cpu->setArchFloatRegInt(reg_idx, val, thread->readTid()); - - // Squash if we're not already in a state update mode. - if (!thread->trapPending && !thread->inSyscall) { - cpu->squashFromTC(thread->readTid()); - } -} - -template <class Impl> -void -AlphaFullCPU<Impl>::AlphaTC::setPC(uint64_t val) -{ - cpu->setPC(val, thread->readTid()); - - // Squash if we're not already in a state update mode. - if (!thread->trapPending && !thread->inSyscall) { - cpu->squashFromTC(thread->readTid()); - } -} - -template <class Impl> -void -AlphaFullCPU<Impl>::AlphaTC::setNextPC(uint64_t val) -{ - cpu->setNextPC(val, thread->readTid()); - - // Squash if we're not already in a state update mode. - if (!thread->trapPending && !thread->inSyscall) { - cpu->squashFromTC(thread->readTid()); - } -} - -template <class Impl> -Fault -AlphaFullCPU<Impl>::AlphaTC::setMiscReg(int misc_reg, const MiscReg &val) -{ - Fault ret_fault = cpu->setMiscReg(misc_reg, val, thread->readTid()); - - // Squash if we're not already in a state update mode. - if (!thread->trapPending && !thread->inSyscall) { - cpu->squashFromTC(thread->readTid()); - } - - return ret_fault; -} - -template <class Impl> -Fault -AlphaFullCPU<Impl>::AlphaTC::setMiscRegWithEffect(int misc_reg, - const MiscReg &val) -{ - Fault ret_fault = cpu->setMiscRegWithEffect(misc_reg, val, - thread->readTid()); - - // Squash if we're not already in a state update mode. - if (!thread->trapPending && !thread->inSyscall) { - cpu->squashFromTC(thread->readTid()); - } - - return ret_fault; -} - -#if !FULL_SYSTEM - -template <class Impl> -TheISA::IntReg -AlphaFullCPU<Impl>::AlphaTC::getSyscallArg(int i) -{ - return cpu->getSyscallArg(i, thread->readTid()); -} - -template <class Impl> -void -AlphaFullCPU<Impl>::AlphaTC::setSyscallArg(int i, IntReg val) -{ - cpu->setSyscallArg(i, val, thread->readTid()); -} - -template <class Impl> -void -AlphaFullCPU<Impl>::AlphaTC::setSyscallReturn(SyscallReturn return_value) -{ - cpu->setSyscallReturn(return_value, thread->readTid()); -} - -#endif // FULL_SYSTEM - -template <class Impl> -MiscReg -AlphaFullCPU<Impl>::readMiscReg(int misc_reg, unsigned tid) -{ - return this->regFile.readMiscReg(misc_reg, tid); -} - -template <class Impl> -MiscReg -AlphaFullCPU<Impl>::readMiscRegWithEffect(int misc_reg, Fault &fault, - unsigned tid) -{ - return this->regFile.readMiscRegWithEffect(misc_reg, fault, tid); -} - -template <class Impl> -Fault -AlphaFullCPU<Impl>::setMiscReg(int misc_reg, const MiscReg &val, unsigned tid) -{ - return this->regFile.setMiscReg(misc_reg, val, tid); -} - -template <class Impl> -Fault -AlphaFullCPU<Impl>::setMiscRegWithEffect(int misc_reg, const MiscReg &val, - unsigned tid) -{ - return this->regFile.setMiscRegWithEffect(misc_reg, val, tid); -} - -template <class Impl> -void -AlphaFullCPU<Impl>::squashFromTC(unsigned tid) -{ - this->thread[tid]->inSyscall = true; - this->commit.generateTCEvent(tid); -} - -#if FULL_SYSTEM - -template <class Impl> -void -AlphaFullCPU<Impl>::post_interrupt(int int_num, int index) -{ - BaseCPU::post_interrupt(int_num, index); - - if (this->thread[0]->status() == ThreadContext::Suspended) { - DPRINTF(IPI,"Suspended Processor awoke\n"); - this->threadContexts[0]->activate(); - } -} - -template <class Impl> -int -AlphaFullCPU<Impl>::readIntrFlag() -{ - return this->regFile.readIntrFlag(); -} - -template <class Impl> -void -AlphaFullCPU<Impl>::setIntrFlag(int val) -{ - this->regFile.setIntrFlag(val); -} - -template <class Impl> -Fault -AlphaFullCPU<Impl>::hwrei(unsigned tid) -{ - // Need to clear the lock flag upon returning from an interrupt. - this->lockFlag = false; - - this->thread[tid]->kernelStats->hwrei(); - - this->checkInterrupts = true; - - // FIXME: XXX check for interrupts? XXX - return NoFault; -} - -template <class Impl> -bool -AlphaFullCPU<Impl>::simPalCheck(int palFunc, unsigned tid) -{ - if (this->thread[tid]->kernelStats) - this->thread[tid]->kernelStats->callpal(palFunc, - this->threadContexts[tid]); - - switch (palFunc) { - case PAL::halt: - halt(); - if (--System::numSystemsRunning == 0) - exitSimLoop("all cpus halted"); - break; - - case PAL::bpt: - case PAL::bugchk: - if (this->system->breakpoint()) - return false; - break; - } - - return true; -} - -template <class Impl> -void -AlphaFullCPU<Impl>::trap(Fault fault, unsigned tid) -{ - // Pass the thread's TC into the invoke method. - fault->invoke(this->threadContexts[tid]); -} - -template <class Impl> -void -AlphaFullCPU<Impl>::processInterrupts() -{ - // Check for interrupts here. For now can copy the code that - // exists within isa_fullsys_traits.hh. Also assume that thread 0 - // is the one that handles the interrupts. - // @todo: Possibly consolidate the interrupt checking code. - // @todo: Allow other threads to handle interrupts. - - // Check if there are any outstanding interrupts - //Handle the interrupts - int ipl = 0; - int summary = 0; - - this->checkInterrupts = false; - - if (this->readMiscReg(IPR_ASTRR, 0)) - panic("asynchronous traps not implemented\n"); - - if (this->readMiscReg(IPR_SIRR, 0)) { - for (int i = INTLEVEL_SOFTWARE_MIN; - i < INTLEVEL_SOFTWARE_MAX; i++) { - if (this->readMiscReg(IPR_SIRR, 0) & (ULL(1) << i)) { - // See table 4-19 of the 21164 hardware reference - ipl = (i - INTLEVEL_SOFTWARE_MIN) + 1; - summary |= (ULL(1) << i); - } - } - } - - uint64_t interrupts = this->intr_status(); - - if (interrupts) { - for (int i = INTLEVEL_EXTERNAL_MIN; - i < INTLEVEL_EXTERNAL_MAX; i++) { - if (interrupts & (ULL(1) << i)) { - // See table 4-19 of the 21164 hardware reference - ipl = i; - summary |= (ULL(1) << i); - } - } - } - - if (ipl && ipl > this->readMiscReg(IPR_IPLR, 0)) { - this->setMiscReg(IPR_ISR, summary, 0); - this->setMiscReg(IPR_INTID, ipl, 0); - // Checker needs to know these two registers were updated. - if (this->checker) { - this->checker->threadBase()->setMiscReg(IPR_ISR, summary); - this->checker->threadBase()->setMiscReg(IPR_INTID, ipl); - } - this->trap(Fault(new InterruptFault), 0); - DPRINTF(Flow, "Interrupt! IPLR=%d ipl=%d summary=%x\n", - this->readMiscReg(IPR_IPLR, 0), ipl, summary); - } -} - -#endif // FULL_SYSTEM - -#if !FULL_SYSTEM - -template <class Impl> -void -AlphaFullCPU<Impl>::syscall(int64_t callnum, int tid) -{ - DPRINTF(FullCPU, "AlphaFullCPU: [tid:%i] Executing syscall().\n\n", tid); - - DPRINTF(Activity,"Activity: syscall() called.\n"); - - // Temporarily increase this by one to account for the syscall - // instruction. - ++(this->thread[tid]->funcExeInst); - - // Execute the actual syscall. - this->thread[tid]->syscall(callnum); - - // Decrease funcExeInst by one as the normal commit will handle - // incrementing it. - --(this->thread[tid]->funcExeInst); -} - -template <class Impl> -TheISA::IntReg -AlphaFullCPU<Impl>::getSyscallArg(int i, int tid) -{ - return this->readArchIntReg(AlphaISA::ArgumentReg0 + i, tid); -} - -template <class Impl> -void -AlphaFullCPU<Impl>::setSyscallArg(int i, IntReg val, int tid) -{ - this->setArchIntReg(AlphaISA::ArgumentReg0 + i, val, tid); -} - -template <class Impl> -void -AlphaFullCPU<Impl>::setSyscallReturn(SyscallReturn return_value, int tid) -{ - // check for error condition. Alpha syscall convention is to - // indicate success/failure in reg a3 (r19) and put the - // return value itself in the standard return value reg (v0). - if (return_value.successful()) { - // no error - this->setArchIntReg(SyscallSuccessReg, 0, tid); - this->setArchIntReg(ReturnValueReg, return_value.value(), tid); - } else { - // got an error, return details - this->setArchIntReg(SyscallSuccessReg, (IntReg) -1, tid); - this->setArchIntReg(ReturnValueReg, -return_value.value(), tid); - } -} -#endif diff --git a/src/cpu/o3/base_dyn_inst.cc b/src/cpu/o3/base_dyn_inst.cc new file mode 100644 index 000000000..0979c5c8f --- /dev/null +++ b/src/cpu/o3/base_dyn_inst.cc @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Kevin Lim + */ + +#include "cpu/base_dyn_inst_impl.hh" +#include "cpu/o3/isa_specific.hh" + +// Explicit instantiation +template class BaseDynInst<O3CPUImpl>; + +template <> +int +BaseDynInst<O3CPUImpl>::instcount = 0; diff --git a/src/cpu/o3/bpred_unit.cc b/src/cpu/o3/bpred_unit.cc index b33543bdc..08fd4e8ea 100644 --- a/src/cpu/o3/bpred_unit.cc +++ b/src/cpu/o3/bpred_unit.cc @@ -29,11 +29,6 @@ */ #include "cpu/o3/bpred_unit_impl.hh" -#include "cpu/o3/alpha_impl.hh" -#include "cpu/o3/alpha_dyn_inst.hh" -#include "cpu/ozone/ozone_impl.hh" -//#include "cpu/ozone/simple_impl.hh" +#include "cpu/o3/isa_specific.hh" -template class BPredUnit<AlphaSimpleImpl>; -template class BPredUnit<OzoneImpl>; -//template class BPredUnit<SimpleImpl>; +template class BPredUnit<O3CPUImpl>; diff --git a/src/cpu/o3/checker_builder.cc b/src/cpu/o3/checker_builder.cc new file mode 100644 index 000000000..782d963b0 --- /dev/null +++ b/src/cpu/o3/checker_builder.cc @@ -0,0 +1,157 @@ +/* + * Copyright (c) 2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Kevin Lim + */ + +#include <string> + +#include "cpu/checker/cpu_impl.hh" +#include "cpu/inst_seq.hh" +#include "cpu/o3/alpha/dyn_inst.hh" +#include "cpu/o3/alpha/impl.hh" +#include "sim/builder.hh" +#include "sim/process.hh" +#include "sim/sim_object.hh" + +class MemObject; + +template +class Checker<RefCountingPtr<AlphaDynInst<AlphaSimpleImpl> > >; + +/** + * Specific non-templated derived class used for SimObject configuration. + */ +class O3Checker : public Checker<RefCountingPtr<AlphaDynInst<AlphaSimpleImpl> > > +{ + public: + O3Checker(Params *p) + : Checker<RefCountingPtr<AlphaDynInst<AlphaSimpleImpl> > >(p) + { } +}; + +//////////////////////////////////////////////////////////////////////// +// +// CheckerCPU Simulation Object +// +BEGIN_DECLARE_SIM_OBJECT_PARAMS(O3Checker) + + Param<Counter> max_insts_any_thread; + Param<Counter> max_insts_all_threads; + Param<Counter> max_loads_any_thread; + Param<Counter> max_loads_all_threads; + +#if FULL_SYSTEM + SimObjectParam<AlphaITB *> itb; + SimObjectParam<AlphaDTB *> dtb; + SimObjectParam<System *> system; + Param<int> cpu_id; + Param<Tick> profile; +#else + SimObjectParam<Process *> workload; +#endif // FULL_SYSTEM + Param<int> clock; + + Param<bool> defer_registration; + Param<bool> exitOnError; + Param<bool> warnOnlyOnLoadError; + Param<bool> function_trace; + Param<Tick> function_trace_start; + +END_DECLARE_SIM_OBJECT_PARAMS(O3Checker) + +BEGIN_INIT_SIM_OBJECT_PARAMS(O3Checker) + + INIT_PARAM(max_insts_any_thread, + "terminate when any thread reaches this inst count"), + INIT_PARAM(max_insts_all_threads, + "terminate when all threads have reached this inst count"), + INIT_PARAM(max_loads_any_thread, + "terminate when any thread reaches this load count"), + INIT_PARAM(max_loads_all_threads, + "terminate when all threads have reached this load count"), + +#if FULL_SYSTEM + INIT_PARAM(itb, "Instruction TLB"), + INIT_PARAM(dtb, "Data TLB"), + INIT_PARAM(system, "system object"), + INIT_PARAM(cpu_id, "processor ID"), + INIT_PARAM(profile, ""), +#else + INIT_PARAM(workload, "processes to run"), +#endif // FULL_SYSTEM + + INIT_PARAM(clock, "clock speed"), + + INIT_PARAM(defer_registration, "defer system registration (for sampling)"), + INIT_PARAM(exitOnError, "exit on error"), + INIT_PARAM_DFLT(warnOnlyOnLoadError, "warn, but don't exit, if a load " + "result errors", false), + INIT_PARAM(function_trace, "Enable function trace"), + INIT_PARAM(function_trace_start, "Cycle to start function trace") + +END_INIT_SIM_OBJECT_PARAMS(O3Checker) + + +CREATE_SIM_OBJECT(O3Checker) +{ + O3Checker::Params *params = new O3Checker::Params(); + params->name = getInstanceName(); + params->numberOfThreads = 1; + params->max_insts_any_thread = 0; + params->max_insts_all_threads = 0; + params->max_loads_any_thread = 0; + params->max_loads_all_threads = 0; + params->exitOnError = exitOnError; + params->warnOnlyOnLoadError = warnOnlyOnLoadError; + params->deferRegistration = defer_registration; + params->functionTrace = function_trace; + params->functionTraceStart = function_trace_start; + params->clock = clock; + // Hack to touch all parameters. Consider not deriving Checker + // from BaseCPU..it's not really a CPU in the end. + Counter temp; + temp = max_insts_any_thread; + temp = max_insts_all_threads; + temp = max_loads_any_thread; + temp = max_loads_all_threads; + +#if FULL_SYSTEM + params->itb = itb; + params->dtb = dtb; + params->system = system; + params->cpu_id = cpu_id; + params->profile = profile; +#else + params->process = workload; +#endif + + O3Checker *cpu = new O3Checker(params); + return cpu; +} + +REGISTER_SIM_OBJECT("O3Checker", O3Checker) diff --git a/src/cpu/o3/commit.cc b/src/cpu/o3/commit.cc index 770008a33..637d59f52 100644 --- a/src/cpu/o3/commit.cc +++ b/src/cpu/o3/commit.cc @@ -28,8 +28,7 @@ * Authors: Kevin Lim */ -#include "cpu/o3/alpha_dyn_inst.hh" -#include "cpu/o3/alpha_impl.hh" +#include "cpu/o3/isa_specific.hh" #include "cpu/o3/commit_impl.hh" -template class DefaultCommit<AlphaSimpleImpl>; +template class DefaultCommit<O3CPUImpl>; diff --git a/src/cpu/o3/commit.hh b/src/cpu/o3/commit.hh index b7404c488..956b6ec3e 100644 --- a/src/cpu/o3/commit.hh +++ b/src/cpu/o3/commit.hh @@ -26,6 +26,7 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * Authors: Kevin Lim + * Korey Sewell */ #ifndef __CPU_O3_COMMIT_HH__ @@ -67,7 +68,7 @@ class DefaultCommit { public: // Typedefs from the Impl. - typedef typename Impl::FullCPU FullCPU; + typedef typename Impl::O3CPU O3CPU; typedef typename Impl::DynInstPtr DynInstPtr; typedef typename Impl::Params Params; typedef typename Impl::CPUPol CPUPol; @@ -145,7 +146,7 @@ class DefaultCommit void regStats(); /** Sets the CPU pointer. */ - void setCPU(FullCPU *cpu_ptr); + void setCPU(O3CPU *cpu_ptr); /** Sets the list of threads. */ void setThreads(std::vector<Thread *> &threads); @@ -161,10 +162,6 @@ class DefaultCommit /** Sets the pointer to the queue coming from IEW. */ void setIEWQueue(TimeBuffer<IEWStruct> *iq_ptr); - void setFetchStage(Fetch *fetch_stage); - - Fetch *fetchStage; - /** Sets the pointer to the IEW stage. */ void setIEWStage(IEW *iew_stage); @@ -186,11 +183,14 @@ class DefaultCommit /** Initializes stage by sending back the number of free entries. */ void initStage(); - /** Initializes the switching out of commit. */ - void switchOut(); + /** Initializes the draining of commit. */ + bool drain(); + + /** Resumes execution after draining. */ + void resume(); /** Completes the switch out of commit. */ - void doSwitchOut(); + void switchOut(); /** Takes over from another CPU's thread. */ void takeOverFrom(); @@ -280,12 +280,20 @@ class DefaultCommit /** Sets the PC of a specific thread. */ void setPC(uint64_t val, unsigned tid) { PC[tid] = val; } - /** Reads the PC of a specific thread. */ + /** Reads the next PC of a specific thread. */ uint64_t readNextPC(unsigned tid) { return nextPC[tid]; } /** Sets the next PC of a specific thread. */ void setNextPC(uint64_t val, unsigned tid) { nextPC[tid] = val; } +#if THE_ISA != ALPHA_ISA + /** Reads the next NPC of a specific thread. */ + uint64_t readNextPC(unsigned tid) { return nextNPC[tid]; } + + /** Sets the next NPC of a specific thread. */ + void setNextPC(uint64_t val, unsigned tid) { nextNPC[tid] = val; } +#endif + private: /** Time buffer interface. */ TimeBuffer<TimeStruct> *timeBuffer; @@ -317,16 +325,12 @@ class DefaultCommit ROB *rob; private: - /** Pointer to FullCPU. */ - FullCPU *cpu; + /** Pointer to O3CPU. */ + O3CPU *cpu; /** Vector of all of the threads. */ std::vector<Thread *> thread; - Fault fetchFault; - - int fetchTrapWait; - /** Records that commit has written to the time buffer this cycle. Used for * the CPU to determine if it can deschedule itself if there is no activity. */ @@ -365,11 +369,6 @@ class DefaultCommit */ unsigned renameWidth; - /** IEW width, in instructions. Used so ROB knows how many - * instructions to get from the IEW instruction queue. - */ - unsigned iewWidth; - /** Commit width, in instructions. */ unsigned commitWidth; @@ -379,8 +378,8 @@ class DefaultCommit /** Number of Active Threads */ unsigned numThreads; - /** Is a switch out pending. */ - bool switchPending; + /** Is a drain pending. */ + bool drainPending; /** Is commit switched out. */ bool switchedOut; @@ -390,10 +389,6 @@ class DefaultCommit */ Tick trapLatency; - Tick fetchTrapLatency; - - Tick fetchFaultTick; - /** The commit PC of each thread. Refers to the instruction that * is currently being processed/committed. */ @@ -402,6 +397,11 @@ class DefaultCommit /** The next PC of each thread. */ Addr nextPC[Impl::MaxThreads]; +#if THE_ISA != ALPHA_ISA + /** The next NPC of each thread. */ + Addr nextNPC[Impl::MaxThreads]; +#endif + /** The sequence number of the youngest valid instruction in the ROB. */ InstSeqNum youngestSeqNum[Impl::MaxThreads]; diff --git a/src/cpu/o3/commit_impl.hh b/src/cpu/o3/commit_impl.hh index ceb2918e0..904af1071 100644 --- a/src/cpu/o3/commit_impl.hh +++ b/src/cpu/o3/commit_impl.hh @@ -28,16 +28,22 @@ * Authors: Kevin Lim */ +#include "config/full_system.hh" +#include "config/use_checker.hh" + #include <algorithm> #include <string> #include "base/loader/symtab.hh" #include "base/timebuf.hh" -#include "cpu/checker/cpu.hh" #include "cpu/exetrace.hh" #include "cpu/o3/commit.hh" #include "cpu/o3/thread_state.hh" +#if USE_CHECKER +#include "cpu/checker/cpu.hh" +#endif + using namespace std; template <class Impl> @@ -72,13 +78,11 @@ DefaultCommit<Impl>::DefaultCommit(Params *params) renameToROBDelay(params->renameToROBDelay), fetchToCommitDelay(params->commitToFetchDelay), renameWidth(params->renameWidth), - iewWidth(params->executeWidth), commitWidth(params->commitWidth), numThreads(params->numberOfThreads), - switchPending(false), + drainPending(false), switchedOut(false), - trapLatency(params->trapLatency), - fetchTrapLatency(params->fetchTrapLatency) + trapLatency(params->trapLatency) { _status = Active; _nextStatus = Inactive; @@ -118,9 +122,6 @@ DefaultCommit<Impl>::DefaultCommit(Params *params) tcSquash[i] = false; PC[i] = nextPC[i] = 0; } - - fetchFaultTick = 0; - fetchTrapWait = 0; } template <class Impl> @@ -205,19 +206,6 @@ DefaultCommit<Impl>::regStats() .flags(total) ; - // - // Commit-Eligible instructions... - // - // -> The number of instructions eligible to commit in those - // cycles where we reached our commit BW limit (less the number - // actually committed) - // - // -> The average value is computed over ALL CYCLES... not just - // the BW limited cycles - // - // -> The standard deviation is computed only over cycles where - // we reached the BW limit - // commitEligible .init(cpu->number_of_threads) .name(name() + ".COM:bw_limited") @@ -233,17 +221,16 @@ DefaultCommit<Impl>::regStats() template <class Impl> void -DefaultCommit<Impl>::setCPU(FullCPU *cpu_ptr) +DefaultCommit<Impl>::setCPU(O3CPU *cpu_ptr) { DPRINTF(Commit, "Commit: Setting CPU pointer.\n"); cpu = cpu_ptr; // Commit must broadcast the number of free entries it has at the start of // the simulation, so it starts as active. - cpu->activateStage(FullCPU::CommitIdx); + cpu->activateStage(O3CPU::CommitIdx); trapLatency = cpu->cycles(trapLatency); - fetchTrapLatency = cpu->cycles(fetchTrapLatency); } template <class Impl> @@ -302,13 +289,6 @@ DefaultCommit<Impl>::setIEWQueue(TimeBuffer<IEWStruct> *iq_ptr) template <class Impl> void -DefaultCommit<Impl>::setFetchStage(Fetch *fetch_stage) -{ - fetchStage = fetch_stage; -} - -template <class Impl> -void DefaultCommit<Impl>::setIEWStage(IEW *iew_stage) { iewStage = iew_stage; @@ -358,23 +338,38 @@ DefaultCommit<Impl>::initStage() } template <class Impl> -void -DefaultCommit<Impl>::switchOut() +bool +DefaultCommit<Impl>::drain() { - switchPending = true; + drainPending = true; + + // If it's already drained, return true. + if (rob->isEmpty() && !iewStage->hasStoresToWB()) { + cpu->signalDrained(); + return true; + } + + return false; } template <class Impl> void -DefaultCommit<Impl>::doSwitchOut() +DefaultCommit<Impl>::switchOut() { switchedOut = true; - switchPending = false; + drainPending = false; rob->switchOut(); } template <class Impl> void +DefaultCommit<Impl>::resume() +{ + drainPending = false; +} + +template <class Impl> +void DefaultCommit<Impl>::takeOverFrom() { switchedOut = false; @@ -409,10 +404,10 @@ DefaultCommit<Impl>::updateStatus() if (_nextStatus == Inactive && _status == Active) { DPRINTF(Activity, "Deactivating stage.\n"); - cpu->deactivateStage(FullCPU::CommitIdx); + cpu->deactivateStage(O3CPU::CommitIdx); } else if (_nextStatus == Active && _status == Inactive) { DPRINTF(Activity, "Activating stage.\n"); - cpu->activateStage(FullCPU::CommitIdx); + cpu->activateStage(O3CPU::CommitIdx); } _status = _nextStatus; @@ -434,7 +429,7 @@ DefaultCommit<Impl>::setNextStatus() } } - assert(squashes == squashCounter); + squashCounter = squashes; // If commit is currently squashing, then it will have activity for the // next cycle. Set its next status as active. @@ -539,8 +534,6 @@ DefaultCommit<Impl>::squashFromTrap(unsigned tid) commitStatus[tid] = ROBSquashing; cpu->activityThisCycle(); - - ++squashCounter; } template <class Impl> @@ -558,8 +551,6 @@ DefaultCommit<Impl>::squashFromTC(unsigned tid) cpu->activityThisCycle(); tcSquash[tid] = false; - - ++squashCounter; } template <class Impl> @@ -569,11 +560,15 @@ DefaultCommit<Impl>::tick() wroteToTimeBuffer = false; _nextStatus = Inactive; - if (switchPending && rob->isEmpty() && !iewStage->hasStoresToWB()) { - cpu->signalSwitched(); + if (drainPending && rob->isEmpty() && !iewStage->hasStoresToWB()) { + cpu->signalDrained(); + drainPending = false; return; } + if ((*activeThreads).size() <= 0) + return; + list<unsigned>::iterator threads = (*activeThreads).begin(); // Check if any of the threads are done squashing. Change the @@ -585,10 +580,12 @@ DefaultCommit<Impl>::tick() if (rob->isDoneSquashing(tid)) { commitStatus[tid] = Running; - --squashCounter; } else { DPRINTF(Commit,"[tid:%u]: Still Squashing, cannot commit any" - "insts this cycle.\n", tid); + " insts this cycle.\n", tid); + rob->doSquash(tid); + toIEW->commitInfo[tid].robSquashing = true; + wroteToTimeBuffer = true; } } } @@ -694,29 +691,7 @@ DefaultCommit<Impl>::commit() while (threads != (*activeThreads).end()) { unsigned tid = *threads++; -/* - if (fromFetch->fetchFault && commitStatus[0] != TrapPending) { - // Record the fault. Wait until it's empty in the ROB. - // Then handle the trap. Ignore it if there's already a - // trap pending as fetch will be redirected. - fetchFault = fromFetch->fetchFault; - fetchFaultTick = curTick + fetchTrapLatency; - commitStatus[0] = FetchTrapPending; - DPRINTF(Commit, "Fault from fetch recorded. Will trap if the " - "ROB empties without squashing the fault.\n"); - fetchTrapWait = 0; - } - // Fetch may tell commit to clear the trap if it's been squashed. - if (fromFetch->clearFetchFault) { - DPRINTF(Commit, "Received clear fetch fault signal\n"); - fetchTrapWait = 0; - if (commitStatus[0] == FetchTrapPending) { - DPRINTF(Commit, "Clearing fault from fetch\n"); - commitStatus[0] = Running; - } - } -*/ // Not sure which one takes priority. I think if we have // both, that's a bad sign. if (trapSquash[tid] == true) { @@ -744,8 +719,6 @@ DefaultCommit<Impl>::commit() commitStatus[tid] = ROBSquashing; - ++squashCounter; - // If we want to include the squashing instruction in the squash, // then use one older sequence number. InstSeqNum squashed_inst = fromIEW->squashedSeqNum[tid]; @@ -947,7 +920,7 @@ DefaultCommit<Impl>::commitHead(DynInstPtr &head_inst, unsigned inst_num) // and committed this instruction. thread[tid]->funcExeInst--; - head_inst->reachedCommit = true; + head_inst->setAtCommit(); if (head_inst->isNonSpeculative() || head_inst->isStoreConditional() || @@ -1012,18 +985,19 @@ DefaultCommit<Impl>::commitHead(DynInstPtr &head_inst, unsigned inst_num) head_inst->setCompleted(); } +#if USE_CHECKER // Use checker prior to updating anything due to traps or PC // based events. if (cpu->checker) { - cpu->checker->tick(head_inst); + cpu->checker->verify(head_inst); } +#endif // Check if the instruction caused a fault. If so, trap. Fault inst_fault = head_inst->getFault(); if (inst_fault != NoFault) { head_inst->setCompleted(); -#if FULL_SYSTEM DPRINTF(Commit, "Inst [sn:%lli] PC %#x has a fault\n", head_inst->seqNum, head_inst->readPC()); @@ -1032,9 +1006,11 @@ DefaultCommit<Impl>::commitHead(DynInstPtr &head_inst, unsigned inst_num) return false; } +#if USE_CHECKER if (cpu->checker && head_inst->isStore()) { - cpu->checker->tick(head_inst); + cpu->checker->verify(head_inst); } +#endif assert(!thread[tid]->inSyscall); @@ -1065,10 +1041,6 @@ DefaultCommit<Impl>::commitHead(DynInstPtr &head_inst, unsigned inst_num) generateTrapEvent(tid); return false; -#else // !FULL_SYSTEM - panic("fault (%d) detected @ PC %08p", inst_fault, - head_inst->PC); -#endif // FULL_SYSTEM } updateComInstStats(head_inst); @@ -1256,7 +1228,8 @@ DefaultCommit<Impl>::roundRobin() unsigned tid = *pri_iter; if (commitStatus[tid] == Running || - commitStatus[tid] == Idle) { + commitStatus[tid] == Idle || + commitStatus[tid] == FetchTrapPending) { if (rob->isHeadReady(tid)) { priority_list.erase(pri_iter); diff --git a/src/cpu/o3/cpu.cc b/src/cpu/o3/cpu.cc index 788c6b164..b407f4fcc 100644 --- a/src/cpu/o3/cpu.cc +++ b/src/cpu/o3/cpu.cc @@ -26,9 +26,11 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * Authors: Kevin Lim + * Korey Sewell */ #include "config/full_system.hh" +#include "config/use_checker.hh" #if FULL_SYSTEM #include "sim/system.hh" @@ -37,26 +39,28 @@ #endif #include "cpu/activity.hh" -#include "cpu/checker/cpu.hh" #include "cpu/simple_thread.hh" #include "cpu/thread_context.hh" -#include "cpu/o3/alpha_dyn_inst.hh" -#include "cpu/o3/alpha_impl.hh" +#include "cpu/o3/isa_specific.hh" #include "cpu/o3/cpu.hh" #include "sim/root.hh" #include "sim/stat_control.hh" +#if USE_CHECKER +#include "cpu/checker/cpu.hh" +#endif + using namespace std; using namespace TheISA; -BaseFullCPU::BaseFullCPU(Params *params) +BaseO3CPU::BaseO3CPU(Params *params) : BaseCPU(params), cpu_id(0) { } void -BaseFullCPU::regStats() +BaseO3CPU::regStats() { BaseCPU::regStats(); } @@ -82,8 +86,67 @@ FullO3CPU<Impl>::TickEvent::description() } template <class Impl> +FullO3CPU<Impl>::ActivateThreadEvent::ActivateThreadEvent() + : Event(&mainEventQueue, CPU_Tick_Pri) +{ +} + +template <class Impl> +void +FullO3CPU<Impl>::ActivateThreadEvent::init(int thread_num, + FullO3CPU<Impl> *thread_cpu) +{ + tid = thread_num; + cpu = thread_cpu; +} + +template <class Impl> +void +FullO3CPU<Impl>::ActivateThreadEvent::process() +{ + cpu->activateThread(tid); +} + +template <class Impl> +const char * +FullO3CPU<Impl>::ActivateThreadEvent::description() +{ + return "FullO3CPU \"Activate Thread\" event"; +} + +template <class Impl> +FullO3CPU<Impl>::DeallocateContextEvent::DeallocateContextEvent() + : Event(&mainEventQueue, CPU_Tick_Pri) +{ +} + +template <class Impl> +void +FullO3CPU<Impl>::DeallocateContextEvent::init(int thread_num, + FullO3CPU<Impl> *thread_cpu) +{ + tid = thread_num; + cpu = thread_cpu; +} + +template <class Impl> +void +FullO3CPU<Impl>::DeallocateContextEvent::process() +{ + cpu->deactivateThread(tid); + cpu->removeThread(tid); +} + +template <class Impl> +const char * +FullO3CPU<Impl>::DeallocateContextEvent::description() +{ + return "FullO3CPU \"Deallocate Context\" event"; +} + +template <class Impl> FullO3CPU<Impl>::FullO3CPU(Params *params) - : BaseFullCPU(params), + : BaseO3CPU(params), tickEvent(this), removeInstsThisCycle(false), fetch(params), @@ -94,7 +157,7 @@ FullO3CPU<Impl>::FullO3CPU(Params *params) regFile(params->numPhysIntRegs, params->numPhysFloatRegs), - freeList(params->numberOfThreads,//number of activeThreads + freeList(params->numberOfThreads, TheISA::NumIntRegs, params->numPhysIntRegs, TheISA::NumFloatRegs, params->numPhysFloatRegs), @@ -102,21 +165,20 @@ FullO3CPU<Impl>::FullO3CPU(Params *params) params->smtROBPolicy, params->smtROBThreshold, params->numberOfThreads), - scoreboard(params->numberOfThreads,//number of activeThreads + scoreboard(params->numberOfThreads, TheISA::NumIntRegs, params->numPhysIntRegs, TheISA::NumFloatRegs, params->numPhysFloatRegs, TheISA::NumMiscRegs * number_of_threads, TheISA::ZeroReg), - // For now just have these time buffers be pretty big. - // @todo: Make these time buffer sizes parameters or derived - // from latencies - timeBuffer(5, 5), - fetchQueue(5, 5), - decodeQueue(5, 5), - renameQueue(5, 5), - iewQueue(5, 5), - activityRec(NumStages, 10, params->activity), + timeBuffer(params->backComSize, params->forwardComSize), + fetchQueue(params->backComSize, params->forwardComSize), + decodeQueue(params->backComSize, params->forwardComSize), + renameQueue(params->backComSize, params->forwardComSize), + iewQueue(params->backComSize, params->forwardComSize), + activityRec(NumStages, + params->backComSize + params->forwardComSize, + params->activity), globalSeqNum(1), @@ -125,21 +187,25 @@ FullO3CPU<Impl>::FullO3CPU(Params *params) physmem(system->physmem), #endif // FULL_SYSTEM mem(params->mem), - switchCount(0), + drainCount(0), deferRegistration(params->deferRegistration), numThreads(number_of_threads) { _status = Idle; + checker = NULL; + if (params->checker) { +#if USE_CHECKER BaseCPU *temp_checker = params->checker; checker = dynamic_cast<Checker<DynInstPtr> *>(temp_checker); checker->setMemory(mem); #if FULL_SYSTEM checker->setSystem(params->system); #endif - } else { - checker = NULL; +#else + panic("Checker enabled but not compiled in!"); +#endif // USE_CHECKER } #if !FULL_SYSTEM @@ -177,13 +243,18 @@ FullO3CPU<Impl>::FullO3CPU(Params *params) commit.setIEWQueue(&iewQueue); commit.setRenameQueue(&renameQueue); - commit.setFetchStage(&fetch); commit.setIEWStage(&iew); rename.setIEWStage(&iew); rename.setCommitStage(&commit); #if !FULL_SYSTEM int active_threads = params->workload.size(); + + if (active_threads > Impl::MaxThreads) { + panic("Workload Size too large. Increase the 'MaxThreads'" + "constant in your O3CPU impl. file (e.g. o3/alpha/impl.hh) or " + "edit your workload size."); + } #else int active_threads = 1; #endif @@ -249,6 +320,8 @@ FullO3CPU<Impl>::FullO3CPU(Params *params) lastRunningCycle = curTick; + lastActivatedCycle = -1; + contextSwitch = false; } @@ -261,9 +334,9 @@ template <class Impl> void FullO3CPU<Impl>::fullCPURegStats() { - BaseFullCPU::regStats(); + BaseO3CPU::regStats(); - // Register any of the FullCPU's stats here. + // Register any of the O3CPU's stats here. timesIdled .name(name() + ".timesIdled") .desc("Number of times that the entire CPU went into an idle state and" @@ -316,10 +389,22 @@ FullO3CPU<Impl>::fullCPURegStats() } template <class Impl> +Port * +FullO3CPU<Impl>::getPort(const std::string &if_name, int idx) +{ + if (if_name == "dcache_port") + return iew.getDcachePort(); + else if (if_name == "icache_port") + return fetch.getIcachePort(); + else + panic("No Such Port\n"); +} + +template <class Impl> void FullO3CPU<Impl>::tick() { - DPRINTF(FullCPU, "\n\nFullCPU: Ticking main, FullO3CPU.\n"); + DPRINTF(O3CPU, "\n\nFullO3CPU: Ticking main, FullO3CPU.\n"); ++numCycles; @@ -355,7 +440,8 @@ FullO3CPU<Impl>::tick() } if (!tickEvent.scheduled()) { - if (_status == SwitchedOut) { + if (_status == SwitchedOut || + getState() == SimObject::Drained) { // increment stat lastRunningCycle = curTick; } else if (!activityRec.active()) { @@ -416,16 +502,107 @@ FullO3CPU<Impl>::init() template <class Impl> void +FullO3CPU<Impl>::activateThread(unsigned tid) +{ + list<unsigned>::iterator isActive = find( + activeThreads.begin(), activeThreads.end(), tid); + + if (isActive == activeThreads.end()) { + DPRINTF(O3CPU, "[tid:%i]: Adding to active threads list\n", + tid); + + activeThreads.push_back(tid); + } +} + +template <class Impl> +void +FullO3CPU<Impl>::deactivateThread(unsigned tid) +{ + //Remove From Active List, if Active + list<unsigned>::iterator thread_it = + find(activeThreads.begin(), activeThreads.end(), tid); + + if (thread_it != activeThreads.end()) { + DPRINTF(O3CPU,"[tid:%i]: Removing from active threads list\n", + tid); + activeThreads.erase(thread_it); + } +} + +template <class Impl> +void +FullO3CPU<Impl>::activateContext(int tid, int delay) +{ + // Needs to set each stage to running as well. + if (delay){ + DPRINTF(O3CPU, "[tid:%i]: Scheduling thread context to activate " + "on cycle %d\n", tid, curTick + cycles(delay)); + scheduleActivateThreadEvent(tid, delay); + } else { + activateThread(tid); + } + + if(lastActivatedCycle < curTick) { + scheduleTickEvent(delay); + + // Be sure to signal that there's some activity so the CPU doesn't + // deschedule itself. + activityRec.activity(); + fetch.wakeFromQuiesce(); + + lastActivatedCycle = curTick; + + _status = Running; + } +} + +template <class Impl> +void +FullO3CPU<Impl>::deallocateContext(int tid, int delay) +{ + // Schedule removal of thread data from CPU + if (delay){ + DPRINTF(O3CPU, "[tid:%i]: Scheduling thread context to deallocate " + "on cycle %d\n", tid, curTick + cycles(delay)); + scheduleDeallocateContextEvent(tid, delay); + } else { + deactivateThread(tid); + removeThread(tid); + } +} + +template <class Impl> +void +FullO3CPU<Impl>::suspendContext(int tid) +{ + DPRINTF(O3CPU,"[tid: %i]: Suspending Thread Context.\n", tid); + deactivateThread(tid); + if (activeThreads.size() == 0) + unscheduleTickEvent(); + _status = Idle; +} + +template <class Impl> +void +FullO3CPU<Impl>::haltContext(int tid) +{ + //For now, this is the same as deallocate + DPRINTF(O3CPU,"[tid:%i]: Halt Context called. Deallocating", tid); + deallocateContext(tid, 1); +} + +template <class Impl> +void FullO3CPU<Impl>::insertThread(unsigned tid) { - DPRINTF(FullCPU,"[tid:%i] Initializing thread data"); + DPRINTF(O3CPU,"[tid:%i] Initializing thread into CPU"); // Will change now that the PC and thread state is internal to the CPU // and not in the ThreadContext. -#if 0 #if FULL_SYSTEM ThreadContext *src_tc = system->threadContexts[tid]; #else - ThreadContext *src_tc = thread[tid]; + ThreadContext *src_tc = tcBase(tid); #endif //Bind Int Regs to Rename Map @@ -445,11 +622,14 @@ FullO3CPU<Impl>::insertThread(unsigned tid) } //Copy Thread Data Into RegFile - this->copyFromTC(tid); + //this->copyFromTC(tid); - //Set PC/NPC - regFile.pc[tid] = src_tc->readPC(); - regFile.npc[tid] = src_tc->readNextPC(); + //Set PC/NPC/NNPC + setPC(src_tc->readPC(), tid); + setNextPC(src_tc->readNextPC(), tid); +#if THE_ISA != ALPHA_ISA + setNextNPC(src_tc->readNextNPC(), tid); +#endif src_tc->setStatus(ThreadContext::Active); @@ -458,16 +638,19 @@ FullO3CPU<Impl>::insertThread(unsigned tid) //Reset ROB/IQ/LSQ Entries commit.rob->resetEntries(); iew.resetEntries(); -#endif } template <class Impl> void FullO3CPU<Impl>::removeThread(unsigned tid) { - DPRINTF(FullCPU,"[tid:%i] Removing thread data"); -#if 0 - //Unbind Int Regs from Rename Map + DPRINTF(O3CPU,"[tid:%i] Removing thread context from CPU.\n", tid); + + // Copy Thread Data From RegFile + // If thread is suspended, it might be re-allocated + //this->copyToTC(tid); + + // Unbind Int Regs from Rename Map for (int ireg = 0; ireg < TheISA::NumIntRegs; ireg++) { PhysRegIndex phys_reg = renameMap[tid].lookup(ireg); @@ -475,7 +658,7 @@ FullO3CPU<Impl>::removeThread(unsigned tid) freeList.addReg(phys_reg); } - //Unbind Float Regs from Rename Map + // Unbind Float Regs from Rename Map for (int freg = 0; freg < TheISA::NumFloatRegs; freg++) { PhysRegIndex phys_reg = renameMap[tid].lookup(freg); @@ -483,27 +666,20 @@ FullO3CPU<Impl>::removeThread(unsigned tid) freeList.addReg(phys_reg); } - //Copy Thread Data From RegFile - /* Fix Me: - * Do we really need to do this if we are removing a thread - * in the sense that it's finished (exiting)? If the thread is just - * being suspended we might... - */ -// this->copyToTC(tid); - - //Squash Throughout Pipeline + // Squash Throughout Pipeline fetch.squash(0,tid); decode.squash(tid); rename.squash(tid); + iew.squash(tid); + commit.rob->squash(commit.rob->readHeadInst(tid)->seqNum, tid); assert(iew.ldstQueue.getCount(tid) == 0); - //Reset ROB/IQ/LSQ Entries + // Reset ROB/IQ/LSQ Entries if (activeThreads.size() >= 1) { commit.rob->resetEntries(); iew.resetEntries(); } -#endif } @@ -511,37 +687,37 @@ template <class Impl> void FullO3CPU<Impl>::activateWhenReady(int tid) { - DPRINTF(FullCPU,"[tid:%i]: Checking if resources are available for incoming" + DPRINTF(O3CPU,"[tid:%i]: Checking if resources are available for incoming" "(e.g. PhysRegs/ROB/IQ/LSQ) \n", tid); bool ready = true; if (freeList.numFreeIntRegs() >= TheISA::NumIntRegs) { - DPRINTF(FullCPU,"[tid:%i] Suspending thread due to not enough " + DPRINTF(O3CPU,"[tid:%i] Suspending thread due to not enough " "Phys. Int. Regs.\n", tid); ready = false; } else if (freeList.numFreeFloatRegs() >= TheISA::NumFloatRegs) { - DPRINTF(FullCPU,"[tid:%i] Suspending thread due to not enough " + DPRINTF(O3CPU,"[tid:%i] Suspending thread due to not enough " "Phys. Float. Regs.\n", tid); ready = false; } else if (commit.rob->numFreeEntries() >= commit.rob->entryAmount(activeThreads.size() + 1)) { - DPRINTF(FullCPU,"[tid:%i] Suspending thread due to not enough " + DPRINTF(O3CPU,"[tid:%i] Suspending thread due to not enough " "ROB entries.\n", tid); ready = false; } else if (iew.instQueue.numFreeEntries() >= iew.instQueue.entryAmount(activeThreads.size() + 1)) { - DPRINTF(FullCPU,"[tid:%i] Suspending thread due to not enough " + DPRINTF(O3CPU,"[tid:%i] Suspending thread due to not enough " "IQ entries.\n", tid); ready = false; } else if (iew.ldstQueue.numFreeEntries() >= iew.ldstQueue.entryAmount(activeThreads.size() + 1)) { - DPRINTF(FullCPU,"[tid:%i] Suspending thread due to not enough " + DPRINTF(O3CPU,"[tid:%i] Suspending thread due to not enough " "LSQ entries.\n", tid); ready = false; @@ -559,6 +735,7 @@ FullO3CPU<Impl>::activateWhenReady(int tid) //blocks fetch contextSwitch = true; + //@todo: dont always add to waitlist //do waitlist cpuWaitList.push_back(tid); } @@ -566,133 +743,130 @@ FullO3CPU<Impl>::activateWhenReady(int tid) template <class Impl> void -FullO3CPU<Impl>::activateContext(int tid, int delay) +FullO3CPU<Impl>::serialize(std::ostream &os) { - // Needs to set each stage to running as well. - list<unsigned>::iterator isActive = find( - activeThreads.begin(), activeThreads.end(), tid); - - if (isActive == activeThreads.end()) { - //May Need to Re-code this if the delay variable is the - //delay needed for thread to activate - DPRINTF(FullCPU, "Adding Thread %i to active threads list\n", - tid); - - activeThreads.push_back(tid); + SERIALIZE_ENUM(_status); + BaseCPU::serialize(os); + nameOut(os, csprintf("%s.tickEvent", name())); + tickEvent.serialize(os); + + // Use SimpleThread's ability to checkpoint to make it easier to + // write out the registers. Also make this static so it doesn't + // get instantiated multiple times (causes a panic in statistics). + static SimpleThread temp; + + for (int i = 0; i < thread.size(); i++) { + nameOut(os, csprintf("%s.xc.%i", name(), i)); + temp.copyTC(thread[i]->getTC()); + temp.serialize(os); } - - assert(_status == Idle || _status == SwitchedOut); - - scheduleTickEvent(delay); - - // Be sure to signal that there's some activity so the CPU doesn't - // deschedule itself. - activityRec.activity(); - fetch.wakeFromQuiesce(); - - _status = Running; } template <class Impl> void -FullO3CPU<Impl>::suspendContext(int tid) +FullO3CPU<Impl>::unserialize(Checkpoint *cp, const std::string §ion) { - DPRINTF(FullCPU,"[tid: %i]: Suspended ...\n", tid); - unscheduleTickEvent(); - _status = Idle; -/* - //Remove From Active List, if Active - list<unsigned>::iterator isActive = find( - activeThreads.begin(), activeThreads.end(), tid); + UNSERIALIZE_ENUM(_status); + BaseCPU::unserialize(cp, section); + tickEvent.unserialize(cp, csprintf("%s.tickEvent", section)); + + // Use SimpleThread's ability to checkpoint to make it easier to + // read in the registers. Also make this static so it doesn't + // get instantiated multiple times (causes a panic in statistics). + static SimpleThread temp; + + for (int i = 0; i < thread.size(); i++) { + temp.copyTC(thread[i]->getTC()); + temp.unserialize(cp, csprintf("%s.xc.%i", section, i)); + thread[i]->getTC()->copyArchRegs(temp.getTC()); + } +} - if (isActive != activeThreads.end()) { - DPRINTF(FullCPU,"[tid:%i]: Removing from active threads list\n", - tid); - activeThreads.erase(isActive); +template <class Impl> +unsigned int +FullO3CPU<Impl>::drain(Event *drain_event) +{ + drainCount = 0; + fetch.drain(); + decode.drain(); + rename.drain(); + iew.drain(); + commit.drain(); + + // Wake the CPU and record activity so everything can drain out if + // the CPU was not able to immediately drain. + if (getState() != SimObject::Drained) { + // A bit of a hack...set the drainEvent after all the drain() + // calls have been made, that way if all of the stages drain + // immediately, the signalDrained() function knows not to call + // process on the drain event. + drainEvent = drain_event; + + wakeCPU(); + activityRec.activity(); + + return 1; + } else { + return 0; } -*/ } template <class Impl> void -FullO3CPU<Impl>::deallocateContext(int tid) +FullO3CPU<Impl>::resume() { - DPRINTF(FullCPU,"[tid:%i]: Deallocating ...", tid); -/* - //Remove From Active List, if Active - list<unsigned>::iterator isActive = find( - activeThreads.begin(), activeThreads.end(), tid); + assert(system->getMemoryMode() == System::Timing); + fetch.resume(); + decode.resume(); + rename.resume(); + iew.resume(); + commit.resume(); - if (isActive != activeThreads.end()) { - DPRINTF(FullCPU,"[tid:%i]: Removing from active threads list\n", - tid); - activeThreads.erase(isActive); + changeState(SimObject::Running); - removeThread(tid); - } -*/ + if (_status == SwitchedOut || _status == Idle) + return; + + if (!tickEvent.scheduled()) + tickEvent.schedule(curTick); + _status = Running; } template <class Impl> void -FullO3CPU<Impl>::haltContext(int tid) +FullO3CPU<Impl>::signalDrained() { - DPRINTF(FullCPU,"[tid:%i]: Halted ...", tid); -/* - //Remove From Active List, if Active - list<unsigned>::iterator isActive = find( - activeThreads.begin(), activeThreads.end(), tid); + if (++drainCount == NumStages) { + if (tickEvent.scheduled()) + tickEvent.squash(); - if (isActive != activeThreads.end()) { - DPRINTF(FullCPU,"[tid:%i]: Removing from active threads list\n", - tid); - activeThreads.erase(isActive); + changeState(SimObject::Drained); - removeThread(tid); + if (drainEvent) { + drainEvent->process(); + drainEvent = NULL; + } } -*/ + assert(drainCount <= 5); } template <class Impl> void -FullO3CPU<Impl>::switchOut(Sampler *_sampler) +FullO3CPU<Impl>::switchOut() { - sampler = _sampler; - switchCount = 0; fetch.switchOut(); - decode.switchOut(); rename.switchOut(); - iew.switchOut(); commit.switchOut(); - - // Wake the CPU and record activity so everything can drain out if - // the CPU is currently idle. - wakeCPU(); - activityRec.activity(); -} - -template <class Impl> -void -FullO3CPU<Impl>::signalSwitched() -{ - if (++switchCount == NumStages) { - fetch.doSwitchOut(); - rename.doSwitchOut(); - commit.doSwitchOut(); - instList.clear(); - while (!removeList.empty()) { - removeList.pop(); - } - - if (checker) - checker->switchOut(sampler); - - if (tickEvent.scheduled()) - tickEvent.squash(); - sampler->signalSwitched(); - _status = SwitchedOut; + instList.clear(); + while (!removeList.empty()) { + removeList.pop(); } - assert(switchCount <= 5); + + _status = SwitchedOut; +#if USE_CHECKER + if (checker) + checker->switchOut(); +#endif } template <class Impl> @@ -700,7 +874,7 @@ void FullO3CPU<Impl>::takeOverFrom(BaseCPU *oldCPU) { // Flush out any old data from the time buffers. - for (int i = 0; i < 10; ++i) { + for (int i = 0; i < timeBuffer.getSize(); ++i) { timeBuffer.advance(); fetchQueue.advance(); decodeQueue.advance(); @@ -730,7 +904,7 @@ FullO3CPU<Impl>::takeOverFrom(BaseCPU *oldCPU) if (isActive == activeThreads.end()) { //May Need to Re-code this if the delay variable is the delay //needed for thread to activate - DPRINTF(FullCPU, "Adding Thread %i to active threads list\n", + DPRINTF(O3CPU, "Adding Thread %i to active threads list\n", tid); activeThreads.push_back(tid); @@ -922,6 +1096,22 @@ FullO3CPU<Impl>::setNextPC(uint64_t val,unsigned tid) commit.setNextPC(val, tid); } +#if THE_ISA != ALPHA_ISA +template <class Impl> +uint64_t +FullO3CPU<Impl>::readNextNPC(unsigned tid) +{ + return commit.readNextNPC(tid); +} + +template <class Impl> +void +FullO3CPU<Impl>::setNextNNPC(uint64_t val,unsigned tid) +{ + commit.setNextNPC(val, tid); +} +#endif + template <class Impl> typename FullO3CPU<Impl>::ListIt FullO3CPU<Impl>::addInst(DynInstPtr &inst) @@ -958,7 +1148,7 @@ template <class Impl> void FullO3CPU<Impl>::removeFrontInst(DynInstPtr &inst) { - DPRINTF(FullCPU, "FullCPU: Removing committed instruction [tid:%i] PC %#x " + DPRINTF(O3CPU, "Removing committed instruction [tid:%i] PC %#x " "[sn:%lli]\n", inst->threadNumber, inst->readPC(), inst->seqNum); @@ -972,7 +1162,7 @@ template <class Impl> void FullO3CPU<Impl>::removeInstsNotInROB(unsigned tid) { - DPRINTF(FullCPU, "FullCPU: Thread %i: Deleting instructions from instruction" + DPRINTF(O3CPU, "Thread %i: Deleting instructions from instruction" " list.\n", tid); ListIt end_it; @@ -982,12 +1172,12 @@ FullO3CPU<Impl>::removeInstsNotInROB(unsigned tid) if (instList.empty()) { return; } else if (rob.isEmpty(/*tid*/)) { - DPRINTF(FullCPU, "FullCPU: ROB is empty, squashing all insts.\n"); + DPRINTF(O3CPU, "ROB is empty, squashing all insts.\n"); end_it = instList.begin(); rob_empty = true; } else { end_it = (rob.readTailInst(tid))->getInstListIt(); - DPRINTF(FullCPU, "FullCPU: ROB is not empty, squashing insts not in ROB.\n"); + DPRINTF(O3CPU, "ROB is not empty, squashing insts not in ROB.\n"); } removeInstsThisCycle = true; @@ -1026,7 +1216,7 @@ FullO3CPU<Impl>::removeInstsUntil(const InstSeqNum &seq_num, inst_iter--; - DPRINTF(FullCPU, "FullCPU: Deleting instructions from instruction " + DPRINTF(O3CPU, "Deleting instructions from instruction " "list that are from [tid:%i] and above [sn:%lli] (end=%lli).\n", tid, seq_num, (*inst_iter)->seqNum); @@ -1048,7 +1238,7 @@ inline void FullO3CPU<Impl>::squashInstIt(const ListIt &instIt, const unsigned &tid) { if ((*instIt)->threadNumber == tid) { - DPRINTF(FullCPU, "FullCPU: Squashing instruction, " + DPRINTF(O3CPU, "Squashing instruction, " "[tid:%i] [sn:%lli] PC %#x\n", (*instIt)->threadNumber, (*instIt)->seqNum, @@ -1069,7 +1259,7 @@ void FullO3CPU<Impl>::cleanUpRemovedInsts() { while (!removeList.empty()) { - DPRINTF(FullCPU, "FullCPU: Removing instruction, " + DPRINTF(O3CPU, "Removing instruction, " "[tid:%i] [sn:%lli] PC %#x\n", (*removeList.front())->threadNumber, (*removeList.front())->seqNum, @@ -1185,4 +1375,4 @@ FullO3CPU<Impl>::updateThreadPriority() } // Forward declaration of FullO3CPU. -template class FullO3CPU<AlphaSimpleImpl>; +template class FullO3CPU<O3CPUImpl>; diff --git a/src/cpu/o3/cpu.hh b/src/cpu/o3/cpu.hh index ff41a3306..83cb966e3 100644 --- a/src/cpu/o3/cpu.hh +++ b/src/cpu/o3/cpu.hh @@ -26,6 +26,7 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * Authors: Kevin Lim + * Korey Sewell */ #ifndef __CPU_O3_CPU_HH__ @@ -48,24 +49,33 @@ #include "cpu/o3/cpu_policy.hh" #include "cpu/o3/scoreboard.hh" #include "cpu/o3/thread_state.hh" +//#include "cpu/o3/thread_context.hh" #include "sim/process.hh" template <class> class Checker; class ThreadContext; +template <class> +class O3ThreadContext; + +class Checkpoint; class MemObject; class Process; -class BaseFullCPU : public BaseCPU +class BaseO3CPU : public BaseCPU { //Stuff that's pretty ISA independent will go here. public: typedef BaseCPU::Params Params; - BaseFullCPU(Params *params); + BaseO3CPU(Params *params); void regStats(); + /** Sets this CPU's ID. */ + void setCpuId(int id) { cpu_id = id; } + + /** Reads this CPU's ID. */ int readCpuId() { return cpu_id; } protected: @@ -78,7 +88,7 @@ class BaseFullCPU : public BaseCPU * tick() function for the CPU is defined here. */ template <class Impl> -class FullO3CPU : public BaseFullCPU +class FullO3CPU : public BaseO3CPU { public: typedef TheISA::FloatReg FloatReg; @@ -93,6 +103,8 @@ class FullO3CPU : public BaseFullCPU typedef typename std::list<DynInstPtr>::iterator ListIt; + friend class O3ThreadContext<Impl>; + public: enum Status { Running, @@ -105,6 +117,9 @@ class FullO3CPU : public BaseFullCPU /** Overall CPU status. */ Status _status; + /** Per-thread status in CPU, used for SMT. */ + Status _threadStatus[Impl::MaxThreads]; + private: class TickEvent : public Event { @@ -141,6 +156,92 @@ class FullO3CPU : public BaseFullCPU tickEvent.squash(); } + class ActivateThreadEvent : public Event + { + private: + /** Number of Thread to Activate */ + int tid; + + /** Pointer to the CPU. */ + FullO3CPU<Impl> *cpu; + + public: + /** Constructs the event. */ + ActivateThreadEvent(); + + /** Initialize Event */ + void init(int thread_num, FullO3CPU<Impl> *thread_cpu); + + /** Processes the event, calling activateThread() on the CPU. */ + void process(); + + /** Returns the description of the event. */ + const char *description(); + }; + + /** Schedule thread to activate , regardless of its current state. */ + void scheduleActivateThreadEvent(int tid, int delay) + { + // Schedule thread to activate, regardless of its current state. + if (activateThreadEvent[tid].squashed()) + activateThreadEvent[tid].reschedule(curTick + cycles(delay)); + else if (!activateThreadEvent[tid].scheduled()) + activateThreadEvent[tid].schedule(curTick + cycles(delay)); + } + + /** Unschedule actiavte thread event, regardless of its current state. */ + void unscheduleActivateThreadEvent(int tid) + { + if (activateThreadEvent[tid].scheduled()) + activateThreadEvent[tid].squash(); + } + + /** The tick event used for scheduling CPU ticks. */ + ActivateThreadEvent activateThreadEvent[Impl::MaxThreads]; + + class DeallocateContextEvent : public Event + { + private: + /** Number of Thread to Activate */ + int tid; + + /** Pointer to the CPU. */ + FullO3CPU<Impl> *cpu; + + public: + /** Constructs the event. */ + DeallocateContextEvent(); + + /** Initialize Event */ + void init(int thread_num, FullO3CPU<Impl> *thread_cpu); + + /** Processes the event, calling activateThread() on the CPU. */ + void process(); + + /** Returns the description of the event. */ + const char *description(); + }; + + /** Schedule cpu to deallocate thread context.*/ + void scheduleDeallocateContextEvent(int tid, int delay) + { + // Schedule thread to activate, regardless of its current state. + if (deallocateContextEvent[tid].squashed()) + deallocateContextEvent[tid].reschedule(curTick + cycles(delay)); + else if (!deallocateContextEvent[tid].scheduled()) + deallocateContextEvent[tid].schedule(curTick + cycles(delay)); + } + + /** Unschedule thread deallocation in CPU */ + void unscheduleDeallocateContextEvent(int tid) + { + if (deallocateContextEvent[tid].scheduled()) + deallocateContextEvent[tid].squash(); + } + + /** The tick event used for scheduling CPU ticks. */ + DeallocateContextEvent deallocateContextEvent[Impl::MaxThreads]; + public: /** Constructs a CPU with the given parameters. */ FullO3CPU(Params *params); @@ -150,6 +251,9 @@ class FullO3CPU : public BaseFullCPU /** Registers statistics. */ void fullCPURegStats(); + /** Returns a specific port. */ + Port *getPort(const std::string &if_name, int idx); + /** Ticks CPU, calling tick() on each stage, and checking the overall * activity to see if the CPU should deschedule itself. */ @@ -158,6 +262,16 @@ class FullO3CPU : public BaseFullCPU /** Initialize the CPU */ void init(); + /** Returns the Number of Active Threads in the CPU */ + int numActiveThreads() + { return activeThreads.size(); } + + /** Add Thread to Active Threads List */ + void activateThread(unsigned tid); + + /** Remove Thread from Active Threads List */ + void deactivateThread(unsigned tid); + /** Setup CPU to insert a thread's context */ void insertThread(unsigned tid); @@ -184,7 +298,7 @@ class FullO3CPU : public BaseFullCPU /** Remove Thread from Active Threads List && * Remove Thread Context from CPU. */ - void deallocateContext(int tid); + void deallocateContext(int tid, int delay = 1); /** Remove Thread from Active Threads List && * Remove Thread Context from CPU. @@ -200,6 +314,13 @@ class FullO3CPU : public BaseFullCPU /** Update The Order In Which We Process Threads. */ void updateThreadPriority(); + /** Serialize state. */ + virtual void serialize(std::ostream &os); + + /** Unserialize from a checkpoint. */ + virtual void unserialize(Checkpoint *cp, const std::string §ion); + + public: /** Executes a syscall on this cycle. * --------------------------------------- * Note: this is a virtual function. CPU-Specific @@ -207,14 +328,21 @@ class FullO3CPU : public BaseFullCPU */ virtual void syscall(int tid) { panic("Unimplemented!"); } - /** Switches out this CPU. */ - void switchOut(Sampler *sampler); + /** Starts draining the CPU's pipeline of all instructions in + * order to stop all memory accesses. */ + virtual unsigned int drain(Event *drain_event); + + /** Resumes execution after a drain. */ + virtual void resume(); /** Signals to this CPU that a stage has completed switching out. */ - void signalSwitched(); + void signalDrained(); + + /** Switches out this CPU. */ + virtual void switchOut(); /** Takes over from another CPU. */ - void takeOverFrom(BaseCPU *oldCPU); + virtual void takeOverFrom(BaseCPU *oldCPU); /** Get the current instruction sequence number, and increment it. */ InstSeqNum getAndIncrementInstSeq() @@ -299,6 +427,12 @@ class FullO3CPU : public BaseFullCPU /** Sets the next PC of a specific thread. */ void setNextPC(uint64_t val, unsigned tid); + /** Reads the next NPC of a specific thread. */ + uint64_t readNextNPC(unsigned tid); + + /** Sets the next NPC of a specific thread. */ + void setNextNPC(uint64_t val, unsigned tid); + /** Function to add instruction onto the head of the list of the * instructions. Used when new instructions are fetched. */ @@ -481,11 +615,11 @@ class FullO3CPU : public BaseFullCPU /** Pointer to memory. */ MemObject *mem; - /** Pointer to the sampler */ - Sampler *sampler; + /** Event to call process() on once draining has completed. */ + Event *drainEvent; - /** Counter of how many stages have completed switching out. */ - int switchCount; + /** Counter of how many stages have completed draining. */ + int drainCount; /** Pointers to all of the threads in the CPU. */ std::vector<Thread *> thread; @@ -507,6 +641,9 @@ class FullO3CPU : public BaseFullCPU /** The cycle that the CPU was last running, used for statistics. */ Tick lastRunningCycle; + /** The cycle that the CPU was last activated by a new thread*/ + Tick lastActivatedCycle; + /** Number of Threads CPU can process */ unsigned numThreads; diff --git a/src/cpu/o3/decode.cc b/src/cpu/o3/decode.cc index 4924f018a..896e38331 100644 --- a/src/cpu/o3/decode.cc +++ b/src/cpu/o3/decode.cc @@ -28,8 +28,7 @@ * Authors: Kevin Lim */ -#include "cpu/o3/alpha_dyn_inst.hh" -#include "cpu/o3/alpha_impl.hh" +#include "cpu/o3/isa_specific.hh" #include "cpu/o3/decode_impl.hh" -template class DefaultDecode<AlphaSimpleImpl>; +template class DefaultDecode<O3CPUImpl>; diff --git a/src/cpu/o3/decode.hh b/src/cpu/o3/decode.hh index ff88358d6..7f5ecbc26 100644 --- a/src/cpu/o3/decode.hh +++ b/src/cpu/o3/decode.hh @@ -48,7 +48,7 @@ class DefaultDecode { private: // Typedefs from the Impl. - typedef typename Impl::FullCPU FullCPU; + typedef typename Impl::O3CPU O3CPU; typedef typename Impl::DynInstPtr DynInstPtr; typedef typename Impl::Params Params; typedef typename Impl::CPUPol CPUPol; @@ -95,7 +95,7 @@ class DefaultDecode void regStats(); /** Sets CPU pointer. */ - void setCPU(FullCPU *cpu_ptr); + void setCPU(O3CPU *cpu_ptr); /** Sets the main backwards communication time buffer pointer. */ void setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr); @@ -109,8 +109,14 @@ class DefaultDecode /** Sets pointer to list of active threads. */ void setActiveThreads(std::list<unsigned> *at_ptr); + /** Drains the decode stage. */ + bool drain(); + + /** Resumes execution after a drain. */ + void resume() { } + /** Switches out the decode stage. */ - void switchOut(); + void switchOut() { } /** Takes over from another CPU's thread. */ void takeOverFrom(); @@ -189,7 +195,7 @@ class DefaultDecode private: // Interfaces to objects outside of decode. /** CPU interface. */ - FullCPU *cpu; + O3CPU *cpu; /** Time buffer interface. */ TimeBuffer<TimeStruct> *timeBuffer; diff --git a/src/cpu/o3/decode_impl.hh b/src/cpu/o3/decode_impl.hh index 8a6ea6626..8b851c032 100644 --- a/src/cpu/o3/decode_impl.hh +++ b/src/cpu/o3/decode_impl.hh @@ -112,7 +112,7 @@ DefaultDecode<Impl>::regStats() template<class Impl> void -DefaultDecode<Impl>::setCPU(FullCPU *cpu_ptr) +DefaultDecode<Impl>::setCPU(O3CPU *cpu_ptr) { DPRINTF(Decode, "Setting CPU pointer.\n"); cpu = cpu_ptr; @@ -165,11 +165,12 @@ DefaultDecode<Impl>::setActiveThreads(list<unsigned> *at_ptr) } template <class Impl> -void -DefaultDecode<Impl>::switchOut() +bool +DefaultDecode<Impl>::drain() { - // Decode can immediately switch out. - cpu->signalSwitched(); + // Decode is done draining at any time. + cpu->signalDrained(); + return true; } template <class Impl> @@ -296,7 +297,7 @@ DefaultDecode<Impl>::squash(DynInstPtr &inst, unsigned tid) for (int i=0; i<fromFetch->size; i++) { if (fromFetch->insts[i]->threadNumber == tid && fromFetch->insts[i]->seqNum > inst->seqNum) { - fromFetch->insts[i]->squashed = true; + fromFetch->insts[i]->setSquashed(); } } @@ -345,7 +346,7 @@ DefaultDecode<Impl>::squash(unsigned tid) for (int i=0; i<fromFetch->size; i++) { if (fromFetch->insts[i]->threadNumber == tid) { - fromFetch->insts[i]->squashed = true; + fromFetch->insts[i]->setSquashed(); squash_count++; } } @@ -427,7 +428,7 @@ DefaultDecode<Impl>::updateStatus() DPRINTF(Activity, "Activating stage.\n"); - cpu->activateStage(FullCPU::DecodeIdx); + cpu->activateStage(O3CPU::DecodeIdx); } } else { // If it's not unblocking, then decode will not have any internal @@ -436,7 +437,7 @@ DefaultDecode<Impl>::updateStatus() _status = Inactive; DPRINTF(Activity, "Deactivating stage.\n"); - cpu->deactivateStage(FullCPU::DecodeIdx); + cpu->deactivateStage(O3CPU::DecodeIdx); } } } @@ -515,7 +516,7 @@ DefaultDecode<Impl>::checkSignalsAndUpdate(unsigned tid) // Check ROB squash signals from commit. if (fromCommit->commitInfo[tid].robSquashing) { - DPRINTF(Decode, "[tid:%]: ROB is still squashing.\n",tid); + DPRINTF(Decode, "[tid:%u]: ROB is still squashing.\n", tid); // Continue to squash. decodeStatus[tid] = Squashing; diff --git a/src/cpu/o3/dep_graph.hh b/src/cpu/o3/dep_graph.hh index 3659b1a37..c19fd0abf 100644 --- a/src/cpu/o3/dep_graph.hh +++ b/src/cpu/o3/dep_graph.hh @@ -68,6 +68,8 @@ class DependencyGraph : numEntries(0), memAllocCounter(0), nodesTraversed(0), nodesRemoved(0) { } + ~DependencyGraph(); + /** Resize the dependency graph to have num_entries registers. */ void resize(int num_entries); @@ -121,6 +123,12 @@ class DependencyGraph }; template <class DynInstPtr> +DependencyGraph<DynInstPtr>::~DependencyGraph() +{ + delete [] dependGraph; +} + +template <class DynInstPtr> void DependencyGraph<DynInstPtr>::resize(int num_entries) { diff --git a/src/cpu/o3/dyn_inst.hh b/src/cpu/o3/dyn_inst.hh new file mode 100644 index 000000000..a2cdf2dba --- /dev/null +++ b/src/cpu/o3/dyn_inst.hh @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Korey Sewell + */ + +#ifndef __CPU_O3_DYN_INST_HH__ +#define __CPU_O3_DYN_INST_HH__ + +#include "arch/isa_specific.hh" + +#if THE_ISA == ALPHA_ISA +template <class Impl> +class AlphaDynInst; + +struct AlphaSimpleImpl; + +typedef AlphaDynInst<AlphaSimpleImpl> O3DynInst; +#endif + +#endif // __CPU_O3_DYN_INST_HH__ diff --git a/src/cpu/o3/fetch.cc b/src/cpu/o3/fetch.cc index 5f52d0fca..d809b07e4 100644 --- a/src/cpu/o3/fetch.cc +++ b/src/cpu/o3/fetch.cc @@ -28,8 +28,7 @@ * Authors: Kevin Lim */ -#include "cpu/o3/alpha_dyn_inst.hh" -#include "cpu/o3/alpha_impl.hh" +#include "cpu/o3/isa_specific.hh" #include "cpu/o3/fetch_impl.hh" -template class DefaultFetch<AlphaSimpleImpl>; +template class DefaultFetch<O3CPUImpl>; diff --git a/src/cpu/o3/fetch.hh b/src/cpu/o3/fetch.hh index 76b32de68..931919af8 100644 --- a/src/cpu/o3/fetch.hh +++ b/src/cpu/o3/fetch.hh @@ -26,6 +26,7 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * Authors: Kevin Lim + * Korey Sewell */ #ifndef __CPU_O3_FETCH_HH__ @@ -35,12 +36,10 @@ #include "base/statistics.hh" #include "base/timebuf.hh" #include "cpu/pc_event.hh" -#include "mem/packet.hh" +#include "mem/packet_impl.hh" #include "mem/port.hh" #include "sim/eventq.hh" -class Sampler; - /** * DefaultFetch class handles both single threaded and SMT fetch. Its * width is specified by the parameters; each cycle it tries to fetch @@ -57,7 +56,7 @@ class DefaultFetch typedef typename Impl::CPUPol CPUPol; typedef typename Impl::DynInst DynInst; typedef typename Impl::DynInstPtr DynInstPtr; - typedef typename Impl::FullCPU FullCPU; + typedef typename Impl::O3CPU O3CPU; typedef typename Impl::Params Params; /** Typedefs from the CPU policy. */ @@ -163,8 +162,11 @@ class DefaultFetch /** Registers statistics. */ void regStats(); + /** Returns the icache port. */ + Port *getIcachePort() { return icachePort; } + /** Sets CPU pointer. */ - void setCPU(FullCPU *cpu_ptr); + void setCPU(O3CPU *cpu_ptr); /** Sets the main backwards communication time buffer pointer. */ void setTimeBuffer(TimeBuffer<TimeStruct> *time_buffer); @@ -181,11 +183,14 @@ class DefaultFetch /** Processes cache completion event. */ void processCacheCompletion(PacketPtr pkt); - /** Begins the switch out of the fetch stage. */ - void switchOut(); + /** Begins the drain of the fetch stage. */ + bool drain(); - /** Completes the switch out of the fetch stage. */ - void doSwitchOut(); + /** Resumes execution after a drain. */ + void resume(); + + /** Tells fetch stage to prepare to be switched out. */ + void switchOut(); /** Takes over from another CPU's thread. */ void takeOverFrom(); @@ -296,8 +301,8 @@ class DefaultFetch int branchCount(); private: - /** Pointer to the FullCPU. */ - FullCPU *cpu; + /** Pointer to the O3CPU. */ + O3CPU *cpu; /** Time buffer interface. */ TimeBuffer<TimeStruct> *timeBuffer; @@ -335,6 +340,15 @@ class DefaultFetch /** Per-thread next PC. */ Addr nextPC[Impl::MaxThreads]; +#if THE_ISA != ALPHA_ISA + /** Per-thread next Next PC. + * This is not a real register but is used for + * architectures that use a branch-delay slot. + * (such as MIPS or Sparc) + */ + Addr nextNPC[Impl::MaxThreads]; +#endif + /** Memory request used to access cache. */ RequestPtr memReq[Impl::MaxThreads]; @@ -390,6 +404,12 @@ class DefaultFetch /** The cache line being fetched. */ uint8_t *cacheData[Impl::MaxThreads]; + /** The PC of the cacheline that has been loaded. */ + Addr cacheDataPC[Impl::MaxThreads]; + + /** Whether or not the cache data is valid. */ + bool cacheDataValid[Impl::MaxThreads]; + /** Size of instructions. */ int instSize; @@ -413,6 +433,9 @@ class DefaultFetch */ bool interruptPending; + /** Is there a drain pending. */ + bool drainPending; + /** Records if fetch is switched out. */ bool switchedOut; @@ -421,6 +444,7 @@ class DefaultFetch Stats::Scalar<> icacheStallCycles; /** Stat for total number of fetched instructions. */ Stats::Scalar<> fetchedInsts; + /** Total number of fetched branches. */ Stats::Scalar<> fetchedBranches; /** Stat for total number of predicted branches. */ Stats::Scalar<> predictedBranches; diff --git a/src/cpu/o3/fetch_impl.hh b/src/cpu/o3/fetch_impl.hh index c0a2a5d09..4184e1867 100644 --- a/src/cpu/o3/fetch_impl.hh +++ b/src/cpu/o3/fetch_impl.hh @@ -26,8 +26,11 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * Authors: Kevin Lim + * Korey Sewell */ +#include "config/use_checker.hh" + #include "arch/isa_traits.hh" #include "arch/utility.hh" #include "cpu/checker/cpu.hh" @@ -106,13 +109,12 @@ DefaultFetch<Impl>::DefaultFetch(Params *params) numThreads(params->numberOfThreads), numFetchingThreads(params->smtNumFetchingThreads), interruptPending(false), + drainPending(false), switchedOut(false) { if (numThreads > Impl::MaxThreads) fatal("numThreads is not a valid value\n"); - DPRINTF(Fetch, "Fetch constructor called\n"); - // Set fetch stage's status to inactive. _status = Inactive; @@ -125,6 +127,8 @@ DefaultFetch<Impl>::DefaultFetch(Params *params) // Figure out fetch policy if (policy == "singlethread") { fetchPolicy = SingleThread; + if (numThreads > 1) + panic("Invalid Fetch Policy for a SMT workload."); } else if (policy == "roundrobin") { fetchPolicy = RoundRobin; DPRINTF(Fetch, "Fetch policy set to Round Robin\n"); @@ -158,6 +162,8 @@ DefaultFetch<Impl>::DefaultFetch(Params *params) // Create space to store a cache line. cacheData[tid] = new uint8_t[cacheBlkSize]; + cacheDataPC[tid] = 0; + cacheDataValid[tid] = false; stalls[tid].decode = 0; stalls[tid].rename = 0; @@ -268,7 +274,7 @@ DefaultFetch<Impl>::regStats() template<class Impl> void -DefaultFetch<Impl>::setCPU(FullCPU *cpu_ptr) +DefaultFetch<Impl>::setCPU(O3CPU *cpu_ptr) { DPRINTF(Fetch, "Setting the CPU pointer.\n"); cpu = cpu_ptr; @@ -276,13 +282,11 @@ DefaultFetch<Impl>::setCPU(FullCPU *cpu_ptr) // Name is finally available, so create the port. icachePort = new IcachePort(this); - Port *mem_dport = mem->getPort(""); - icachePort->setPeer(mem_dport); - mem_dport->setPeer(icachePort); - +#if USE_CHECKER if (cpu->checker) { cpu->checker->setIcachePort(icachePort); } +#endif // Fetch needs to start fetching instructions at the very beginning, // so it must start up in active state. @@ -330,6 +334,9 @@ DefaultFetch<Impl>::initStage() for (int tid = 0; tid < numThreads; tid++) { PC[tid] = cpu->readPC(tid); nextPC[tid] = cpu->readNextPC(tid); +#if THE_ISA != ALPHA_ISA + nextNPC[tid] = cpu->readNextNPC(tid); +#endif } } @@ -349,18 +356,22 @@ DefaultFetch<Impl>::processCacheCompletion(PacketPtr pkt) ++fetchIcacheSquashes; delete pkt->req; delete pkt; - memReq[tid] = NULL; return; } - // Wake up the CPU (if it went to sleep and was waiting on this completion - // event). - cpu->wakeCPU(); + memcpy(cacheData[tid], pkt->getPtr<uint8_t *>(), cacheBlkSize); + cacheDataValid[tid] = true; - DPRINTF(Activity, "[tid:%u] Activating fetch due to cache completion\n", - tid); + if (!drainPending) { + // Wake up the CPU (if it went to sleep and was waiting on + // this completion event). + cpu->wakeCPU(); - switchToActive(); + DPRINTF(Activity, "[tid:%u] Activating fetch due to cache completion\n", + tid); + + switchToActive(); + } // Only switch to IcacheAccessComplete if we're not stalled as well. if (checkStall(tid)) { @@ -376,18 +387,27 @@ DefaultFetch<Impl>::processCacheCompletion(PacketPtr pkt) } template <class Impl> +bool +DefaultFetch<Impl>::drain() +{ + // Fetch is ready to drain at any time. + cpu->signalDrained(); + drainPending = true; + return true; +} + +template <class Impl> void -DefaultFetch<Impl>::switchOut() +DefaultFetch<Impl>::resume() { - // Fetch is ready to switch out at any time. - switchedOut = true; - cpu->signalSwitched(); + drainPending = false; } template <class Impl> void -DefaultFetch<Impl>::doSwitchOut() +DefaultFetch<Impl>::switchOut() { + switchedOut = true; // Branch predictor needs to have its state cleared. branchPred.switchOut(); } @@ -404,6 +424,9 @@ DefaultFetch<Impl>::takeOverFrom() stalls[i].commit = 0; PC[i] = cpu->readPC(i); nextPC[i] = cpu->readNextPC(i); +#if THE_ISA != ALPHA_ISA + nextNPC[i] = cpu->readNextNPC(i); +#endif fetchStatus[i] = Running; } numInst = 0; @@ -430,7 +453,7 @@ DefaultFetch<Impl>::switchToActive() if (_status == Inactive) { DPRINTF(Activity, "Activating stage.\n"); - cpu->activateStage(FullCPU::FetchIdx); + cpu->activateStage(O3CPU::FetchIdx); _status = Active; } @@ -443,7 +466,7 @@ DefaultFetch<Impl>::switchToInactive() if (_status == Active) { DPRINTF(Activity, "Deactivating stage.\n"); - cpu->deactivateStage(FullCPU::FetchIdx); + cpu->deactivateStage(O3CPU::FetchIdx); _status = Inactive; } @@ -488,7 +511,7 @@ DefaultFetch<Impl>::fetchCacheLine(Addr fetch_PC, Fault &ret_fault, unsigned tid unsigned flags = 0; #endif // FULL_SYSTEM - if (cacheBlocked || (interruptPending && flags == 0) || switchedOut) { + if (cacheBlocked || (interruptPending && flags == 0)) { // Hold off fetch from getting new instructions when: // Cache is blocked, or // while an interrupt is pending and we're not in PAL mode, or @@ -499,6 +522,11 @@ DefaultFetch<Impl>::fetchCacheLine(Addr fetch_PC, Fault &ret_fault, unsigned tid // Align the fetch PC so it's at the start of a cache block. fetch_PC = icacheBlockAlignPC(fetch_PC); + // If we've already got the block, no need to try to fetch it again. + if (cacheDataValid[tid] && fetch_PC == cacheDataPC[tid]) { + return true; + } + // Setup the memReq to do a read of the first instruction's address. // Set the appropriate read size and flags as well. // Build request here. @@ -530,7 +558,10 @@ DefaultFetch<Impl>::fetchCacheLine(Addr fetch_PC, Fault &ret_fault, unsigned tid // Build packet here. PacketPtr data_pkt = new Packet(mem_req, Packet::ReadReq, Packet::Broadcast); - data_pkt->dataStatic(cacheData[tid]); + data_pkt->dataDynamicArray(new uint8_t[cacheBlkSize]); + + cacheDataPC[tid] = fetch_PC; + cacheDataValid[tid] = false; DPRINTF(Fetch, "Fetch: Doing instruction read.\n"); @@ -549,7 +580,7 @@ DefaultFetch<Impl>::fetchCacheLine(Addr fetch_PC, Fault &ret_fault, unsigned tid return false; } - DPRINTF(Fetch, "Doing cache access.\n"); + DPRINTF(Fetch, "[tid:%i]: Doing cache access.\n", tid); lastIcacheStall[tid] = curTick; @@ -662,7 +693,7 @@ DefaultFetch<Impl>::updateFetchStatus() "completion\n",tid); } - cpu->activateStage(FullCPU::FetchIdx); + cpu->activateStage(O3CPU::FetchIdx); } return Active; @@ -673,7 +704,7 @@ DefaultFetch<Impl>::updateFetchStatus() if (_status == Active) { DPRINTF(Activity, "Deactivating stage.\n"); - cpu->deactivateStage(FullCPU::FetchIdx); + cpu->deactivateStage(O3CPU::FetchIdx); } return Inactive; @@ -714,12 +745,15 @@ DefaultFetch<Impl>::tick() // Reset the number of the instruction we're fetching. numInst = 0; +#if FULL_SYSTEM if (fromCommit->commitInfo[0].interruptPending) { interruptPending = true; } + if (fromCommit->commitInfo[0].clearInterrupt) { interruptPending = false; } +#endif for (threadFetched = 0; threadFetched < numFetchingThreads; threadFetched++) { @@ -817,7 +851,7 @@ DefaultFetch<Impl>::checkSignalsAndUpdate(unsigned tid) // Check ROB squash signals from commit. if (fromCommit->commitInfo[tid].robSquashing) { - DPRINTF(Fetch, "[tid:%u]: ROB is still squashing Thread %u.\n", tid); + DPRINTF(Fetch, "[tid:%u]: ROB is still squashing.\n", tid); // Continue to squash. fetchStatus[tid] = Squashing; @@ -885,7 +919,7 @@ DefaultFetch<Impl>::fetch(bool &status_change) ////////////////////////////////////////// int tid = getFetchingThread(fetchPolicy); - if (tid == -1) { + if (tid == -1 || drainPending) { DPRINTF(Fetch,"There are no more threads available to fetch from.\n"); // Breaks looping condition in tick() @@ -893,6 +927,8 @@ DefaultFetch<Impl>::fetch(bool &status_change) return; } + DPRINTF(Fetch, "Attempting to fetch from [tid:%i]\n", tid); + // The current PC. Addr &fetch_PC = PC[tid]; @@ -915,7 +951,11 @@ DefaultFetch<Impl>::fetch(bool &status_change) bool fetch_success = fetchCacheLine(fetch_PC, fault, tid); if (!fetch_success) { - ++fetchMiscStallCycles; + if (cacheBlocked) { + ++icacheStallCycles; + } else { + ++fetchMiscStallCycles; + } return; } } else { @@ -984,11 +1024,11 @@ DefaultFetch<Impl>::fetch(bool &status_change) DynInstPtr instruction = new DynInst(ext_inst, fetch_PC, next_PC, inst_seq, cpu); - instruction->setThread(tid); + instruction->setTid(tid); instruction->setASID(tid); - instruction->setState(cpu->thread[tid]); + instruction->setThreadState(cpu->thread[tid]); DPRINTF(Fetch, "[tid:%i]: Instruction PC %#x created " "[sn:%lli]\n", @@ -1020,7 +1060,7 @@ DefaultFetch<Impl>::fetch(bool &status_change) fetch_PC = next_PC; if (instruction->isQuiesce()) { - warn("%lli: Quiesce instruction encountered, halting fetch!", + warn("cycle %lli: Quiesce instruction encountered, halting fetch!", curTick); fetchStatus[tid] = QuiescePending; ++numInst; @@ -1041,8 +1081,17 @@ DefaultFetch<Impl>::fetch(bool &status_change) if (fault == NoFault) { DPRINTF(Fetch, "[tid:%i]: Setting PC to %08p.\n",tid, next_PC); +#if THE_ISA == ALPHA_ISA + PC[tid] = next_PC; + nextPC[tid] = next_PC + instSize; +#else PC[tid] = next_PC; nextPC[tid] = next_PC + instSize; + nextPC[tid] = next_PC + instSize; + + thread->setNextPC(thread->readNextNPC()); + thread->setNextNPC(thread->readNextNPC() + sizeof(MachInst)); +#endif } else { // We shouldn't be in an icache miss and also have a fault (an ITB // miss) @@ -1065,11 +1114,11 @@ DefaultFetch<Impl>::fetch(bool &status_change) next_PC, inst_seq, cpu); instruction->setPredTarg(next_PC + instSize); - instruction->setThread(tid); + instruction->setTid(tid); instruction->setASID(tid); - instruction->setState(cpu->thread[tid]); + instruction->setThreadState(cpu->thread[tid]); instruction->traceData = NULL; @@ -1085,9 +1134,9 @@ DefaultFetch<Impl>::fetch(bool &status_change) fetchStatus[tid] = TrapPending; status_change = true; - warn("%lli fault (%d) detected @ PC %08p", curTick, fault, PC[tid]); + warn("cycle %lli: fault (%d) detected @ PC %08p", curTick, fault, PC[tid]); #else // !FULL_SYSTEM - warn("%lli fault (%d) detected @ PC %08p", curTick, fault, PC[tid]); + warn("cycle %lli: fault (%d) detected @ PC %08p", curTick, fault, PC[tid]); #endif // FULL_SYSTEM } } @@ -1256,6 +1305,6 @@ int DefaultFetch<Impl>::branchCount() { list<unsigned>::iterator threads = (*activeThreads).begin(); - + panic("Branch Count Fetch policy unimplemented\n"); return *threads; } diff --git a/src/cpu/o3/fu_pool.cc b/src/cpu/o3/fu_pool.cc index 545deea9b..42e329aca 100644 --- a/src/cpu/o3/fu_pool.cc +++ b/src/cpu/o3/fu_pool.cc @@ -31,7 +31,7 @@ #include <sstream> #include "cpu/o3/fu_pool.hh" -#include "encumbered/cpu/full/fu_pool.hh" +#include "cpu/func_unit.hh" #include "sim/builder.hh" using namespace std; diff --git a/src/cpu/o3/iew.cc b/src/cpu/o3/iew.cc index 8145f4cc7..f99be7fe0 100644 --- a/src/cpu/o3/iew.cc +++ b/src/cpu/o3/iew.cc @@ -28,9 +28,8 @@ * Authors: Kevin Lim */ -#include "cpu/o3/alpha_dyn_inst.hh" -#include "cpu/o3/alpha_impl.hh" +#include "cpu/o3/isa_specific.hh" #include "cpu/o3/iew_impl.hh" #include "cpu/o3/inst_queue.hh" -template class DefaultIEW<AlphaSimpleImpl>; +template class DefaultIEW<O3CPUImpl>; diff --git a/src/cpu/o3/iew.hh b/src/cpu/o3/iew.hh index 2e61af5fc..fb9afde54 100644 --- a/src/cpu/o3/iew.hh +++ b/src/cpu/o3/iew.hh @@ -68,7 +68,7 @@ class DefaultIEW //Typedefs from Impl typedef typename Impl::CPUPol CPUPol; typedef typename Impl::DynInstPtr DynInstPtr; - typedef typename Impl::FullCPU FullCPU; + typedef typename Impl::O3CPU O3CPU; typedef typename Impl::Params Params; typedef typename CPUPol::IQ IQ; @@ -80,7 +80,7 @@ class DefaultIEW typedef typename CPUPol::RenameStruct RenameStruct; typedef typename CPUPol::IssueStruct IssueStruct; - friend class Impl::FullCPU; + friend class Impl::O3CPU; friend class CPUPol::IQ; public: @@ -125,8 +125,11 @@ class DefaultIEW /** Initializes stage; sends back the number of free IQ and LSQ entries. */ void initStage(); + /** Returns the dcache port. */ + Port *getDcachePort() { return ldstQueue.getDcachePort(); } + /** Sets CPU pointer for IEW, IQ, and LSQ. */ - void setCPU(FullCPU *cpu_ptr); + void setCPU(O3CPU *cpu_ptr); /** Sets main time buffer used for backwards communication. */ void setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr); @@ -143,11 +146,14 @@ class DefaultIEW /** Sets pointer to the scoreboard. */ void setScoreboard(Scoreboard *sb_ptr); - /** Starts switch out of IEW stage. */ - void switchOut(); + /** Drains IEW stage. */ + bool drain(); + + /** Resumes execution after a drain. */ + void resume(); /** Completes switch out of IEW stage. */ - void doSwitchOut(); + void switchOut(); /** Takes over from another CPU's thread. */ void takeOverFrom(); @@ -204,6 +210,45 @@ class DefaultIEW /** Returns if the LSQ has any stores to writeback. */ bool hasStoresToWB() { return ldstQueue.hasStoresToWB(); } + void incrWb(InstSeqNum &sn) + { + if (++wbOutstanding == wbMax) + ableToIssue = false; + DPRINTF(IEW, "wbOutstanding: %i\n", wbOutstanding); +#if DEBUG + wbList.insert(sn); +#endif + } + + void decrWb(InstSeqNum &sn) + { + if (wbOutstanding-- == wbMax) + ableToIssue = true; + DPRINTF(IEW, "wbOutstanding: %i\n", wbOutstanding); +#if DEBUG + assert(wbList.find(sn) != wbList.end()); + wbList.erase(sn); +#endif + } + +#if DEBUG + std::set<InstSeqNum> wbList; + + void dumpWb() + { + std::set<InstSeqNum>::iterator wb_it = wbList.begin(); + while (wb_it != wbList.end()) { + cprintf("[sn:%lli]\n", + (*wb_it)); + wb_it++; + } + } +#endif + + bool canIssue() { return ableToIssue; } + + bool ableToIssue; + private: /** Sends commit proper information for a squash due to a branch * mispredict. @@ -261,6 +306,9 @@ class DefaultIEW /** Processes inputs and changes state accordingly. */ void checkSignalsAndUpdate(unsigned tid); + /** Removes instructions from rename from a thread's instruction list. */ + void emptyRenameInsts(unsigned tid); + /** Sorts instructions coming from rename into lists separated by thread. */ void sortInsts(); @@ -328,7 +376,7 @@ class DefaultIEW private: /** CPU pointer. */ - FullCPU *cpu; + O3CPU *cpu; /** Records if IEW has written to the time buffer this cycle, so that the * CPU can deschedule itself if there is no activity. @@ -381,20 +429,12 @@ class DefaultIEW */ unsigned issueToExecuteDelay; - /** Width of issue's read path, in instructions. The read path is both - * the skid buffer and the rename instruction queue. - * Note to self: is this really different than issueWidth? - */ - unsigned issueReadWidth; + /** Width of dispatch, in instructions. */ + unsigned dispatchWidth; /** Width of issue, in instructions. */ unsigned issueWidth; - /** Width of execute, in instructions. Might make more sense to break - * down into FP vs int. - */ - unsigned executeWidth; - /** Index into queue of instructions being written back. */ unsigned wbNumInst; @@ -405,6 +445,17 @@ class DefaultIEW */ unsigned wbCycle; + /** Number of instructions in flight that will writeback. */ + unsigned wbOutstanding; + + /** Writeback width. */ + unsigned wbWidth; + + /** Writeback width * writeback depth, where writeback depth is + * the number of cycles of writing back instructions that can be + * buffered. */ + unsigned wbMax; + /** Number of active threads. */ unsigned numThreads; @@ -439,14 +490,6 @@ class DefaultIEW Stats::Scalar<> iewIQFullEvents; /** Stat for number of times the LSQ becomes full. */ Stats::Scalar<> iewLSQFullEvents; - /** Stat for total number of executed instructions. */ - Stats::Scalar<> iewExecutedInsts; - /** Stat for total number of executed load instructions. */ - Stats::Vector<> iewExecLoadInsts; - /** Stat for total number of executed store instructions. */ -// Stats::Scalar<> iewExecStoreInsts; - /** Stat for total number of squashed instructions skipped at execute. */ - Stats::Scalar<> iewExecSquashedInsts; /** Stat for total number of memory ordering violation events. */ Stats::Scalar<> memOrderViolationEvents; /** Stat for total number of incorrect predicted taken branches. */ @@ -456,28 +499,25 @@ class DefaultIEW /** Stat for total number of mispredicted branches detected at execute. */ Stats::Formula branchMispredicts; + /** Stat for total number of executed instructions. */ + Stats::Scalar<> iewExecutedInsts; + /** Stat for total number of executed load instructions. */ + Stats::Vector<> iewExecLoadInsts; + /** Stat for total number of squashed instructions skipped at execute. */ + Stats::Scalar<> iewExecSquashedInsts; /** Number of executed software prefetches. */ - Stats::Vector<> exeSwp; + Stats::Vector<> iewExecutedSwp; /** Number of executed nops. */ - Stats::Vector<> exeNop; + Stats::Vector<> iewExecutedNop; /** Number of executed meomory references. */ - Stats::Vector<> exeRefs; + Stats::Vector<> iewExecutedRefs; /** Number of executed branches. */ - Stats::Vector<> exeBranches; - -// Stats::Vector<> issued_ops; -/* - Stats::Vector<> stat_fu_busy; - Stats::Vector2d<> stat_fuBusy; - Stats::Vector<> dist_unissued; - Stats::Vector2d<> stat_issued_inst_type; -*/ - /** Number of instructions issued per cycle. */ - Stats::Formula issueRate; + Stats::Vector<> iewExecutedBranches; /** Number of executed store instructions. */ Stats::Formula iewExecStoreInsts; -// Stats::Formula issue_op_rate; -// Stats::Formula fu_busy_rate; + /** Number of instructions executed per cycle. */ + Stats::Formula iewExecRate; + /** Number of instructions sent to commit. */ Stats::Vector<> iewInstsToCommit; /** Number of instructions that writeback. */ @@ -490,7 +530,6 @@ class DefaultIEW * to resource contention. */ Stats::Vector<> wbPenalized; - /** Number of instructions per cycle written back. */ Stats::Formula wbRate; /** Average number of woken instructions per writeback. */ diff --git a/src/cpu/o3/iew_impl.hh b/src/cpu/o3/iew_impl.hh index 3929f2e19..684ae2295 100644 --- a/src/cpu/o3/iew_impl.hh +++ b/src/cpu/o3/iew_impl.hh @@ -42,17 +42,17 @@ using namespace std; template<class Impl> DefaultIEW<Impl>::DefaultIEW(Params *params) - : // @todo: Make this into a parameter. - issueToExecQueue(5, 5), + : issueToExecQueue(params->backComSize, params->forwardComSize), instQueue(params), ldstQueue(params), fuPool(params->fuPool), commitToIEWDelay(params->commitToIEWDelay), renameToIEWDelay(params->renameToIEWDelay), issueToExecuteDelay(params->issueToExecuteDelay), - issueReadWidth(params->issueWidth), + dispatchWidth(params->dispatchWidth), issueWidth(params->issueWidth), - executeWidth(params->executeWidth), + wbOutstanding(0), + wbWidth(params->wbWidth), numThreads(params->numberOfThreads), switchedOut(false) { @@ -75,8 +75,12 @@ DefaultIEW<Impl>::DefaultIEW(Params *params) fetchRedirect[i] = false; } + wbMax = wbWidth * params->wbDepth; + updateLSQNextCycle = false; + ableToIssue = true; + skidBufferMax = (3 * (renameToIEWDelay * params->renameWidth)) + issueWidth; } @@ -94,6 +98,7 @@ DefaultIEW<Impl>::regStats() using namespace Stats; instQueue.regStats(); + ldstQueue.regStats(); iewIdleCycles .name(name() + ".iewIdleCycles") @@ -139,20 +144,6 @@ DefaultIEW<Impl>::regStats() .name(name() + ".iewLSQFullEvents") .desc("Number of times the LSQ has become full, causing a stall"); - iewExecutedInsts - .name(name() + ".iewExecutedInsts") - .desc("Number of executed instructions"); - - iewExecLoadInsts - .init(cpu->number_of_threads) - .name(name() + ".iewExecLoadInsts") - .desc("Number of load instructions executed") - .flags(total); - - iewExecSquashedInsts - .name(name() + ".iewExecSquashedInsts") - .desc("Number of squashed instructions skipped in execute"); - memOrderViolationEvents .name(name() + ".memOrderViolationEvents") .desc("Number of memory order violations"); @@ -171,114 +162,105 @@ DefaultIEW<Impl>::regStats() branchMispredicts = predictedTakenIncorrect + predictedNotTakenIncorrect; - exeSwp + iewExecutedInsts + .name(name() + ".EXEC:insts") + .desc("Number of executed instructions"); + + iewExecLoadInsts + .init(cpu->number_of_threads) + .name(name() + ".EXEC:loads") + .desc("Number of load instructions executed") + .flags(total); + + iewExecSquashedInsts + .name(name() + ".EXEC:squashedInsts") + .desc("Number of squashed instructions skipped in execute"); + + iewExecutedSwp .init(cpu->number_of_threads) .name(name() + ".EXEC:swp") .desc("number of swp insts executed") - .flags(total) - ; + .flags(total); - exeNop + iewExecutedNop .init(cpu->number_of_threads) .name(name() + ".EXEC:nop") .desc("number of nop insts executed") - .flags(total) - ; + .flags(total); - exeRefs + iewExecutedRefs .init(cpu->number_of_threads) .name(name() + ".EXEC:refs") .desc("number of memory reference insts executed") - .flags(total) - ; + .flags(total); - exeBranches + iewExecutedBranches .init(cpu->number_of_threads) .name(name() + ".EXEC:branches") .desc("Number of branches executed") - .flags(total) - ; - - issueRate - .name(name() + ".EXEC:rate") - .desc("Inst execution rate") - .flags(total) - ; - issueRate = iewExecutedInsts / cpu->numCycles; + .flags(total); iewExecStoreInsts .name(name() + ".EXEC:stores") .desc("Number of stores executed") - .flags(total) - ; - iewExecStoreInsts = exeRefs - iewExecLoadInsts; -/* - for (int i=0; i<Num_OpClasses; ++i) { - stringstream subname; - subname << opClassStrings[i] << "_delay"; - issue_delay_dist.subname(i, subname.str()); - } -*/ - // - // Other stats - // + .flags(total); + iewExecStoreInsts = iewExecutedRefs - iewExecLoadInsts; + + iewExecRate + .name(name() + ".EXEC:rate") + .desc("Inst execution rate") + .flags(total); + + iewExecRate = iewExecutedInsts / cpu->numCycles; iewInstsToCommit .init(cpu->number_of_threads) .name(name() + ".WB:sent") .desc("cumulative count of insts sent to commit") - .flags(total) - ; + .flags(total); writebackCount .init(cpu->number_of_threads) .name(name() + ".WB:count") .desc("cumulative count of insts written-back") - .flags(total) - ; + .flags(total); producerInst .init(cpu->number_of_threads) .name(name() + ".WB:producers") .desc("num instructions producing a value") - .flags(total) - ; + .flags(total); consumerInst .init(cpu->number_of_threads) .name(name() + ".WB:consumers") .desc("num instructions consuming a value") - .flags(total) - ; + .flags(total); wbPenalized .init(cpu->number_of_threads) .name(name() + ".WB:penalized") .desc("number of instrctions required to write to 'other' IQ") - .flags(total) - ; + .flags(total); wbPenalizedRate .name(name() + ".WB:penalized_rate") .desc ("fraction of instructions written-back that wrote to 'other' IQ") - .flags(total) - ; + .flags(total); wbPenalizedRate = wbPenalized / writebackCount; wbFanout .name(name() + ".WB:fanout") .desc("average fanout of values written-back") - .flags(total) - ; + .flags(total); wbFanout = producerInst / consumerInst; wbRate .name(name() + ".WB:rate") .desc("insts written-back per cycle") - .flags(total) - ; + .flags(total); wbRate = writebackCount / cpu->numCycles; } @@ -299,7 +281,7 @@ DefaultIEW<Impl>::initStage() template<class Impl> void -DefaultIEW<Impl>::setCPU(FullCPU *cpu_ptr) +DefaultIEW<Impl>::setCPU(O3CPU *cpu_ptr) { DPRINTF(IEW, "Setting CPU pointer.\n"); cpu = cpu_ptr; @@ -307,7 +289,7 @@ DefaultIEW<Impl>::setCPU(FullCPU *cpu_ptr) instQueue.setCPU(cpu_ptr); ldstQueue.setCPU(cpu_ptr); - cpu->activateStage(FullCPU::IEWIdx); + cpu->activateStage(O3CPU::IEWIdx); } template<class Impl> @@ -371,16 +353,23 @@ DefaultIEW<Impl>::setScoreboard(Scoreboard *sb_ptr) } template <class Impl> +bool +DefaultIEW<Impl>::drain() +{ + // IEW is ready to drain at any time. + cpu->signalDrained(); + return true; +} + +template <class Impl> void -DefaultIEW<Impl>::switchOut() +DefaultIEW<Impl>::resume() { - // IEW is ready to switch out at any time. - cpu->signalSwitched(); } template <class Impl> void -DefaultIEW<Impl>::doSwitchOut() +DefaultIEW<Impl>::switchOut() { // Clear any state. switchedOut = true; @@ -423,7 +412,7 @@ DefaultIEW<Impl>::takeOverFrom() updateLSQNextCycle = false; // @todo: Fix hardcoded number - for (int i = 0; i < 6; ++i) { + for (int i = 0; i < issueToExecQueue.getSize(); ++i) { issueToExecQueue.advance(); } } @@ -456,16 +445,7 @@ DefaultIEW<Impl>::squash(unsigned tid) skidBuffer[tid].pop(); } - while (!insts[tid].empty()) { - if (insts[tid].front()->isLoad() || - insts[tid].front()->isStore() ) { - toRename->iewInfo[tid].dispatchedToLSQ++; - } - - toRename->iewInfo[tid].dispatched++; - - insts[tid].pop(); - } + emptyRenameInsts(tid); } template<class Impl> @@ -591,12 +571,12 @@ DefaultIEW<Impl>::instToCommit(DynInstPtr &inst) // free slot. while ((*iewQueue)[wbCycle].insts[wbNumInst]) { ++wbNumInst; - if (wbNumInst == issueWidth) { + if (wbNumInst == wbWidth) { ++wbCycle; wbNumInst = 0; } - assert(wbCycle < 5); + assert((wbCycle * wbWidth + wbNumInst) < wbMax); } // Add finished instruction to queue to commit. @@ -611,7 +591,7 @@ DefaultIEW<Impl>::validInstsFromRename() unsigned inst_count = 0; for (int i=0; i<fromRename->size; i++) { - if (!fromRename->insts[i]->squashed) + if (!fromRename->insts[i]->isSquashed()) inst_count++; } @@ -799,10 +779,12 @@ DefaultIEW<Impl>::checkSignalsAndUpdate(unsigned tid) } if (fromCommit->commitInfo[tid].robSquashing) { - DPRINTF(IEW, "[tid:%i]: ROB is still squashing.\n"); + DPRINTF(IEW, "[tid:%i]: ROB is still squashing.\n", tid); dispatchStatus[tid] = Squashing; + emptyRenameInsts(tid); + wroteToTimeBuffer = true; return; } @@ -853,6 +835,22 @@ DefaultIEW<Impl>::sortInsts() template <class Impl> void +DefaultIEW<Impl>::emptyRenameInsts(unsigned tid) +{ + while (!insts[tid].empty()) { + if (insts[tid].front()->isLoad() || + insts[tid].front()->isStore() ) { + toRename->iewInfo[tid].dispatchedToLSQ++; + } + + toRename->iewInfo[tid].dispatched++; + + insts[tid].pop(); + } +} + +template <class Impl> +void DefaultIEW<Impl>::wakeCPU() { cpu->wakeCPU(); @@ -871,7 +869,7 @@ inline void DefaultIEW<Impl>::activateStage() { DPRINTF(Activity, "Activating stage.\n"); - cpu->activateStage(FullCPU::IEWIdx); + cpu->activateStage(O3CPU::IEWIdx); } template <class Impl> @@ -879,7 +877,7 @@ inline void DefaultIEW<Impl>::deactivateStage() { DPRINTF(Activity, "Deactivating stage.\n"); - cpu->deactivateStage(FullCPU::IEWIdx); + cpu->deactivateStage(O3CPU::IEWIdx); } template<class Impl> @@ -951,7 +949,7 @@ DefaultIEW<Impl>::dispatchInsts(unsigned tid) // Loop through the instructions, putting them in the instruction // queue. for ( ; dis_num_inst < insts_to_add && - dis_num_inst < issueReadWidth; + dis_num_inst < dispatchWidth; ++dis_num_inst) { inst = insts_to_dispatch.front(); @@ -1090,7 +1088,7 @@ DefaultIEW<Impl>::dispatchInsts(unsigned tid) instQueue.recordProducer(inst); - exeNop[tid]++; + iewExecutedNop[tid]++; add_to_iq = false; } else if (inst->isExecuted()) { @@ -1203,6 +1201,7 @@ DefaultIEW<Impl>::executeInsts() ++iewExecSquashedInsts; + decrWb(inst->seqNum); continue; } @@ -1365,6 +1364,8 @@ DefaultIEW<Impl>::writebackInsts() } writebackCount[tid]++; } + + decrWb(inst->seqNum); } } @@ -1501,9 +1502,9 @@ DefaultIEW<Impl>::updateExeInstStats(DynInstPtr &inst) // #ifdef TARGET_ALPHA if (inst->isDataPrefetch()) - exeSwp[thread_number]++; + iewExecutedSwp[thread_number]++; else - iewExecutedInsts++; + iewIewExecutedcutedInsts++; #else iewExecutedInsts++; #endif @@ -1512,13 +1513,13 @@ DefaultIEW<Impl>::updateExeInstStats(DynInstPtr &inst) // Control operations // if (inst->isControl()) - exeBranches[thread_number]++; + iewExecutedBranches[thread_number]++; // // Memory operations // if (inst->isMemRef()) { - exeRefs[thread_number]++; + iewExecutedRefs[thread_number]++; if (inst->isLoad()) { iewExecLoadInsts[thread_number]++; diff --git a/src/cpu/o3/inst_queue.cc b/src/cpu/o3/inst_queue.cc index f2c6b8213..a539066f9 100644 --- a/src/cpu/o3/inst_queue.cc +++ b/src/cpu/o3/inst_queue.cc @@ -28,9 +28,8 @@ * Authors: Kevin Lim */ -#include "cpu/o3/alpha_dyn_inst.hh" -#include "cpu/o3/alpha_impl.hh" +#include "cpu/o3/isa_specific.hh" #include "cpu/o3/inst_queue_impl.hh" // Force instantiation of InstructionQueue. -template class InstructionQueue<AlphaSimpleImpl>; +template class InstructionQueue<O3CPUImpl>; diff --git a/src/cpu/o3/inst_queue.hh b/src/cpu/o3/inst_queue.hh index 60a713020..4c69ca384 100644 --- a/src/cpu/o3/inst_queue.hh +++ b/src/cpu/o3/inst_queue.hh @@ -26,6 +26,7 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * Authors: Kevin Lim + * Korey Sewell */ #ifndef __CPU_O3_INST_QUEUE_HH__ @@ -68,7 +69,7 @@ class InstructionQueue { public: //Typedefs from the Impl. - typedef typename Impl::FullCPU FullCPU; + typedef typename Impl::O3CPU O3CPU; typedef typename Impl::DynInstPtr DynInstPtr; typedef typename Impl::Params Params; @@ -80,7 +81,7 @@ class InstructionQueue // Typedef of iterator through the list of instructions. typedef typename std::list<DynInstPtr>::iterator ListIt; - friend class Impl::FullCPU; + friend class Impl::O3CPU; /** FU completion event class. */ class FUCompletion : public Event { @@ -125,7 +126,7 @@ class InstructionQueue void resetState(); /** Sets CPU pointer. */ - void setCPU(FullCPU *_cpu) { cpu = _cpu; } + void setCPU(O3CPU *_cpu) { cpu = _cpu; } /** Sets active threads list. */ void setActiveThreads(std::list<unsigned> *at_ptr); @@ -252,7 +253,7 @@ class InstructionQueue ///////////////////////// /** Pointer to the CPU. */ - FullCPU *cpu; + O3CPU *cpu; /** Cache interface. */ MemInterface *dcacheInterface; @@ -474,12 +475,17 @@ class InstructionQueue /** Stat for number of non-speculative instructions removed due to a squash. */ Stats::Scalar<> iqSquashedNonSpecRemoved; + // Also include number of instructions rescheduled and replayed. - /** Distribution of number of instructions in the queue. */ + /** Distribution of number of instructions in the queue. + * @todo: Need to create struct to track the entry time for each + * instruction. */ Stats::VectorDistribution<> queueResDist; /** Distribution of the number of instructions issued. */ Stats::Distribution<> numIssuedDist; - /** Distribution of the cycles it takes to issue an instruction. */ + /** Distribution of the cycles it takes to issue an instruction. + * @todo: Need to create struct to track the ready time for each + * instruction. */ Stats::VectorDistribution<> issueDelayDist; /** Number of times an instruction could not be issued because a @@ -492,8 +498,7 @@ class InstructionQueue /** Number of instructions issued per cycle. */ Stats::Formula issueRate; -// Stats::Formula issue_stores; -// Stats::Formula issue_op_rate; + /** Number of times the FU was busy. */ Stats::Vector<> fuBusy; /** Number of times the FU was busy per instruction issued. */ diff --git a/src/cpu/o3/inst_queue_impl.hh b/src/cpu/o3/inst_queue_impl.hh index 06a052c6f..36e0842be 100644 --- a/src/cpu/o3/inst_queue_impl.hh +++ b/src/cpu/o3/inst_queue_impl.hh @@ -26,6 +26,7 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * Authors: Kevin Lim + * Korey Sewell */ #include <limits> @@ -125,7 +126,7 @@ InstructionQueue<Impl>::InstructionQueue(Params *params) maxEntries[i] = part_amt; } - DPRINTF(Fetch, "IQ sharing policy set to Partitioned:" + DPRINTF(IQ, "IQ sharing policy set to Partitioned:" "%i entries per thread.\n",part_amt); } else if (policy == "threshold") { @@ -140,7 +141,7 @@ InstructionQueue<Impl>::InstructionQueue(Params *params) maxEntries[i] = thresholdIQ; } - DPRINTF(Fetch, "IQ sharing policy set to Threshold:" + DPRINTF(IQ, "IQ sharing policy set to Threshold:" "%i entries per thread.\n",thresholdIQ); } else { assert(0 && "Invalid IQ Sharing Policy.Options Are:{Dynamic," @@ -289,22 +290,7 @@ InstructionQueue<Impl>::regStats() .flags(total) ; issueRate = iqInstsIssued / cpu->numCycles; -/* - issue_stores - .name(name() + ".ISSUE:stores") - .desc("Number of stores issued") - .flags(total) - ; - issue_stores = exe_refs - exe_loads; -*/ -/* - issue_op_rate - .name(name() + ".ISSUE:op_rate") - .desc("Operation issue rate") - .flags(total) - ; - issue_op_rate = issued_ops / numCycles; -*/ + statFuBusy .init(Num_OpClasses) .name(name() + ".ISSUE:fu_full") @@ -701,6 +687,7 @@ InstructionQueue<Impl>::scheduleReadyInsts() int total_issued = 0; while (total_issued < totalWidth && + iewStage->canIssue() && order_it != order_end_it) { OpClass op_class = (*order_it).queueType; @@ -791,13 +778,14 @@ InstructionQueue<Impl>::scheduleReadyInsts() // complete. ++freeEntries; count[tid]--; - issuing_inst->removeInIQ(); + issuing_inst->clearInIQ(); } else { memDepUnit[tid].issue(issuing_inst); } listOrder.erase(order_it++); statIssuedInstType[tid][op_class]++; + iewStage->incrWb(issuing_inst->seqNum); } else { statFuBusy[op_class]++; fuBusy[tid]++; @@ -1097,7 +1085,7 @@ InstructionQueue<Impl>::doSquash(unsigned tid) // inst will flow through the rest of the pipeline. squashed_inst->setIssued(); squashed_inst->setCanCommit(); - squashed_inst->removeInIQ(); + squashed_inst->clearInIQ(); //Update Thread IQ Count count[squashed_inst->threadNumber]--; diff --git a/src/cpu/o3/isa_specific.hh b/src/cpu/o3/isa_specific.hh new file mode 100755 index 000000000..f8a9dd8cc --- /dev/null +++ b/src/cpu/o3/isa_specific.hh @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Korey Sewell + */ + +#include "cpu/base.hh" + +#if THE_ISA == ALPHA_ISA + #include "cpu/o3/alpha/cpu.hh" + #include "cpu/o3/alpha/impl.hh" + #include "cpu/o3/alpha/params.hh" + #include "cpu/o3/alpha/dyn_inst.hh" +#else + #error "O3CPU doesnt support this ISA" +#endif diff --git a/src/cpu/o3/lsq.cc b/src/cpu/o3/lsq.cc index de0325920..527947281 100644 --- a/src/cpu/o3/lsq.cc +++ b/src/cpu/o3/lsq.cc @@ -28,11 +28,9 @@ * Authors: Korey Sewell */ -#include "cpu/o3/alpha_dyn_inst.hh" -#include "cpu/o3/alpha_cpu.hh" -#include "cpu/o3/alpha_impl.hh" +#include "cpu/o3/isa_specific.hh" #include "cpu/o3/lsq_impl.hh" // Force the instantiation of LDSTQ for all the implementations we care about. -template class LSQ<AlphaSimpleImpl>; +template class LSQ<O3CPUImpl>; diff --git a/src/cpu/o3/lsq.hh b/src/cpu/o3/lsq.hh index bc4154c85..190734dc2 100644 --- a/src/cpu/o3/lsq.hh +++ b/src/cpu/o3/lsq.hh @@ -44,7 +44,7 @@ template <class Impl> class LSQ { public: typedef typename Impl::Params Params; - typedef typename Impl::FullCPU FullCPU; + typedef typename Impl::O3CPU O3CPU; typedef typename Impl::DynInstPtr DynInstPtr; typedef typename Impl::CPUPol::IEW IEW; typedef typename Impl::CPUPol::LSQUnit LSQUnit; @@ -62,10 +62,20 @@ class LSQ { /** Returns the name of the LSQ. */ std::string name() const; + /** Registers statistics of each LSQ unit. */ + void regStats(); + + /** Returns dcache port. + * @todo: Dcache port needs to be moved up to this level for SMT + * to work. For now it just returns the port from one of the + * threads. + */ + Port *getDcachePort() { return &dcachePort; } + /** Sets the pointer to the list of active threads. */ void setActiveThreads(std::list<unsigned> *at_ptr); /** Sets the CPU pointer. */ - void setCPU(FullCPU *cpu_ptr); + void setCPU(O3CPU *cpu_ptr); /** Sets the IEW stage pointer. */ void setIEW(IEW *iew_ptr); /** Switches out the LSQ. */ @@ -248,6 +258,15 @@ class LSQ { bool willWB(unsigned tid) { return thread[tid].willWB(); } + /** Returns if the cache is currently blocked. */ + bool cacheBlocked() + { return retryTid != -1; } + + /** Sets the retry thread id, indicating that one of the LSQUnits + * tried to access the cache but the cache was blocked. */ + void setRetryTid(int tid) + { retryTid = tid; } + /** Debugging function to print out all instructions. */ void dumpInsts(); /** Debugging function to print out instructions from a specific thread. */ @@ -264,7 +283,49 @@ class LSQ { template <class T> Fault write(RequestPtr req, T &data, int store_idx); - private: + /** DcachePort class for this LSQ. Handles doing the + * communication with the cache/memory. + */ + class DcachePort : public Port + { + protected: + /** Pointer to LSQ. */ + LSQ *lsq; + + public: + /** Default constructor. */ + DcachePort(LSQ *_lsq) + : lsq(_lsq) + { } + + protected: + /** Atomic version of receive. Panics. */ + virtual Tick recvAtomic(PacketPtr pkt); + + /** Functional version of receive. Panics. */ + virtual void recvFunctional(PacketPtr pkt); + + /** Receives status change. Other than range changing, panics. */ + virtual void recvStatusChange(Status status); + + /** Returns the address ranges of this device. */ + virtual void getDeviceAddressRanges(AddrRangeList &resp, + AddrRangeList &snoop) + { resp.clear(); snoop.clear(); } + + /** Timing version of receive. Handles writing back and + * completing the load or store that has returned from + * memory. */ + virtual bool recvTiming(PacketPtr pkt); + + /** Handles doing a retry of the previous send. */ + virtual void recvRetry(); + }; + + /** D-cache port. */ + DcachePort dcachePort; + + protected: /** The LSQ policy for SMT mode. */ LSQPolicy lsqPolicy; @@ -272,7 +333,7 @@ class LSQ { LSQUnit thread[Impl::MaxThreads]; /** The CPU pointer. */ - FullCPU *cpu; + O3CPU *cpu; /** The IEW stage pointer. */ IEW *iewStage; @@ -293,6 +354,10 @@ class LSQ { /** Number of Threads. */ unsigned numThreads; + + /** The thread id of the LSQ Unit that is currently waiting for a + * retry. */ + int retryTid; }; template <class Impl> diff --git a/src/cpu/o3/lsq_impl.hh b/src/cpu/o3/lsq_impl.hh index 27aa0dc3c..4e3957029 100644 --- a/src/cpu/o3/lsq_impl.hh +++ b/src/cpu/o3/lsq_impl.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2004-2006 The Regents of The University of Michigan + * Copyright (c) 2005-2006 The Regents of The University of Michigan * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -36,9 +36,53 @@ using namespace std; template <class Impl> +Tick +LSQ<Impl>::DcachePort::recvAtomic(PacketPtr pkt) +{ + panic("O3CPU model does not work with atomic mode!"); + return curTick; +} + +template <class Impl> +void +LSQ<Impl>::DcachePort::recvFunctional(PacketPtr pkt) +{ + panic("O3CPU doesn't expect recvFunctional callback!"); +} + +template <class Impl> +void +LSQ<Impl>::DcachePort::recvStatusChange(Status status) +{ + if (status == RangeChange) + return; + + panic("O3CPU doesn't expect recvStatusChange callback!"); +} + +template <class Impl> +bool +LSQ<Impl>::DcachePort::recvTiming(PacketPtr pkt) +{ + lsq->thread[pkt->req->getThreadNum()].completeDataAccess(pkt); + return true; +} + +template <class Impl> +void +LSQ<Impl>::DcachePort::recvRetry() +{ + lsq->thread[lsq->retryTid].recvRetry(); + // Speculatively clear the retry Tid. This will get set again if + // the LSQUnit was unable to complete its access. + lsq->retryTid = -1; +} + +template <class Impl> LSQ<Impl>::LSQ(Params *params) - : LQEntries(params->LQEntries), SQEntries(params->SQEntries), - numThreads(params->numberOfThreads) + : dcachePort(this), LQEntries(params->LQEntries), + SQEntries(params->SQEntries), numThreads(params->numberOfThreads), + retryTid(-1) { DPRINTF(LSQ, "Creating LSQ object.\n"); @@ -94,7 +138,8 @@ LSQ<Impl>::LSQ(Params *params) //Initialize LSQs for (int tid=0; tid < numThreads; tid++) { - thread[tid].init(params, maxLQEntries, maxSQEntries, tid); + thread[tid].init(params, this, maxLQEntries, maxSQEntries, tid); + thread[tid].setDcachePort(&dcachePort); } } @@ -108,6 +153,16 @@ LSQ<Impl>::name() const template<class Impl> void +LSQ<Impl>::regStats() +{ + //Initialize LSQs + for (int tid=0; tid < numThreads; tid++) { + thread[tid].regStats(); + } +} + +template<class Impl> +void LSQ<Impl>::setActiveThreads(list<unsigned> *at_ptr) { activeThreads = at_ptr; @@ -116,10 +171,12 @@ LSQ<Impl>::setActiveThreads(list<unsigned> *at_ptr) template<class Impl> void -LSQ<Impl>::setCPU(FullCPU *cpu_ptr) +LSQ<Impl>::setCPU(O3CPU *cpu_ptr) { cpu = cpu_ptr; + dcachePort.setName(name()); + for (int tid=0; tid < numThreads; tid++) { thread[tid].setCPU(cpu_ptr); } @@ -492,6 +549,9 @@ LSQ<Impl>::hasStoresToWB() { list<unsigned>::iterator active_threads = (*activeThreads).begin(); + if ((*activeThreads).empty()) + return false; + while (active_threads != (*activeThreads).end()) { unsigned tid = *active_threads++; if (!hasStoresToWB(tid)) diff --git a/src/cpu/o3/lsq_unit.cc b/src/cpu/o3/lsq_unit.cc index e935ffa5c..3ca3fa667 100644 --- a/src/cpu/o3/lsq_unit.cc +++ b/src/cpu/o3/lsq_unit.cc @@ -29,11 +29,9 @@ * Korey Sewell */ -#include "cpu/o3/alpha_dyn_inst.hh" -#include "cpu/o3/alpha_cpu.hh" -#include "cpu/o3/alpha_impl.hh" +#include "cpu/o3/isa_specific.hh" #include "cpu/o3/lsq_unit_impl.hh" // Force the instantiation of LDSTQ for all the implementations we care about. -template class LSQUnit<AlphaSimpleImpl>; +template class LSQUnit<O3CPUImpl>; diff --git a/src/cpu/o3/lsq_unit.hh b/src/cpu/o3/lsq_unit.hh index ce0cdd36f..a76a73f0c 100644 --- a/src/cpu/o3/lsq_unit.hh +++ b/src/cpu/o3/lsq_unit.hh @@ -40,7 +40,7 @@ #include "config/full_system.hh" #include "base/hashmap.hh" #include "cpu/inst_seq.hh" -#include "mem/packet.hh" +#include "mem/packet_impl.hh" #include "mem/port.hh" /** @@ -61,9 +61,10 @@ class LSQUnit { typedef TheISA::IntReg IntReg; public: typedef typename Impl::Params Params; - typedef typename Impl::FullCPU FullCPU; + typedef typename Impl::O3CPU O3CPU; typedef typename Impl::DynInstPtr DynInstPtr; typedef typename Impl::CPUPol::IEW IEW; + typedef typename Impl::CPUPol::LSQ LSQ; typedef typename Impl::CPUPol::IssueStruct IssueStruct; public: @@ -71,19 +72,26 @@ class LSQUnit { LSQUnit(); /** Initializes the LSQ unit with the specified number of entries. */ - void init(Params *params, unsigned maxLQEntries, + void init(Params *params, LSQ *lsq_ptr, unsigned maxLQEntries, unsigned maxSQEntries, unsigned id); /** Returns the name of the LSQ unit. */ std::string name() const; + /** Registers statistics. */ + void regStats(); + /** Sets the CPU pointer. */ - void setCPU(FullCPU *cpu_ptr); + void setCPU(O3CPU *cpu_ptr); /** Sets the IEW stage pointer. */ void setIEW(IEW *iew_ptr) { iewStage = iew_ptr; } + /** Sets the pointer to the dcache port. */ + void setDcachePort(Port *dcache_port) + { dcachePort = dcache_port; } + /** Switches out LSQ unit. */ void switchOut(); @@ -125,11 +133,10 @@ class LSQUnit { /** Writes back stores. */ void writebackStores(); + /** Completes the data access that has been returned from the + * memory system. */ void completeDataAccess(PacketPtr pkt); - // @todo: Include stats in the LSQ unit. - //void regStats(); - /** Clears all the entries in the LQ. */ void clearLQ(); @@ -204,6 +211,9 @@ class LSQUnit { !storeQueue[storeWBIdx].completed && !isStoreBlocked; } + /** Handles doing the retry. */ + void recvRetry(); + private: /** Writes back the instruction, sending it to IEW. */ void writeback(DynInstPtr &inst, PacketPtr pkt); @@ -214,9 +224,6 @@ class LSQUnit { /** Completes the store at the specified index. */ void completeStore(int store_idx); - /** Handles doing the retry. */ - void recvRetry(); - /** Increments the given store index (circular queue). */ inline void incrStIdx(int &store_idx); /** Decrements the given store index (circular queue). */ @@ -232,59 +239,16 @@ class LSQUnit { private: /** Pointer to the CPU. */ - FullCPU *cpu; + O3CPU *cpu; /** Pointer to the IEW stage. */ IEW *iewStage; - /** Pointer to memory object. */ - MemObject *mem; - - /** DcachePort class for this LSQ Unit. Handles doing the - * communication with the cache/memory. - * @todo: Needs to be moved to the LSQ level and have some sort - * of arbitration. - */ - class DcachePort : public Port - { - protected: - /** Pointer to CPU. */ - FullCPU *cpu; - /** Pointer to LSQ. */ - LSQUnit *lsq; - - public: - /** Default constructor. */ - DcachePort(FullCPU *_cpu, LSQUnit *_lsq) - : Port(_lsq->name() + "-dport"), cpu(_cpu), lsq(_lsq) - { } - - protected: - /** Atomic version of receive. Panics. */ - virtual Tick recvAtomic(PacketPtr pkt); + /** Pointer to the LSQ. */ + LSQ *lsq; - /** Functional version of receive. Panics. */ - virtual void recvFunctional(PacketPtr pkt); - - /** Receives status change. Other than range changing, panics. */ - virtual void recvStatusChange(Status status); - - /** Returns the address ranges of this device. */ - virtual void getDeviceAddressRanges(AddrRangeList &resp, - AddrRangeList &snoop) - { resp.clear(); snoop.clear(); } - - /** Timing version of receive. Handles writing back and - * completing the load or store that has returned from - * memory. */ - virtual bool recvTiming(PacketPtr pkt); - - /** Handles doing a retry of the previous send. */ - virtual void recvRetry(); - }; - - /** Pointer to the D-cache. */ - DcachePort *dcachePort; + /** Pointer to the dcache port. Used only for sending. */ + Port *dcachePort; /** Derived class to hold any sender state the LSQ needs. */ class LSQSenderState : public Packet::SenderState @@ -443,25 +407,35 @@ class LSQUnit { // Will also need how many read/write ports the Dcache has. Or keep track // of that in stage that is one level up, and only call executeLoad/Store // the appropriate number of times. -/* - // total number of loads forwaded from LSQ stores - Stats::Vector<> lsq_forw_loads; - // total number of loads ignored due to invalid addresses - Stats::Vector<> inv_addr_loads; + /** Total number of loads forwaded from LSQ stores. */ + Stats::Scalar<> lsqForwLoads; + + /** Total number of loads ignored due to invalid addresses. */ + Stats::Scalar<> invAddrLoads; - // total number of software prefetches ignored due to invalid addresses - Stats::Vector<> inv_addr_swpfs; + /** Total number of squashed loads. */ + Stats::Scalar<> lsqSquashedLoads; - // total non-speculative bogus addresses seen (debug var) - Counter sim_invalid_addrs; - Stats::Vector<> fu_busy; //cumulative fu busy + /** Total number of responses from the memory system that are + * ignored due to the instruction already being squashed. */ + Stats::Scalar<> lsqIgnoredResponses; - // ready loads blocked due to memory disambiguation - Stats::Vector<> lsq_blocked_loads; + /** Total number of squashed stores. */ + Stats::Scalar<> lsqSquashedStores; + + /** Total number of software prefetches ignored due to invalid addresses. */ + Stats::Scalar<> invAddrSwpfs; + + /** Ready loads blocked due to partial store-forwarding. */ + Stats::Scalar<> lsqBlockedLoads; + + /** Number of loads that were rescheduled. */ + Stats::Scalar<> lsqRescheduledLoads; + + /** Number of times the LSQ is blocked due to the cache. */ + Stats::Scalar<> lsqCacheBlocked; - Stats::Scalar<> lsqInversion; -*/ public: /** Executes the load at the given index. */ template <class T> @@ -517,8 +491,9 @@ LSQUnit<Impl>::read(Request *req, T &data, int load_idx) // at the head of the LSQ and are ready to commit (at the head of the ROB // too). if (req->getFlags() & UNCACHEABLE && - (load_idx != loadHead || !load_inst->reachedCommit)) { + (load_idx != loadHead || !load_inst->isAtCommit())) { iewStage->rescheduleMemInst(load_inst); + ++lsqRescheduledLoads; return TheISA::genMachineCheckFault(); } @@ -598,7 +573,7 @@ LSQUnit<Impl>::read(Request *req, T &data, int load_idx) // @todo: Need to make this a parameter. wb->schedule(curTick); - // Should keep track of stat for forwarded data + ++lsqForwLoads; return NoFault; } else if ((store_has_lower_limit && lower_load_has_store_part) || (store_has_upper_limit && upper_load_has_store_part) || @@ -626,6 +601,7 @@ LSQUnit<Impl>::read(Request *req, T &data, int load_idx) // Tell IQ/mem dep unit that this instruction will need to be // rescheduled eventually iewStage->rescheduleMemInst(load_inst); + ++lsqRescheduledLoads; // Do not generate a writeback event as this instruction is not // complete. @@ -633,12 +609,13 @@ LSQUnit<Impl>::read(Request *req, T &data, int load_idx) "Store idx %i to load addr %#x\n", store_idx, req->getVaddr()); + ++lsqBlockedLoads; return NoFault; } } // If there's no forwarding case, then go access memory - DPRINTF(LSQUnit, "Doing functional access for inst [sn:%lli] PC %#x\n", + DPRINTF(LSQUnit, "Doing memory access for inst [sn:%lli] PC %#x\n", load_inst->seqNum, load_inst->readPC()); assert(!load_inst->memData); @@ -646,9 +623,6 @@ LSQUnit<Impl>::read(Request *req, T &data, int load_idx) ++usedPorts; - DPRINTF(LSQUnit, "Doing timing access for inst PC %#x\n", - load_inst->readPC()); - PacketPtr data_pkt = new Packet(req, Packet::ReadReq, Packet::Broadcast); data_pkt->dataStatic(load_inst->memData); @@ -658,8 +632,19 @@ LSQUnit<Impl>::read(Request *req, T &data, int load_idx) state->inst = load_inst; data_pkt->senderState = state; - // if we have a cache, do cache access too - if (!dcachePort->sendTiming(data_pkt)) { + // if we the cache is not blocked, do cache access + if (!lsq->cacheBlocked()) { + if (!dcachePort->sendTiming(data_pkt)) { + // If the access didn't succeed, tell the LSQ by setting + // the retry thread id. + lsq->setRetryTid(lsqID); + } + } + + // If the cache was blocked, or has become blocked due to the access, + // handle it. + if (lsq->cacheBlocked()) { + ++lsqCacheBlocked; // There's an older load that's already going to squash. if (isLoadBlocked && blockedLoadSeqNum < load_inst->seqNum) return NoFault; diff --git a/src/cpu/o3/lsq_unit_impl.hh b/src/cpu/o3/lsq_unit_impl.hh index 6f32ec304..85b150cd9 100644 --- a/src/cpu/o3/lsq_unit_impl.hh +++ b/src/cpu/o3/lsq_unit_impl.hh @@ -29,11 +29,18 @@ * Korey Sewell */ -#include "cpu/checker/cpu.hh" +#include "config/use_checker.hh" + +#include "cpu/o3/lsq.hh" #include "cpu/o3/lsq_unit.hh" #include "base/str.hh" +#include "mem/packet.hh" #include "mem/request.hh" +#if USE_CHECKER +#include "cpu/checker/cpu.hh" +#endif + template<class Impl> LSQUnit<Impl>::WritebackEvent::WritebackEvent(DynInstPtr &_inst, PacketPtr _pkt, LSQUnit *lsq_ptr) @@ -71,6 +78,7 @@ LSQUnit<Impl>::completeDataAccess(PacketPtr pkt) //iewStage->ldstQueue.removeMSHR(inst->threadNumber,inst->seqNum); if (isSwitchedOut() || inst->isSquashed()) { + iewStage->decrWb(inst->seqNum); delete state; delete pkt; return; @@ -89,46 +97,6 @@ LSQUnit<Impl>::completeDataAccess(PacketPtr pkt) } template <class Impl> -Tick -LSQUnit<Impl>::DcachePort::recvAtomic(PacketPtr pkt) -{ - panic("O3CPU model does not work with atomic mode!"); - return curTick; -} - -template <class Impl> -void -LSQUnit<Impl>::DcachePort::recvFunctional(PacketPtr pkt) -{ - panic("O3CPU doesn't expect recvFunctional callback!"); -} - -template <class Impl> -void -LSQUnit<Impl>::DcachePort::recvStatusChange(Status status) -{ - if (status == RangeChange) - return; - - panic("O3CPU doesn't expect recvStatusChange callback!"); -} - -template <class Impl> -bool -LSQUnit<Impl>::DcachePort::recvTiming(PacketPtr pkt) -{ - lsq->completeDataAccess(pkt); - return true; -} - -template <class Impl> -void -LSQUnit<Impl>::DcachePort::recvRetry() -{ - lsq->recvRetry(); -} - -template <class Impl> LSQUnit<Impl>::LSQUnit() : loads(0), stores(0), storesToWB(0), stalled(false), isStoreBlocked(false), isLoadBlocked(false), @@ -138,13 +106,15 @@ LSQUnit<Impl>::LSQUnit() template<class Impl> void -LSQUnit<Impl>::init(Params *params, unsigned maxLQEntries, +LSQUnit<Impl>::init(Params *params, LSQ *lsq_ptr, unsigned maxLQEntries, unsigned maxSQEntries, unsigned id) { DPRINTF(LSQUnit, "Creating LSQUnit%i object.\n",id); switchedOut = false; + lsq = lsq_ptr; + lsqID = id; // Add 1 for the sentinel entry (they are circular queues). @@ -161,8 +131,6 @@ LSQUnit<Impl>::init(Params *params, unsigned maxLQEntries, usedPorts = 0; cachePorts = params->cachePorts; - mem = params->mem; - memDepViolator = NULL; blockedLoadSeqNum = 0; @@ -170,18 +138,15 @@ LSQUnit<Impl>::init(Params *params, unsigned maxLQEntries, template<class Impl> void -LSQUnit<Impl>::setCPU(FullCPU *cpu_ptr) +LSQUnit<Impl>::setCPU(O3CPU *cpu_ptr) { cpu = cpu_ptr; - dcachePort = new DcachePort(cpu, this); - - Port *mem_dport = mem->getPort(""); - dcachePort->setPeer(mem_dport); - mem_dport->setPeer(dcachePort); +#if USE_CHECKER if (cpu->checker) { cpu->checker->setDcachePort(dcachePort); } +#endif } template<class Impl> @@ -197,6 +162,47 @@ LSQUnit<Impl>::name() const template<class Impl> void +LSQUnit<Impl>::regStats() +{ + lsqForwLoads + .name(name() + ".forwLoads") + .desc("Number of loads that had data forwarded from stores"); + + invAddrLoads + .name(name() + ".invAddrLoads") + .desc("Number of loads ignored due to an invalid address"); + + lsqSquashedLoads + .name(name() + ".squashedLoads") + .desc("Number of loads squashed"); + + lsqIgnoredResponses + .name(name() + ".ignoredResponses") + .desc("Number of memory responses ignored because the instruction is squashed"); + + lsqSquashedStores + .name(name() + ".squashedStores") + .desc("Number of stores squashed"); + + invAddrSwpfs + .name(name() + ".invAddrSwpfs") + .desc("Number of software prefetches ignored due to an invalid address"); + + lsqBlockedLoads + .name(name() + ".blockedLoads") + .desc("Number of blocked loads due to partial load-store forwarding"); + + lsqRescheduledLoads + .name(name() + ".rescheduledLoads") + .desc("Number of loads that were rescheduled"); + + lsqCacheBlocked + .name(name() + ".cacheBlocked") + .desc("Number of times an access to memory failed due to the cache being blocked"); +} + +template<class Impl> +void LSQUnit<Impl>::clearLQ() { loadQueue.clear(); @@ -542,7 +548,7 @@ LSQUnit<Impl>::writebackStores() storeQueue[storeWBIdx].canWB && usedPorts < cachePorts) { - if (isStoreBlocked) { + if (isStoreBlocked || lsq->cacheBlocked()) { DPRINTF(LSQUnit, "Unable to write back any more stores, cache" " is blocked!\n"); break; @@ -617,7 +623,7 @@ LSQUnit<Impl>::writebackStores() if (!dcachePort->sendTiming(data_pkt)) { // Need to handle becoming blocked on a store. isStoreBlocked = true; - + ++lsqCacheBlocked; assert(retryPkt == NULL); retryPkt = data_pkt; } else { @@ -668,7 +674,7 @@ LSQUnit<Impl>::squash(const InstSeqNum &squashed_num) } // Clear the smart pointer to make sure it is decremented. - loadQueue[load_idx]->squashed = true; + loadQueue[load_idx]->setSquashed(); loadQueue[load_idx] = NULL; --loads; @@ -676,6 +682,7 @@ LSQUnit<Impl>::squash(const InstSeqNum &squashed_num) loadTail = load_idx; decrLdIdx(load_idx); + ++lsqSquashedLoads; } if (isLoadBlocked) { @@ -711,7 +718,7 @@ LSQUnit<Impl>::squash(const InstSeqNum &squashed_num) } // Clear the smart pointer to make sure it is decremented. - storeQueue[store_idx].inst->squashed = true; + storeQueue[store_idx].inst->setSquashed(); storeQueue[store_idx].inst = NULL; storeQueue[store_idx].canWB = 0; @@ -722,6 +729,7 @@ LSQUnit<Impl>::squash(const InstSeqNum &squashed_num) storeTail = store_idx; decrStIdx(store_idx); + ++lsqSquashedStores; } } @@ -744,9 +752,11 @@ LSQUnit<Impl>::storePostSend(Packet *pkt) // only works so long as the checker doesn't try to // verify the value in memory for stores. storeQueue[storeWBIdx].inst->setCompleted(); +#if USE_CHECKER if (cpu->checker) { - cpu->checker->tick(storeQueue[storeWBIdx].inst); + cpu->checker->verify(storeQueue[storeWBIdx].inst); } +#endif } if (pkt->result != Packet::Success) { @@ -781,6 +791,7 @@ LSQUnit<Impl>::writeback(DynInstPtr &inst, PacketPtr pkt) // Squashed instructions do not need to complete their access. if (inst->isSquashed()) { assert(!inst->isStore()); + ++lsqIgnoredResponses; return; } @@ -839,9 +850,11 @@ LSQUnit<Impl>::completeStore(int store_idx) // Tell the checker we've completed this instruction. Some stores // may get reported twice to the checker, but the checker can // handle that case. +#if USE_CHECKER if (cpu->checker) { - cpu->checker->tick(storeQueue[store_idx].inst); + cpu->checker->verify(storeQueue[store_idx].inst); } +#endif } template <class Impl> @@ -857,6 +870,8 @@ LSQUnit<Impl>::recvRetry() isStoreBlocked = false; } else { // Still blocked! + ++lsqCacheBlocked; + lsq->setRetryTid(lsqID); } } else if (isLoadBlocked) { DPRINTF(LSQUnit, "Loads squash themselves and all younger insts, " diff --git a/src/cpu/o3/mem_dep_unit.cc b/src/cpu/o3/mem_dep_unit.cc index a95103266..6a14dcbff 100644 --- a/src/cpu/o3/mem_dep_unit.cc +++ b/src/cpu/o3/mem_dep_unit.cc @@ -28,23 +28,22 @@ * Authors: Kevin Lim */ -#include "cpu/o3/alpha_dyn_inst.hh" -#include "cpu/o3/alpha_impl.hh" +#include "cpu/o3/isa_specific.hh" #include "cpu/o3/store_set.hh" #include "cpu/o3/mem_dep_unit_impl.hh" // Force instantation of memory dependency unit using store sets and -// AlphaSimpleImpl. -template class MemDepUnit<StoreSet, AlphaSimpleImpl>; +// O3CPUImpl. +template class MemDepUnit<StoreSet, O3CPUImpl>; #ifdef DEBUG template <> int -MemDepUnit<StoreSet, AlphaSimpleImpl>::MemDepEntry::memdep_count = 0; +MemDepUnit<StoreSet, O3CPUImpl>::MemDepEntry::memdep_count = 0; template <> int -MemDepUnit<StoreSet, AlphaSimpleImpl>::MemDepEntry::memdep_insert = 0; +MemDepUnit<StoreSet, O3CPUImpl>::MemDepEntry::memdep_insert = 0; template <> int -MemDepUnit<StoreSet, AlphaSimpleImpl>::MemDepEntry::memdep_erase = 0; +MemDepUnit<StoreSet, O3CPUImpl>::MemDepEntry::memdep_erase = 0; #endif diff --git a/src/cpu/o3/alpha_params.hh b/src/cpu/o3/params.hh index f3cf36887..1c234bcd7 100644..100755 --- a/src/cpu/o3/alpha_params.hh +++ b/src/cpu/o3/params.hh @@ -28,32 +28,28 @@ * Authors: Kevin Lim */ -#ifndef __CPU_O3_ALPHA_PARAMS_HH__ -#define __CPU_O3_ALPHA_PARAMS_HH__ +#ifndef __CPU_O3_PARAMS_HH__ +#define __CPU_O3_PARAMS_HH__ #include "cpu/o3/cpu.hh" //Forward declarations -class AlphaDTB; -class AlphaITB; class FUPool; -class MemObject; -class Process; -class System; /** - * This file defines the parameters that will be used for the AlphaFullCPU. + * This file defines the parameters that will be used for the O3CPU. * This must be defined externally so that the Impl can have a params class * defined that it can pass to all of the individual stages. */ - -class AlphaSimpleParams : public BaseFullCPU::Params +class O3Params : public BaseO3CPU::Params { public: + unsigned activity; -#if FULL_SYSTEM - AlphaITB *itb; AlphaDTB *dtb; -#else + // + // Pointers to key objects + // +#if !FULL_SYSTEM std::vector<Process *> workload; Process *process; #endif // FULL_SYSTEM @@ -62,13 +58,11 @@ class AlphaSimpleParams : public BaseFullCPU::Params BaseCPU *checker; - unsigned activity; - // // Caches // -// MemInterface *icacheInterface; -// MemInterface *dcacheInterface; + // MemInterface *icacheInterface; + // MemInterface *dcacheInterface; unsigned cachePorts; @@ -104,12 +98,10 @@ class AlphaSimpleParams : public BaseFullCPU::Params unsigned commitToIEWDelay; unsigned renameToIEWDelay; unsigned issueToExecuteDelay; + unsigned dispatchWidth; unsigned issueWidth; - unsigned executeWidth; - unsigned executeIntWidth; - unsigned executeFloatWidth; - unsigned executeBranchWidth; - unsigned executeMemoryWidth; + unsigned wbWidth; + unsigned wbDepth; FUPool *fuPool; // @@ -123,6 +115,12 @@ class AlphaSimpleParams : public BaseFullCPU::Params Tick fetchTrapLatency; // + // Timebuffer sizes + // + unsigned backComSize; + unsigned forwardComSize; + + // // Branch predictor (BP, BTB, RAS) // std::string predType; diff --git a/src/cpu/o3/regfile.hh b/src/cpu/o3/regfile.hh index ade5e4e56..b6677b4b1 100644 --- a/src/cpu/o3/regfile.hh +++ b/src/cpu/o3/regfile.hh @@ -72,7 +72,7 @@ class PhysRegFile // Will make these registers public for now, but they probably should // be private eventually with some accessor functions. public: - typedef typename Impl::FullCPU FullCPU; + typedef typename Impl::O3CPU O3CPU; /** * Constructs a physical register file with the specified amount of @@ -86,10 +86,6 @@ class PhysRegFile //The duplication is unfortunate but it's better than having //different ways to access certain registers. - //Add these in later when everything else is in place -// void serialize(std::ostream &os); -// void unserialize(Checkpoint *cp, const std::string §ion); - /** Reads an integer register. */ uint64_t readIntReg(PhysRegIndex reg_idx) { @@ -278,11 +274,11 @@ class PhysRegFile private: /** CPU pointer. */ - FullCPU *cpu; + O3CPU *cpu; public: /** Sets the CPU pointer. */ - void setCPU(FullCPU *cpu_ptr) { cpu = cpu_ptr; } + void setCPU(O3CPU *cpu_ptr) { cpu = cpu_ptr; } /** Number of physical integer registers. */ unsigned numPhysicalIntRegs; diff --git a/src/cpu/o3/rename.cc b/src/cpu/o3/rename.cc index 9ca8e82c6..443ada0cb 100644 --- a/src/cpu/o3/rename.cc +++ b/src/cpu/o3/rename.cc @@ -28,8 +28,7 @@ * Authors: Kevin Lim */ -#include "cpu/o3/alpha_dyn_inst.hh" -#include "cpu/o3/alpha_impl.hh" +#include "cpu/o3/isa_specific.hh" #include "cpu/o3/rename_impl.hh" -template class DefaultRename<AlphaSimpleImpl>; +template class DefaultRename<O3CPUImpl>; diff --git a/src/cpu/o3/rename.hh b/src/cpu/o3/rename.hh index 42fdf6bf5..034087feb 100644 --- a/src/cpu/o3/rename.hh +++ b/src/cpu/o3/rename.hh @@ -55,7 +55,7 @@ class DefaultRename // Typedefs from the Impl. typedef typename Impl::CPUPol CPUPol; typedef typename Impl::DynInstPtr DynInstPtr; - typedef typename Impl::FullCPU FullCPU; + typedef typename Impl::O3CPU O3CPU; typedef typename Impl::Params Params; // Typedefs from the CPUPol @@ -115,7 +115,7 @@ class DefaultRename void regStats(); /** Sets CPU pointer. */ - void setCPU(FullCPU *cpu_ptr); + void setCPU(O3CPU *cpu_ptr); /** Sets the main backwards communication time buffer pointer. */ void setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr); @@ -157,12 +157,15 @@ class DefaultRename /** Sets pointer to the scoreboard. */ void setScoreboard(Scoreboard *_scoreboard); + /** Drains the rename stage. */ + bool drain(); + + /** Resumes execution after a drain. */ + void resume() { } + /** Switches out the rename stage. */ void switchOut(); - /** Completes the switch out. */ - void doSwitchOut(); - /** Takes over from another CPU's thread. */ void takeOverFrom(); @@ -291,7 +294,7 @@ class DefaultRename std::list<RenameHistory> historyBuffer[Impl::MaxThreads]; /** Pointer to CPU. */ - FullCPU *cpu; + O3CPU *cpu; /** Pointer to main time buffer used for backwards communication. */ TimeBuffer<TimeStruct> *timeBuffer; diff --git a/src/cpu/o3/rename_impl.hh b/src/cpu/o3/rename_impl.hh index df33b98ee..805a72808 100644 --- a/src/cpu/o3/rename_impl.hh +++ b/src/cpu/o3/rename_impl.hh @@ -162,7 +162,7 @@ DefaultRename<Impl>::regStats() template <class Impl> void -DefaultRename<Impl>::setCPU(FullCPU *cpu_ptr) +DefaultRename<Impl>::setCPU(O3CPU *cpu_ptr) { DPRINTF(Rename, "Setting CPU pointer.\n"); cpu = cpu_ptr; @@ -257,16 +257,17 @@ DefaultRename<Impl>::setScoreboard(Scoreboard *_scoreboard) } template <class Impl> -void -DefaultRename<Impl>::switchOut() +bool +DefaultRename<Impl>::drain() { // Rename is ready to switch out at any time. - cpu->signalSwitched(); + cpu->signalDrained(); + return true; } template <class Impl> void -DefaultRename<Impl>::doSwitchOut() +DefaultRename<Impl>::switchOut() { // Clear any state, fix up the rename map. for (int i = 0; i < numThreads; i++) { @@ -341,7 +342,7 @@ DefaultRename<Impl>::squash(unsigned tid) for (int i=0; i<fromDecode->size; i++) { if (fromDecode->insts[i]->threadNumber == tid) { - fromDecode->insts[i]->squashed = true; + fromDecode->insts[i]->setSquashed(); wroteToTimeBuffer = true; squashCount++; } @@ -755,7 +756,7 @@ DefaultRename<Impl>::updateStatus() DPRINTF(Activity, "Activating stage.\n"); - cpu->activateStage(FullCPU::RenameIdx); + cpu->activateStage(O3CPU::RenameIdx); } } else { // If it's not unblocking, then rename will not have any internal @@ -764,7 +765,7 @@ DefaultRename<Impl>::updateStatus() _status = Inactive; DPRINTF(Activity, "Deactivating stage.\n"); - cpu->deactivateStage(FullCPU::RenameIdx); + cpu->deactivateStage(O3CPU::RenameIdx); } } } @@ -1022,7 +1023,7 @@ DefaultRename<Impl>::validInsts() unsigned inst_count = 0; for (int i=0; i<fromDecode->size; i++) { - if (!fromDecode->insts[i]->squashed) + if (!fromDecode->insts[i]->isSquashed()) inst_count++; } @@ -1206,7 +1207,7 @@ DefaultRename<Impl>::checkSignalsAndUpdate(unsigned tid) } DPRINTF(Rename, "[tid:%u]: Instruction must be processed by rename." - " Adding to front of list.", tid); + " Adding to front of list.\n", tid); serializeInst[tid] = NULL; diff --git a/src/cpu/o3/rob.cc b/src/cpu/o3/rob.cc index f99e5ccfd..9976049cd 100644 --- a/src/cpu/o3/rob.cc +++ b/src/cpu/o3/rob.cc @@ -29,9 +29,8 @@ * Nathan Binkert */ -#include "cpu/o3/alpha_dyn_inst.hh" -#include "cpu/o3/alpha_impl.hh" +#include "cpu/o3/isa_specific.hh" #include "cpu/o3/rob_impl.hh" // Force instantiation of InstructionQueue. -template class ROB<AlphaSimpleImpl>; +template class ROB<O3CPUImpl>; diff --git a/src/cpu/o3/rob.hh b/src/cpu/o3/rob.hh index 6d1402531..7cd5a5143 100644 --- a/src/cpu/o3/rob.hh +++ b/src/cpu/o3/rob.hh @@ -26,6 +26,7 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * Authors: Kevin Lim + * Korey Sewell */ #ifndef __CPU_O3_ROB_HH__ @@ -45,7 +46,7 @@ class ROB typedef TheISA::RegIndex RegIndex; public: //Typedefs from the Impl. - typedef typename Impl::FullCPU FullCPU; + typedef typename Impl::O3CPU O3CPU; typedef typename Impl::DynInstPtr DynInstPtr; typedef std::pair<RegIndex, PhysRegIndex> UnmapInfo; @@ -90,7 +91,7 @@ class ROB * is created within. * @param cpu_ptr Pointer to the implementation specific full CPU object. */ - void setCPU(FullCPU *cpu_ptr); + void setCPU(O3CPU *cpu_ptr); /** Sets pointer to the list of active threads. * @param at_ptr Pointer to the list of active threads. @@ -257,7 +258,7 @@ class ROB private: /** Pointer to the CPU. */ - FullCPU *cpu; + O3CPU *cpu; /** Active Threads in CPU */ std::list<unsigned>* activeThreads; @@ -307,7 +308,7 @@ class ROB private: /** The sequence number of the squashed instruction. */ - InstSeqNum squashedSeqNum; + InstSeqNum squashedSeqNum[Impl::MaxThreads]; /** Is the ROB done squashing. */ bool doneSquashing[Impl::MaxThreads]; diff --git a/src/cpu/o3/rob_impl.hh b/src/cpu/o3/rob_impl.hh index 97694e371..1b9f666b8 100644 --- a/src/cpu/o3/rob_impl.hh +++ b/src/cpu/o3/rob_impl.hh @@ -26,6 +26,7 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * Authors: Kevin Lim + * Korey Sewell */ #include "config/full_system.hh" @@ -40,10 +41,10 @@ ROB<Impl>::ROB(unsigned _numEntries, unsigned _squashWidth, : numEntries(_numEntries), squashWidth(_squashWidth), numInstsInROB(0), - squashedSeqNum(0), numThreads(_numThreads) { for (int tid=0; tid < numThreads; tid++) { + squashedSeqNum[tid] = 0; doneSquashing[tid] = true; threadEntries[tid] = 0; } @@ -100,7 +101,7 @@ ROB<Impl>::name() const template <class Impl> void -ROB<Impl>::setCPU(FullCPU *cpu_ptr) +ROB<Impl>::setCPU(O3CPU *cpu_ptr) { cpu = cpu_ptr; @@ -276,7 +277,7 @@ ROB<Impl>::retireHead(unsigned tid) --numInstsInROB; --threadEntries[tid]; - head_inst->removeInROB(); + head_inst->clearInROB(); head_inst->setCommitted(); instList[tid].erase(head_it); @@ -351,11 +352,11 @@ void ROB<Impl>::doSquash(unsigned tid) { DPRINTF(ROB, "[tid:%u]: Squashing instructions until [sn:%i].\n", - tid, squashedSeqNum); + tid, squashedSeqNum[tid]); assert(squashIt[tid] != instList[tid].end()); - if ((*squashIt[tid])->seqNum < squashedSeqNum) { + if ((*squashIt[tid])->seqNum < squashedSeqNum[tid]) { DPRINTF(ROB, "[tid:%u]: Done squashing instructions.\n", tid); @@ -370,7 +371,7 @@ ROB<Impl>::doSquash(unsigned tid) for (int numSquashed = 0; numSquashed < squashWidth && squashIt[tid] != instList[tid].end() && - (*squashIt[tid])->seqNum > squashedSeqNum; + (*squashIt[tid])->seqNum > squashedSeqNum[tid]; ++numSquashed) { DPRINTF(ROB, "[tid:%u]: Squashing instruction PC %#x, seq num %i.\n", @@ -407,7 +408,7 @@ ROB<Impl>::doSquash(unsigned tid) // Check if ROB is done squashing. - if ((*squashIt[tid])->seqNum <= squashedSeqNum) { + if ((*squashIt[tid])->seqNum <= squashedSeqNum[tid]) { DPRINTF(ROB, "[tid:%u]: Done squashing instructions.\n", tid); @@ -519,7 +520,7 @@ ROB<Impl>::squash(InstSeqNum squash_num,unsigned tid) doneSquashing[tid] = false; - squashedSeqNum = squash_num; + squashedSeqNum[tid] = squash_num; if (!instList[tid].empty()) { InstIt tail_thread = instList[tid].end(); @@ -543,6 +544,7 @@ ROB<Impl>::readHeadInst() } } */ + template <class Impl> typename Impl::DynInstPtr ROB<Impl>::readHeadInst(unsigned tid) @@ -557,6 +559,7 @@ ROB<Impl>::readHeadInst(unsigned tid) return dummyInst; } } + /* template <class Impl> uint64_t diff --git a/src/cpu/o3/thread_context.hh b/src/cpu/o3/thread_context.hh new file mode 100755 index 000000000..df8d1a6d8 --- /dev/null +++ b/src/cpu/o3/thread_context.hh @@ -0,0 +1,250 @@ +/* + * Copyright (c) 2004-2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Kevin Lim + * Korey Sewell + */ + +#ifndef __CPU_O3_THREAD_CONTEXT_HH__ +#define __CPU_O3_THREAD_CONTEXT_HH__ + +#include "cpu/o3/isa_specific.hh" + +class EndQuiesceEvent; +namespace Kernel { + class Statistics; +}; + +class TranslatingPort; + +/** + * Derived ThreadContext class for use with the O3CPU. It + * provides the interface for any external objects to access a + * single thread's state and some general CPU state. Any time + * external objects try to update state through this interface, + * the CPU will create an event to squash all in-flight + * instructions in order to ensure state is maintained correctly. + * It must be defined specifically for the O3CPU because + * not all architectural state is located within the O3ThreadState + * (such as the commit PC, and registers), and specific actions + * must be taken when using this interface (such as squashing all + * in-flight instructions when doing a write to this interface). + */ +template <class Impl> +class O3ThreadContext : public ThreadContext +{ + public: + typedef typename Impl::O3CPU O3CPU; + + /** Pointer to the CPU. */ + O3CPU *cpu; + + /** Pointer to the thread state that this TC corrseponds to. */ + O3ThreadState<Impl> *thread; + + /** Returns a pointer to this CPU. */ + virtual BaseCPU *getCpuPtr() { return cpu; } + + /** Sets this CPU's ID. */ + virtual void setCpuId(int id) { cpu->setCpuId(id); } + + /** Reads this CPU's ID. */ + virtual int readCpuId() { return cpu->readCpuId(); } + +#if FULL_SYSTEM + /** Returns a pointer to the system. */ + virtual System *getSystemPtr() { return cpu->system; } + + /** Returns a pointer to physical memory. */ + virtual PhysicalMemory *getPhysMemPtr() { return cpu->physmem; } + + /** Returns a pointer to this thread's kernel statistics. */ + virtual Kernel::Statistics *getKernelStats() + { return thread->kernelStats; } + + virtual FunctionalPort *getPhysPort() { return thread->getPhysPort(); } + + virtual VirtualPort *getVirtPort(ThreadContext *src_tc = NULL); + + void delVirtPort(VirtualPort *vp); +#else + virtual TranslatingPort *getMemPort() { return thread->getMemPort(); } + + /** Returns a pointer to this thread's process. */ + virtual Process *getProcessPtr() { return thread->getProcessPtr(); } +#endif + /** Returns this thread's status. */ + virtual Status status() const { return thread->status(); } + + /** Sets this thread's status. */ + virtual void setStatus(Status new_status) + { thread->setStatus(new_status); } + + /** Set the status to Active. Optional delay indicates number of + * cycles to wait before beginning execution. */ + virtual void activate(int delay = 1); + + /** Set the status to Suspended. */ + virtual void suspend(); + + /** Set the status to Unallocated. */ + virtual void deallocate(int delay = 0); + + /** Set the status to Halted. */ + virtual void halt(); + +#if FULL_SYSTEM + /** Dumps the function profiling information. + * @todo: Implement. + */ + virtual void dumpFuncProfile(); +#endif + /** Takes over execution of a thread from another CPU. */ + virtual void takeOverFrom(ThreadContext *old_context); + + /** Registers statistics associated with this TC. */ + virtual void regStats(const std::string &name); + + /** Serializes state. */ + virtual void serialize(std::ostream &os); + /** Unserializes state. */ + virtual void unserialize(Checkpoint *cp, const std::string §ion); + +#if FULL_SYSTEM + /** Reads the last tick that this thread was activated on. */ + virtual Tick readLastActivate(); + /** Reads the last tick that this thread was suspended on. */ + virtual Tick readLastSuspend(); + + /** Clears the function profiling information. */ + virtual void profileClear(); + /** Samples the function profiling information. */ + virtual void profileSample(); +#endif + /** Returns this thread's ID number. */ + virtual int getThreadNum() { return thread->readTid(); } + + /** Returns the instruction this thread is currently committing. + * Only used when an instruction faults. + */ + virtual TheISA::MachInst getInst(); + + /** Copies the architectural registers from another TC into this TC. */ + virtual void copyArchRegs(ThreadContext *tc); + + /** Resets all architectural registers to 0. */ + virtual void clearArchRegs(); + + /** Reads an integer register. */ + virtual uint64_t readIntReg(int reg_idx); + + virtual FloatReg readFloatReg(int reg_idx, int width); + + virtual FloatReg readFloatReg(int reg_idx); + + virtual FloatRegBits readFloatRegBits(int reg_idx, int width); + + virtual FloatRegBits readFloatRegBits(int reg_idx); + + /** Sets an integer register to a value. */ + virtual void setIntReg(int reg_idx, uint64_t val); + + virtual void setFloatReg(int reg_idx, FloatReg val, int width); + + virtual void setFloatReg(int reg_idx, FloatReg val); + + virtual void setFloatRegBits(int reg_idx, FloatRegBits val, int width); + + virtual void setFloatRegBits(int reg_idx, FloatRegBits val); + + /** Reads this thread's PC. */ + virtual uint64_t readPC() + { return cpu->readPC(thread->readTid()); } + + /** Sets this thread's PC. */ + virtual void setPC(uint64_t val); + + /** Reads this thread's next PC. */ + virtual uint64_t readNextPC() + { return cpu->readNextPC(thread->readTid()); } + + /** Sets this thread's next PC. */ + virtual void setNextPC(uint64_t val); + + /** Reads a miscellaneous register. */ + virtual MiscReg readMiscReg(int misc_reg) + { return cpu->readMiscReg(misc_reg, thread->readTid()); } + + /** Reads a misc. register, including any side-effects the + * read might have as defined by the architecture. */ + virtual MiscReg readMiscRegWithEffect(int misc_reg, Fault &fault) + { return cpu->readMiscRegWithEffect(misc_reg, fault, thread->readTid()); } + + /** Sets a misc. register. */ + virtual Fault setMiscReg(int misc_reg, const MiscReg &val); + + /** Sets a misc. register, including any side-effects the + * write might have as defined by the architecture. */ + virtual Fault setMiscRegWithEffect(int misc_reg, const MiscReg &val); + + /** Returns the number of consecutive store conditional failures. */ + // @todo: Figure out where these store cond failures should go. + virtual unsigned readStCondFailures() + { return thread->storeCondFailures; } + + /** Sets the number of consecutive store conditional failures. */ + virtual void setStCondFailures(unsigned sc_failures) + { thread->storeCondFailures = sc_failures; } + + // Only really makes sense for old CPU model. Lots of code + // outside the CPU still checks this function, so it will + // always return false to keep everything working. + /** Checks if the thread is misspeculating. Because it is + * very difficult to determine if the thread is + * misspeculating, this is set as false. */ + virtual bool misspeculating() { return false; } + +#if !FULL_SYSTEM + /** Gets a syscall argument by index. */ + virtual IntReg getSyscallArg(int i); + + /** Sets a syscall argument. */ + virtual void setSyscallArg(int i, IntReg val); + + /** Sets the syscall return value. */ + virtual void setSyscallReturn(SyscallReturn return_value); + + /** Executes a syscall in SE mode. */ + virtual void syscall(int64_t callnum) + { return cpu->syscall(callnum, thread->readTid()); } + + /** Reads the funcExeInst counter. */ + virtual Counter readFuncExeInst() { return thread->funcExeInst; } +#endif +}; + +#endif diff --git a/src/cpu/o3/thread_context_impl.hh b/src/cpu/o3/thread_context_impl.hh new file mode 100755 index 000000000..bf8cbf850 --- /dev/null +++ b/src/cpu/o3/thread_context_impl.hh @@ -0,0 +1,493 @@ +/* + * Copyright (c) 2004-2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Kevin Lim + * Korey Sewell + */ + +#include "cpu/o3/thread_context.hh" +#include "cpu/quiesce_event.hh" + +using namespace TheISA; + +#if FULL_SYSTEM +template <class Impl> +VirtualPort * +O3ThreadContext<Impl>::getVirtPort(ThreadContext *src_tc) +{ + if (!src_tc) + return thread->getVirtPort(); + + VirtualPort *vp; + Port *mem_port; + + vp = new VirtualPort("tc-vport", src_tc); + mem_port = cpu->system->physmem->getPort("functional"); + mem_port->setPeer(vp); + vp->setPeer(mem_port); + return vp; +} + +template <class Impl> +void +O3ThreadContext<Impl>::dumpFuncProfile() +{ + // Currently not supported +} +#endif + +template <class Impl> +void +O3ThreadContext<Impl>::takeOverFrom(ThreadContext *old_context) +{ + // some things should already be set up +#if FULL_SYSTEM + assert(getSystemPtr() == old_context->getSystemPtr()); +#else + assert(getProcessPtr() == old_context->getProcessPtr()); +#endif + + // copy over functional state + setStatus(old_context->status()); + copyArchRegs(old_context); + setCpuId(old_context->readCpuId()); + +#if !FULL_SYSTEM + thread->funcExeInst = old_context->readFuncExeInst(); +#else + EndQuiesceEvent *other_quiesce = old_context->getQuiesceEvent(); + if (other_quiesce) { + // Point the quiesce event's TC at this TC so that it wakes up + // the proper CPU. + other_quiesce->tc = this; + } + if (thread->quiesceEvent) { + thread->quiesceEvent->tc = this; + } + + // Transfer kernel stats from one CPU to the other. + thread->kernelStats = old_context->getKernelStats(); +// storeCondFailures = 0; + cpu->lockFlag = false; +#endif + + old_context->setStatus(ThreadContext::Unallocated); + + thread->inSyscall = false; + thread->trapPending = false; +} + +#if FULL_SYSTEM +template <class Impl> +void +O3ThreadContext<Impl>::delVirtPort(VirtualPort *vp) +{ + delete vp->getPeer(); + delete vp; +} +#endif + +template <class Impl> +void +O3ThreadContext<Impl>::activate(int delay) +{ + DPRINTF(O3CPU, "Calling activate on Thread Context %d\n", + getThreadNum()); + + if (thread->status() == ThreadContext::Active) + return; + +#if FULL_SYSTEM + thread->lastActivate = curTick; +#endif + + if (thread->status() == ThreadContext::Unallocated) { + cpu->activateWhenReady(thread->readTid()); + return; + } + + thread->setStatus(ThreadContext::Active); + + // status() == Suspended + cpu->activateContext(thread->readTid(), delay); +} + +template <class Impl> +void +O3ThreadContext<Impl>::suspend() +{ + DPRINTF(O3CPU, "Calling suspend on Thread Context %d\n", + getThreadNum()); + + if (thread->status() == ThreadContext::Suspended) + return; + +#if FULL_SYSTEM + thread->lastActivate = curTick; + thread->lastSuspend = curTick; +#endif +/* +#if FULL_SYSTEM + // Don't change the status from active if there are pending interrupts + if (cpu->check_interrupts()) { + assert(status() == ThreadContext::Active); + return; + } +#endif +*/ + thread->setStatus(ThreadContext::Suspended); + cpu->suspendContext(thread->readTid()); +} + +template <class Impl> +void +O3ThreadContext<Impl>::deallocate(int delay) +{ + DPRINTF(O3CPU, "Calling deallocate on Thread Context %d\n", + getThreadNum()); + + if (thread->status() == ThreadContext::Unallocated) + return; + + thread->setStatus(ThreadContext::Unallocated); + cpu->deallocateContext(thread->readTid(), delay); +} + +template <class Impl> +void +O3ThreadContext<Impl>::halt() +{ + DPRINTF(O3CPU, "Calling halt on Thread Context %d\n", + getThreadNum()); + + if (thread->status() == ThreadContext::Halted) + return; + + thread->setStatus(ThreadContext::Halted); + cpu->haltContext(thread->readTid()); +} + +template <class Impl> +void +O3ThreadContext<Impl>::regStats(const std::string &name) +{ +#if FULL_SYSTEM + thread->kernelStats = new Kernel::Statistics(cpu->system); + thread->kernelStats->regStats(name + ".kern"); +#endif +} + +template <class Impl> +void +O3ThreadContext<Impl>::serialize(std::ostream &os) +{ +#if FULL_SYSTEM + if (thread->kernelStats) + thread->kernelStats->serialize(os); +#endif + +} + +template <class Impl> +void +O3ThreadContext<Impl>::unserialize(Checkpoint *cp, const std::string §ion) +{ +#if FULL_SYSTEM + if (thread->kernelStats) + thread->kernelStats->unserialize(cp, section); +#endif + +} + +#if FULL_SYSTEM +template <class Impl> +Tick +O3ThreadContext<Impl>::readLastActivate() +{ + return thread->lastActivate; +} + +template <class Impl> +Tick +O3ThreadContext<Impl>::readLastSuspend() +{ + return thread->lastSuspend; +} + +template <class Impl> +void +O3ThreadContext<Impl>::profileClear() +{} + +template <class Impl> +void +O3ThreadContext<Impl>::profileSample() +{} +#endif + +template <class Impl> +TheISA::MachInst +O3ThreadContext<Impl>:: getInst() +{ + return thread->getInst(); +} + +template <class Impl> +void +O3ThreadContext<Impl>::copyArchRegs(ThreadContext *tc) +{ + // This function will mess things up unless the ROB is empty and + // there are no instructions in the pipeline. + unsigned tid = thread->readTid(); + PhysRegIndex renamed_reg; + + // First loop through the integer registers. + for (int i = 0; i < TheISA::NumIntRegs; ++i) { + renamed_reg = cpu->renameMap[tid].lookup(i); + + DPRINTF(O3CPU, "Copying over register %i, had data %lli, " + "now has data %lli.\n", + renamed_reg, cpu->readIntReg(renamed_reg), + tc->readIntReg(i)); + + cpu->setIntReg(renamed_reg, tc->readIntReg(i)); + } + + // Then loop through the floating point registers. + for (int i = 0; i < TheISA::NumFloatRegs; ++i) { + renamed_reg = cpu->renameMap[tid].lookup(i + TheISA::FP_Base_DepTag); + cpu->setFloatRegBits(renamed_reg, + tc->readFloatRegBits(i)); + } + + // Copy the misc regs. + copyMiscRegs(tc, this); + + // Then finally set the PC and the next PC. + cpu->setPC(tc->readPC(), tid); + cpu->setNextPC(tc->readNextPC(), tid); +#if !FULL_SYSTEM + this->thread->funcExeInst = tc->readFuncExeInst(); +#endif +} + +template <class Impl> +void +O3ThreadContext<Impl>::clearArchRegs() +{} + +template <class Impl> +uint64_t +O3ThreadContext<Impl>::readIntReg(int reg_idx) +{ + return cpu->readArchIntReg(reg_idx, thread->readTid()); +} + +template <class Impl> +FloatReg +O3ThreadContext<Impl>::readFloatReg(int reg_idx, int width) +{ + switch(width) { + case 32: + return cpu->readArchFloatRegSingle(reg_idx, thread->readTid()); + case 64: + return cpu->readArchFloatRegDouble(reg_idx, thread->readTid()); + default: + panic("Unsupported width!"); + return 0; + } +} + +template <class Impl> +FloatReg +O3ThreadContext<Impl>::readFloatReg(int reg_idx) +{ + return cpu->readArchFloatRegSingle(reg_idx, thread->readTid()); +} + +template <class Impl> +FloatRegBits +O3ThreadContext<Impl>::readFloatRegBits(int reg_idx, int width) +{ + DPRINTF(Fault, "Reading floatint register through the TC!\n"); + return cpu->readArchFloatRegInt(reg_idx, thread->readTid()); +} + +template <class Impl> +FloatRegBits +O3ThreadContext<Impl>::readFloatRegBits(int reg_idx) +{ + return cpu->readArchFloatRegInt(reg_idx, thread->readTid()); +} + +template <class Impl> +void +O3ThreadContext<Impl>::setIntReg(int reg_idx, uint64_t val) +{ + cpu->setArchIntReg(reg_idx, val, thread->readTid()); + + // Squash if we're not already in a state update mode. + if (!thread->trapPending && !thread->inSyscall) { + cpu->squashFromTC(thread->readTid()); + } +} + +template <class Impl> +void +O3ThreadContext<Impl>::setFloatReg(int reg_idx, FloatReg val, int width) +{ + switch(width) { + case 32: + cpu->setArchFloatRegSingle(reg_idx, val, thread->readTid()); + break; + case 64: + cpu->setArchFloatRegDouble(reg_idx, val, thread->readTid()); + break; + } + + // Squash if we're not already in a state update mode. + if (!thread->trapPending && !thread->inSyscall) { + cpu->squashFromTC(thread->readTid()); + } +} + +template <class Impl> +void +O3ThreadContext<Impl>::setFloatReg(int reg_idx, FloatReg val) +{ + cpu->setArchFloatRegSingle(reg_idx, val, thread->readTid()); + + if (!thread->trapPending && !thread->inSyscall) { + cpu->squashFromTC(thread->readTid()); + } +} + +template <class Impl> +void +O3ThreadContext<Impl>::setFloatRegBits(int reg_idx, FloatRegBits val, + int width) +{ + DPRINTF(Fault, "Setting floatint register through the TC!\n"); + cpu->setArchFloatRegInt(reg_idx, val, thread->readTid()); + + // Squash if we're not already in a state update mode. + if (!thread->trapPending && !thread->inSyscall) { + cpu->squashFromTC(thread->readTid()); + } +} + +template <class Impl> +void +O3ThreadContext<Impl>::setFloatRegBits(int reg_idx, FloatRegBits val) +{ + cpu->setArchFloatRegInt(reg_idx, val, thread->readTid()); + + // Squash if we're not already in a state update mode. + if (!thread->trapPending && !thread->inSyscall) { + cpu->squashFromTC(thread->readTid()); + } +} + +template <class Impl> +void +O3ThreadContext<Impl>::setPC(uint64_t val) +{ + cpu->setPC(val, thread->readTid()); + + // Squash if we're not already in a state update mode. + if (!thread->trapPending && !thread->inSyscall) { + cpu->squashFromTC(thread->readTid()); + } +} + +template <class Impl> +void +O3ThreadContext<Impl>::setNextPC(uint64_t val) +{ + cpu->setNextPC(val, thread->readTid()); + + // Squash if we're not already in a state update mode. + if (!thread->trapPending && !thread->inSyscall) { + cpu->squashFromTC(thread->readTid()); + } +} + +template <class Impl> +Fault +O3ThreadContext<Impl>::setMiscReg(int misc_reg, const MiscReg &val) +{ + Fault ret_fault = cpu->setMiscReg(misc_reg, val, thread->readTid()); + + // Squash if we're not already in a state update mode. + if (!thread->trapPending && !thread->inSyscall) { + cpu->squashFromTC(thread->readTid()); + } + + return ret_fault; +} + +template <class Impl> +Fault +O3ThreadContext<Impl>::setMiscRegWithEffect(int misc_reg, + const MiscReg &val) +{ + Fault ret_fault = cpu->setMiscRegWithEffect(misc_reg, val, + thread->readTid()); + + // Squash if we're not already in a state update mode. + if (!thread->trapPending && !thread->inSyscall) { + cpu->squashFromTC(thread->readTid()); + } + + return ret_fault; +} + +#if !FULL_SYSTEM + +template <class Impl> +TheISA::IntReg +O3ThreadContext<Impl>::getSyscallArg(int i) +{ + return cpu->getSyscallArg(i, thread->readTid()); +} + +template <class Impl> +void +O3ThreadContext<Impl>::setSyscallArg(int i, IntReg val) +{ + cpu->setSyscallArg(i, val, thread->readTid()); +} + +template <class Impl> +void +O3ThreadContext<Impl>::setSyscallReturn(SyscallReturn return_value) +{ + cpu->setSyscallReturn(return_value, thread->readTid()); +} + +#endif // FULL_SYSTEM + diff --git a/src/cpu/o3/thread_state.hh b/src/cpu/o3/thread_state.hh index b6535baa1..1c8105204 100644 --- a/src/cpu/o3/thread_state.hh +++ b/src/cpu/o3/thread_state.hh @@ -58,11 +58,11 @@ class Process; template <class Impl> struct O3ThreadState : public ThreadState { typedef ThreadContext::Status Status; - typedef typename Impl::FullCPU FullCPU; + typedef typename Impl::O3CPU O3CPU; private: /** Pointer to the CPU. */ - FullCPU *cpu; + O3CPU *cpu; public: /** Whether or not the thread is currently in syscall mode, and * thus able to be externally updated without squashing. @@ -75,14 +75,14 @@ struct O3ThreadState : public ThreadState { bool trapPending; #if FULL_SYSTEM - O3ThreadState(FullCPU *_cpu, int _thread_num) + O3ThreadState(O3CPU *_cpu, int _thread_num) : ThreadState(-1, _thread_num), inSyscall(0), trapPending(0) { } #else - O3ThreadState(FullCPU *_cpu, int _thread_num, Process *_process, int _asid, + O3ThreadState(O3CPU *_cpu, int _thread_num, Process *_process, int _asid, MemObject *mem) - : ThreadState(-1, _thread_num, mem, _process, _asid), + : ThreadState(-1, _thread_num, _process, _asid, mem), cpu(_cpu), inSyscall(0), trapPending(0) { } #endif |