diff options
author | Kevin Lim <ktlim@umich.edu> | 2006-05-31 11:45:02 -0400 |
---|---|---|
committer | Kevin Lim <ktlim@umich.edu> | 2006-05-31 11:45:02 -0400 |
commit | a514bf21508f4398f5cf7322f5f2a1ed212bbcaa (patch) | |
tree | e41f2e45926a5724765f762fe8c4b34e9e4d5c56 | |
parent | 94eff2f4854ce23900bcc3d694ff4c290111bea7 (diff) | |
download | gem5-a514bf21508f4398f5cf7322f5f2a1ed212bbcaa.tar.xz |
Comments and code cleanup.
cpu/activity.cc:
cpu/activity.hh:
cpu/o3/alpha_cpu.hh:
Updates to include comments.
cpu/base_dyn_inst.cc:
Remove call to thread->misspeculating(), as it's never actually misspeculating.
--HG--
extra : convert_revision : 86574d684770fac9b480475acca048ea418cdac3
36 files changed, 702 insertions, 157 deletions
diff --git a/cpu/activity.cc b/cpu/activity.cc index 6dcb6e341..b0b16446c 100644 --- a/cpu/activity.cc +++ b/cpu/activity.cc @@ -1,3 +1,30 @@ +/* + * Copyright (c) 2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ #include "base/timebuf.hh" #include "cpu/activity.hh" @@ -14,6 +41,8 @@ ActivityRecorder::ActivityRecorder(int num_stages, int longest_latency, void ActivityRecorder::activity() { + // If we've already recorded activity for this cycle, we don't + // want to increment the count any more. if (activityBuffer[0]) { return; } @@ -28,6 +57,8 @@ ActivityRecorder::activity() void ActivityRecorder::advance() { + // If there's a 1 in the slot that is about to be erased once the + // time buffer advances, then decrement the activityCount. if (activityBuffer[-longestLatency]) { --activityCount; @@ -46,6 +77,7 @@ ActivityRecorder::advance() void ActivityRecorder::activateStage(const int idx) { + // Increment the activity count if this stage wasn't already active. if (!stageActive[idx]) { ++activityCount; @@ -62,6 +94,7 @@ ActivityRecorder::activateStage(const int idx) void ActivityRecorder::deactivateStage(const int idx) { + // Decrement the activity count if this stage was active. if (stageActive[idx]) { --activityCount; diff --git a/cpu/activity.hh b/cpu/activity.hh index 2d53dc4bb..2c0df5efb 100644 --- a/cpu/activity.hh +++ b/cpu/activity.hh @@ -1,3 +1,30 @@ +/* + * Copyright (c) 2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ #ifndef __CPU_ACTIVITY_HH__ #define __CPU_ACTIVITY_HH__ @@ -5,33 +32,61 @@ #include "base/timebuf.hh" #include "base/trace.hh" +/** + * ActivityRecorder helper class that informs the CPU if it can switch + * over to being idle or not. It works by having a time buffer as + * long as any time buffer in the CPU, and the CPU and all of its + * stages inform the ActivityRecorder when they write to any time + * buffer. The ActivityRecorder marks a 1 in the "0" slot of the time + * buffer any time a stage writes to a time buffer, and it advances + * its time buffer at the same time as all other stages. The + * ActivityRecorder also records if a stage has activity to do next + * cycle. The recorder keeps a count of these two. Thus any time the + * count is non-zero, there is either communication still in flight, + * or activity that still must be done, meaning that the CPU can not + * idle. If count is zero, then the CPU can safely idle as it has no + * more outstanding work to do. + */ class ActivityRecorder { public: ActivityRecorder(int num_stages, int longest_latency, int count); /** Records that there is activity this cycle. 
*/ void activity(); - /** Advances the activity buffer, decrementing the activityCount if active - * communication just left the time buffer, and descheduling the CPU if - * there is no activity. + + /** Advances the activity buffer, decrementing the activityCount + * if active communication just left the time buffer, and + * determining if there is no activity. */ void advance(); + /** Marks a stage as active. */ void activateStage(const int idx); + /** Deactivates a stage. */ void deactivateStage(const int idx); + /** Returns how many things are active within the recorder. */ int getActivityCount() { return activityCount; } + /** Sets the count to a starting value. Can be used to disable + * the idling option. + */ void setActivityCount(int count) { activityCount = count; } + /** Returns if the CPU should be active. */ bool active() { return activityCount; } + /** Clears the time buffer and the activity count. */ void reset(); + /** Debug function to dump the contents of the time buffer. */ void dump(); + /** Debug function to ensure that the activity count matches the + * contents of the time buffer. + */ void validate(); private: @@ -45,6 +100,7 @@ class ActivityRecorder { */ TimeBuffer<bool> activityBuffer; + /** Longest latency time buffer in the CPU. */ int longestLatency; /** Tracks how many stages and cycles of time buffer have @@ -58,6 +114,7 @@ class ActivityRecorder { */ int activityCount; + /** Number of stages that can be marked as active or inactive. */ int numStages; /** Records which stages are active/inactive. */ diff --git a/cpu/base_dyn_inst.cc b/cpu/base_dyn_inst.cc index 7ab760ae3..64a995689 100644 --- a/cpu/base_dyn_inst.cc +++ b/cpu/base_dyn_inst.cc @@ -166,6 +166,8 @@ BaseDynInst<Impl>::~BaseDynInst() delete traceData; } + fault = NoFault; + --instcount; DPRINTF(DynInst, "DynInst: [sn:%lli] Instruction destroyed. 
Instcount=%i\n", @@ -289,7 +291,7 @@ BaseDynInst<Impl>::copy(Addr dest) { uint8_t data[64]; FunctionalMemory *mem = thread->mem; - assert(thread->copySrcPhysAddr || thread->misspeculating()); + assert(thread->copySrcPhysAddr); MemReqPtr req = new MemReq(dest, thread->getXCProxy(), 64); req->asid = asid; diff --git a/cpu/o3/alpha_cpu.hh b/cpu/o3/alpha_cpu.hh index 5c89e3462..4c452c4dd 100644 --- a/cpu/o3/alpha_cpu.hh +++ b/cpu/o3/alpha_cpu.hh @@ -39,6 +39,14 @@ namespace Kernel { class Statistics; }; +/** + * AlphaFullCPU class. Derives from the FullO3CPU class, and + * implements all ISA and implementation specific functions of the + * CPU. This is the CPU class that is used for the SimObjects, and is + * what is given to the DynInsts. Most of its state exists in the + * FullO3CPU; the state is has is mainly for ISA specific + * functionality. + */ template <class Impl> class AlphaFullCPU : public FullO3CPU<Impl> { @@ -56,145 +64,211 @@ class AlphaFullCPU : public FullO3CPU<Impl> /** Constructs an AlphaFullCPU with the given parameters. */ AlphaFullCPU(Params *params); + /** + * Derived ExecContext class for use with the AlphaFullCPU. It + * provides the interface for any external objects to access a + * single thread's state and some general CPU state. Any time + * external objects try to update state through this interface, + * the CPU will create an event to squash all in-flight + * instructions in order to ensure state is maintained correctly. + */ class AlphaXC : public ExecContext { public: + /** Pointer to the CPU. */ AlphaFullCPU<Impl> *cpu; + /** Pointer to the thread state that this XC corrseponds to. */ O3ThreadState<Impl> *thread; + /** Returns a pointer to this CPU. */ virtual BaseCPU *getCpuPtr() { return cpu; } + /** Sets this CPU's ID. */ virtual void setCpuId(int id) { cpu->cpu_id = id; } + /** Reads this CPU's ID. */ virtual int readCpuId() { return cpu->cpu_id; } + /** Returns a pointer to functional memory. 
*/ virtual FunctionalMemory *getMemPtr() { return thread->mem; } #if FULL_SYSTEM + /** Returns a pointer to the system. */ virtual System *getSystemPtr() { return cpu->system; } + /** Returns a pointer to physical memory. */ virtual PhysicalMemory *getPhysMemPtr() { return cpu->physmem; } + /** Returns a pointer to the ITB. */ virtual AlphaITB *getITBPtr() { return cpu->itb; } - virtual AlphaDTB * getDTBPtr() { return cpu->dtb; } + /** Returns a pointer to the DTB. */ + virtual AlphaDTB *getDTBPtr() { return cpu->dtb; } + /** Returns a pointer to this thread's kernel statistics. */ virtual Kernel::Statistics *getKernelStats() { return thread->kernelStats; } #else + /** Returns a pointer to this thread's process. */ virtual Process *getProcessPtr() { return thread->process; } #endif - + /** Returns this thread's status. */ virtual Status status() const { return thread->status(); } + /** Sets this thread's status. */ virtual void setStatus(Status new_status) { thread->setStatus(new_status); } - /// Set the status to Active. Optional delay indicates number of - /// cycles to wait before beginning execution. + /** Set the status to Active. Optional delay indicates number of + * cycles to wait before beginning execution. */ virtual void activate(int delay = 1); - /// Set the status to Suspended. + /** Set the status to Suspended. */ virtual void suspend(); - /// Set the status to Unallocated. + /** Set the status to Unallocated. */ virtual void deallocate(); - /// Set the status to Halted. + /** Set the status to Halted. */ virtual void halt(); #if FULL_SYSTEM + /** Dumps the function profiling information. + * @todo: Implement. + */ virtual void dumpFuncProfile(); #endif - + /** Takes over execution of a thread from another CPU. */ virtual void takeOverFrom(ExecContext *old_context); + /** Registers statistics associated with this XC. */ virtual void regStats(const std::string &name); + /** Serializes state. 
*/ virtual void serialize(std::ostream &os); + /** Unserializes state. */ virtual void unserialize(Checkpoint *cp, const std::string &section); #if FULL_SYSTEM + /** Returns pointer to the quiesce event. */ virtual EndQuiesceEvent *getQuiesceEvent(); + /** Reads the last tick that this thread was activated on. */ virtual Tick readLastActivate(); + /** Reads the last tick that this thread was suspended on. */ virtual Tick readLastSuspend(); + /** Clears the function profiling information. */ virtual void profileClear(); + /** Samples the function profiling information. */ virtual void profileSample(); #endif - + /** Returns this thread's ID number. */ virtual int getThreadNum() { return thread->tid; } + /** Returns the instruction this thread is currently committing. + * Only used when an instruction faults. + */ virtual TheISA::MachInst getInst(); + /** Copies the architectural registers from another XC into this XC. */ virtual void copyArchRegs(ExecContext *xc); + /** Resets all architectural registers to 0. */ virtual void clearArchRegs(); + /** Reads an integer register. */ virtual uint64_t readIntReg(int reg_idx); + /** Reads a single precision floating point register. */ virtual float readFloatRegSingle(int reg_idx); + /** Reads a double precision floating point register. */ virtual double readFloatRegDouble(int reg_idx); + /** Reads a floating point register as an integer value. */ virtual uint64_t readFloatRegInt(int reg_idx); + /** Sets an integer register to a value. */ virtual void setIntReg(int reg_idx, uint64_t val); + /** Sets a single precision fp register to a value. */ virtual void setFloatRegSingle(int reg_idx, float val); + /** Sets a double precision fp register to a value. */ virtual void setFloatRegDouble(int reg_idx, double val); + /** Sets a fp register to an integer value. */ virtual void setFloatRegInt(int reg_idx, uint64_t val); + /** Reads this thread's PC. 
*/ virtual uint64_t readPC() { return cpu->readPC(thread->tid); } + /** Sets this thread's PC. */ virtual void setPC(uint64_t val); + /** Reads this thread's next PC. */ virtual uint64_t readNextPC() { return cpu->readNextPC(thread->tid); } + /** Sets this thread's next PC. */ virtual void setNextPC(uint64_t val); + /** Reads a miscellaneous register. */ virtual MiscReg readMiscReg(int misc_reg) { return cpu->readMiscReg(misc_reg, thread->tid); } + /** Reads a misc. register, including any side-effects the + * read might have as defined by the architecture. */ virtual MiscReg readMiscRegWithEffect(int misc_reg, Fault &fault) { return cpu->readMiscRegWithEffect(misc_reg, fault, thread->tid); } + /** Sets a misc. register. */ virtual Fault setMiscReg(int misc_reg, const MiscReg &val); + /** Sets a misc. register, including any side-effects the + * write might have as defined by the architecture. */ virtual Fault setMiscRegWithEffect(int misc_reg, const MiscReg &val); + /** Returns the number of consecutive store conditional failures. */ // @todo: Figure out where these store cond failures should go. virtual unsigned readStCondFailures() { return thread->storeCondFailures; } + /** Sets the number of consecutive store conditional failures. */ virtual void setStCondFailures(unsigned sc_failures) { thread->storeCondFailures = sc_failures; } #if FULL_SYSTEM + /** Returns if the thread is currently in PAL mode, based on + * the PC's value. */ virtual bool inPalMode() { return TheISA::PcPAL(cpu->readPC(thread->tid)); } #endif - // Only really makes sense for old CPU model. Lots of code // outside the CPU still checks this function, so it will // always return false to keep everything working. + /** Checks if the thread is misspeculating. Because it is + * very difficult to determine if the thread is + * misspeculating, this is set as false. */ virtual bool misspeculating() { return false; } #if !FULL_SYSTEM + /** Gets a syscall argument by index. 
*/ virtual IntReg getSyscallArg(int i); + /** Sets a syscall argument. */ virtual void setSyscallArg(int i, IntReg val); + /** Sets the syscall return value. */ virtual void setSyscallReturn(SyscallReturn return_value); + /** Executes a syscall in SE mode. */ virtual void syscall() { return cpu->syscall(thread->tid); } + /** Reads the funcExeInst counter. */ virtual Counter readFuncExeInst() { return thread->funcExeInst; } #endif }; @@ -260,19 +334,32 @@ class AlphaFullCPU : public FullO3CPU<Impl> } #endif + /** Reads a miscellaneous register. */ MiscReg readMiscReg(int misc_reg, unsigned tid); + /** Reads a misc. register, including any side effects the read + * might have as defined by the architecture. + */ MiscReg readMiscRegWithEffect(int misc_reg, Fault &fault, unsigned tid); + /** Sets a miscellaneous register. */ Fault setMiscReg(int misc_reg, const MiscReg &val, unsigned tid); + /** Sets a misc. register, including any side effects the write + * might have as defined by the architecture. + */ Fault setMiscRegWithEffect(int misc_reg, const MiscReg &val, unsigned tid); + /** Initiates a squash of all in-flight instructions for a given + * thread. The source of the squash is an external update of + * state through the XC. + */ void squashFromXC(unsigned tid); #if FULL_SYSTEM + /** Posts an interrupt. */ void post_interrupt(int int_num, int index); - + /** Reads the interrupt flag. */ int readIntrFlag(); /** Sets the interrupt flags. */ void setIntrFlag(int val); @@ -298,7 +385,7 @@ class AlphaFullCPU : public FullO3CPU<Impl> /** Executes a syscall. * @todo: Determine if this needs to be virtual. */ - void syscall(int thread_num); + void syscall(int tid); /** Gets a syscall argument. */ IntReg getSyscallArg(int i, int tid); @@ -424,6 +511,7 @@ class AlphaFullCPU : public FullO3CPU<Impl> Addr lockAddr; + /** Temporary fix for the lock flag, works in the UP case. 
*/ bool lockFlag; }; diff --git a/cpu/o3/alpha_cpu_impl.hh b/cpu/o3/alpha_cpu_impl.hh index 91cd3d9e6..f39fdf6b6 100644 --- a/cpu/o3/alpha_cpu_impl.hh +++ b/cpu/o3/alpha_cpu_impl.hh @@ -59,10 +59,12 @@ AlphaFullCPU<Impl>::AlphaFullCPU(Params *params) { DPRINTF(FullCPU, "AlphaFullCPU: Creating AlphaFullCPU object.\n"); + // Setup any thread state. this->thread.resize(this->numThreads); for (int i = 0; i < this->numThreads; ++i) { #if FULL_SYSTEM + // SMT is not supported in FS mode yet. assert(this->numThreads == 1); this->thread[i] = new Thread(this, 0, params->mem); this->thread[i]->setStatus(ExecContext::Suspended); @@ -87,29 +89,34 @@ AlphaFullCPU<Impl>::AlphaFullCPU(Params *params) } #endif // !FULL_SYSTEM - this->thread[i]->numInst = 0; - ExecContext *xc_proxy; - AlphaXC *alpha_xc_proxy = new AlphaXC; + // Setup the XC that will serve as the interface to the threads/CPU. + AlphaXC *alpha_xc = new AlphaXC; + // If we're using a checker, then the XC should be the + // CheckerExecContext. if (params->checker) { - xc_proxy = new CheckerExecContext<AlphaXC>(alpha_xc_proxy, this->checker); + xc_proxy = new CheckerExecContext<AlphaXC>( + alpha_xc, this->checker); } else { - xc_proxy = alpha_xc_proxy; + xc_proxy = alpha_xc; } - alpha_xc_proxy->cpu = this; - alpha_xc_proxy->thread = this->thread[i]; + alpha_xc->cpu = this; + alpha_xc->thread = this->thread[i]; #if FULL_SYSTEM + // Setup quiesce event. this->thread[i]->quiesceEvent = new EndQuiesceEvent(xc_proxy); this->thread[i]->lastActivate = 0; this->thread[i]->lastSuspend = 0; #endif + // Give the thread the XC. this->thread[i]->xcProxy = xc_proxy; + // Add the XC to the CPU's list of XC's. 
this->execContexts.push_back(xc_proxy); } @@ -171,6 +178,7 @@ AlphaFullCPU<Impl>::AlphaXC::takeOverFrom(ExecContext *old_context) setStatus(old_context->status()); copyArchRegs(old_context); setCpuId(old_context->readCpuId()); + #if !FULL_SYSTEM thread->funcExeInst = old_context->readFuncExeInst(); #else @@ -394,7 +402,6 @@ template <class Impl> uint64_t AlphaFullCPU<Impl>::AlphaXC::readIntReg(int reg_idx) { - DPRINTF(Fault, "Reading int register through the XC!\n"); return cpu->readArchIntReg(reg_idx, thread->tid); } @@ -402,7 +409,6 @@ template <class Impl> float AlphaFullCPU<Impl>::AlphaXC::readFloatRegSingle(int reg_idx) { - DPRINTF(Fault, "Reading float register through the XC!\n"); return cpu->readArchFloatRegSingle(reg_idx, thread->tid); } @@ -410,7 +416,6 @@ template <class Impl> double AlphaFullCPU<Impl>::AlphaXC::readFloatRegDouble(int reg_idx) { - DPRINTF(Fault, "Reading float register through the XC!\n"); return cpu->readArchFloatRegDouble(reg_idx, thread->tid); } @@ -418,7 +423,6 @@ template <class Impl> uint64_t AlphaFullCPU<Impl>::AlphaXC::readFloatRegInt(int reg_idx) { - DPRINTF(Fault, "Reading floatint register through the XC!\n"); return cpu->readArchFloatRegInt(reg_idx, thread->tid); } @@ -426,9 +430,9 @@ template <class Impl> void AlphaFullCPU<Impl>::AlphaXC::setIntReg(int reg_idx, uint64_t val) { - DPRINTF(Fault, "Setting int register through the XC!\n"); cpu->setArchIntReg(reg_idx, val, thread->tid); + // Squash if we're not already in a state update mode. if (!thread->trapPending && !thread->inSyscall) { cpu->squashFromXC(thread->tid); } @@ -438,9 +442,9 @@ template <class Impl> void AlphaFullCPU<Impl>::AlphaXC::setFloatRegSingle(int reg_idx, float val) { - DPRINTF(Fault, "Setting float register through the XC!\n"); cpu->setArchFloatRegSingle(reg_idx, val, thread->tid); + // Squash if we're not already in a state update mode. 
if (!thread->trapPending && !thread->inSyscall) { cpu->squashFromXC(thread->tid); } @@ -450,9 +454,9 @@ template <class Impl> void AlphaFullCPU<Impl>::AlphaXC::setFloatRegDouble(int reg_idx, double val) { - DPRINTF(Fault, "Setting float register through the XC!\n"); cpu->setArchFloatRegDouble(reg_idx, val, thread->tid); + // Squash if we're not already in a state update mode. if (!thread->trapPending && !thread->inSyscall) { cpu->squashFromXC(thread->tid); } @@ -462,9 +466,9 @@ template <class Impl> void AlphaFullCPU<Impl>::AlphaXC::setFloatRegInt(int reg_idx, uint64_t val) { - DPRINTF(Fault, "Setting floatint register through the XC!\n"); cpu->setArchFloatRegInt(reg_idx, val, thread->tid); + // Squash if we're not already in a state update mode. if (!thread->trapPending && !thread->inSyscall) { cpu->squashFromXC(thread->tid); } @@ -476,6 +480,7 @@ AlphaFullCPU<Impl>::AlphaXC::setPC(uint64_t val) { cpu->setPC(val, thread->tid); + // Squash if we're not already in a state update mode. if (!thread->trapPending && !thread->inSyscall) { cpu->squashFromXC(thread->tid); } @@ -487,6 +492,7 @@ AlphaFullCPU<Impl>::AlphaXC::setNextPC(uint64_t val) { cpu->setNextPC(val, thread->tid); + // Squash if we're not already in a state update mode. if (!thread->trapPending && !thread->inSyscall) { cpu->squashFromXC(thread->tid); } @@ -496,10 +502,9 @@ template <class Impl> Fault AlphaFullCPU<Impl>::AlphaXC::setMiscReg(int misc_reg, const MiscReg &val) { - DPRINTF(Fault, "Setting misc register through the XC!\n"); - Fault ret_fault = cpu->setMiscReg(misc_reg, val, thread->tid); + // Squash if we're not already in a state update mode. 
if (!thread->trapPending && !thread->inSyscall) { cpu->squashFromXC(thread->tid); } @@ -509,12 +514,12 @@ AlphaFullCPU<Impl>::AlphaXC::setMiscReg(int misc_reg, const MiscReg &val) template <class Impl> Fault -AlphaFullCPU<Impl>::AlphaXC::setMiscRegWithEffect(int misc_reg, const MiscReg &val) +AlphaFullCPU<Impl>::AlphaXC::setMiscRegWithEffect(int misc_reg, + const MiscReg &val) { - DPRINTF(Fault, "Setting misc register through the XC!\n"); - Fault ret_fault = cpu->setMiscRegWithEffect(misc_reg, val, thread->tid); + // Squash if we're not already in a state update mode. if (!thread->trapPending && !thread->inSyscall) { cpu->squashFromXC(thread->tid); } @@ -595,7 +600,6 @@ AlphaFullCPU<Impl>::post_interrupt(int int_num, int index) if (this->thread[0]->status() == ExecContext::Suspended) { DPRINTF(IPI,"Suspended Processor awoke\n"); -// xcProxies[0]->activate(); this->execContexts[0]->activate(); } } @@ -658,6 +662,7 @@ template <class Impl> void AlphaFullCPU<Impl>::trap(Fault fault, unsigned tid) { + // Pass the thread's XC into the invoke method. fault->invoke(this->execContexts[tid]); } @@ -708,6 +713,7 @@ AlphaFullCPU<Impl>::processInterrupts() if (ipl && ipl > this->readMiscReg(IPR_IPLR, 0)) { this->setMiscReg(IPR_ISR, summary, 0); this->setMiscReg(IPR_INTID, ipl, 0); + // Checker needs to know these two registers were updated. if (this->checker) { this->checker->cpuXCBase()->setMiscReg(IPR_ISR, summary); this->checker->cpuXCBase()->setMiscReg(IPR_INTID, ipl); diff --git a/cpu/o3/alpha_dyn_inst.hh b/cpu/o3/alpha_dyn_inst.hh index 1c5b738aa..de4d40358 100644 --- a/cpu/o3/alpha_dyn_inst.hh +++ b/cpu/o3/alpha_dyn_inst.hh @@ -86,23 +86,31 @@ class AlphaDynInst : public BaseDynInst<Impl> void initVars(); public: + /** Reads a miscellaneous register. */ MiscReg readMiscReg(int misc_reg) { return this->cpu->readMiscReg(misc_reg, this->threadNumber); } + /** Reads a misc. register, including any side-effects the read + * might have as defined by the architecture. 
+ */ MiscReg readMiscRegWithEffect(int misc_reg, Fault &fault) { return this->cpu->readMiscRegWithEffect(misc_reg, fault, this->threadNumber); } + /** Sets a misc. register. */ Fault setMiscReg(int misc_reg, const MiscReg &val) { this->instResult.integer = val; return this->cpu->setMiscReg(misc_reg, val, this->threadNumber); } + /** Sets a misc. register, including any side-effects the write + * might have as defined by the architecture. + */ Fault setMiscRegWithEffect(int misc_reg, const MiscReg &val) { return this->cpu->setMiscRegWithEffect(misc_reg, val, diff --git a/cpu/o3/alpha_dyn_inst_impl.hh b/cpu/o3/alpha_dyn_inst_impl.hh index 541d5ab82..d82d46830 100644 --- a/cpu/o3/alpha_dyn_inst_impl.hh +++ b/cpu/o3/alpha_dyn_inst_impl.hh @@ -64,9 +64,10 @@ template <class Impl> Fault AlphaDynInst<Impl>::execute() { - // @todo: Pretty convoluted way to avoid squashing from happening when using - // the XC during an instruction's execution (specifically for instructions - // that have sideeffects that use the XC). Fix this. + // @todo: Pretty convoluted way to avoid squashing from happening + // when using the XC during an instruction's execution + // (specifically for instructions that have side-effects that use + // the XC). Fix this. bool in_syscall = this->thread->inSyscall; this->thread->inSyscall = true; @@ -81,9 +82,10 @@ template <class Impl> Fault AlphaDynInst<Impl>::initiateAcc() { - // @todo: Pretty convoluted way to avoid squashing from happening when using - // the XC during an instruction's execution (specifically for instructions - // that have sideeffects that use the XC). Fix this. + // @todo: Pretty convoluted way to avoid squashing from happening + // when using the XC during an instruction's execution + // (specifically for instructions that have side-effects that use + // the XC). Fix this. 
bool in_syscall = this->thread->inSyscall; this->thread->inSyscall = true; @@ -99,10 +101,12 @@ Fault AlphaDynInst<Impl>::completeAcc() { if (this->isLoad()) { + // Loads need the request's data to complete the access. this->fault = this->staticInst->completeAcc(this->req->data, this, this->traceData); } else if (this->isStore()) { + // Stores need the result of the request to complete their access. this->fault = this->staticInst->completeAcc((uint8_t*)&this->req->result, this, this->traceData); @@ -118,9 +122,11 @@ template <class Impl> Fault AlphaDynInst<Impl>::hwrei() { + // Can only do a hwrei when in pal mode. if (!this->cpu->inPalMode(this->readPC())) return new AlphaISA::UnimplementedOpcodeFault; + // Set the next PC based on the value of the EXC_ADDR IPR. this->setNextPC(this->cpu->readMiscReg(AlphaISA::IPR_EXC_ADDR, this->threadNumber)); diff --git a/cpu/o3/alpha_params.hh b/cpu/o3/alpha_params.hh index 5eb00426d..f0836a9fd 100644 --- a/cpu/o3/alpha_params.hh +++ b/cpu/o3/alpha_params.hh @@ -125,7 +125,7 @@ class AlphaSimpleParams : public BaseFullCPU::Params Tick fetchTrapLatency; // - // Branch predictor (BP & BTB) + // Branch predictor (BP, BTB, RAS) // std::string predType; unsigned localPredictorSize; diff --git a/cpu/o3/comm.hh b/cpu/o3/comm.hh index c36c58d3d..d9a242a12 100644 --- a/cpu/o3/comm.hh +++ b/cpu/o3/comm.hh @@ -41,6 +41,7 @@ // typedef yet are not templated on the Impl. For now it will be defined here. typedef short int PhysRegIndex; +/** Struct that defines the information passed from fetch to decode. */ template<class Impl> struct DefaultFetchDefaultDecode { typedef typename Impl::DynInstPtr DynInstPtr; @@ -53,6 +54,7 @@ struct DefaultFetchDefaultDecode { bool clearFetchFault; }; +/** Struct that defines the information passed from decode to rename. 
*/ template<class Impl> struct DefaultDecodeDefaultRename { typedef typename Impl::DynInstPtr DynInstPtr; @@ -62,6 +64,7 @@ struct DefaultDecodeDefaultRename { DynInstPtr insts[Impl::MaxWidth]; }; +/** Struct that defines the information passed from rename to IEW. */ template<class Impl> struct DefaultRenameDefaultIEW { typedef typename Impl::DynInstPtr DynInstPtr; @@ -71,6 +74,7 @@ struct DefaultRenameDefaultIEW { DynInstPtr insts[Impl::MaxWidth]; }; +/** Struct that defines the information passed from IEW to commit. */ template<class Impl> struct DefaultIEWDefaultCommit { typedef typename Impl::DynInstPtr DynInstPtr; @@ -98,6 +102,7 @@ struct IssueStruct { DynInstPtr insts[Impl::MaxWidth]; }; +/** Struct that defines all backwards communication. */ template<class Impl> struct TimeBufStruct { struct decodeComm { @@ -119,13 +124,7 @@ struct TimeBufStruct { decodeComm decodeInfo[Impl::MaxThreads]; - // Rename can't actually tell anything to squash or send a new PC back - // because it doesn't do anything along those lines. But maybe leave - // these fields in here to keep the stages mostly orthagonal. struct renameComm { - bool squash; - - uint64_t nextPC; }; renameComm renameInfo[Impl::MaxThreads]; diff --git a/cpu/o3/commit.hh b/cpu/o3/commit.hh index 66abf8dc6..d93822394 100644 --- a/cpu/o3/commit.hh +++ b/cpu/o3/commit.hh @@ -84,6 +84,9 @@ class DefaultCommit typedef O3ThreadState<Impl> Thread; + /** Event class used to schedule a squash due to a trap (fault or + * interrupt) to happen on a specific cycle. + */ class TrapEvent : public Event { private: DefaultCommit<Impl> *commit; @@ -161,7 +164,7 @@ class DefaultCommit Fetch *fetchStage; - /** Sets the poitner to the IEW stage. */ + /** Sets the pointer to the IEW stage. */ void setIEWStage(IEW *iew_stage); /** The pointer to the IEW stage. Used solely to ensure that @@ -182,10 +185,13 @@ class DefaultCommit /** Initializes stage by sending back the number of free entries. 
*/ void initStage(); + /** Initializes the switching out of commit. */ void switchOut(); + /** Completes the switch out of commit. */ void doSwitchOut(); + /** Takes over from another CPU's thread. */ void takeOverFrom(); /** Ticks the commit stage, which tries to commit instructions. */ @@ -199,11 +205,18 @@ class DefaultCommit /** Returns the number of free ROB entries for a specific thread. */ unsigned numROBFreeEntries(unsigned tid); + /** Generates an event to schedule a squash due to a trap. */ + void generateTrapEvent(unsigned tid); + + /** Records that commit needs to initiate a squash due to an + * external state update through the XC. + */ void generateXCEvent(unsigned tid); private: /** Updates the overall status of commit with the nextStatus, and - * tell the CPU if commit is active/inactive. */ + * tell the CPU if commit is active/inactive. + */ void updateStatus(); /** Sets the next status based on threads' statuses, which becomes the @@ -222,10 +235,13 @@ class DefaultCommit */ bool changedROBEntries(); + /** Squashes all in flight instructions. */ void squashAll(unsigned tid); + /** Handles squashing due to a trap. */ void squashFromTrap(unsigned tid); + /** Handles squashing due to an XC write. */ void squashFromXC(unsigned tid); /** Commits as many instructions as possible. */ @@ -236,8 +252,6 @@ class DefaultCommit */ bool commitHead(DynInstPtr &head_inst, unsigned inst_num); - void generateTrapEvent(unsigned tid); - /** Gets instructions from rename and inserts them into the ROB. */ void getInsts(); @@ -259,12 +273,16 @@ class DefaultCommit */ uint64_t readPC() { return PC[0]; } + /** Returns the PC of a specific thread. */ uint64_t readPC(unsigned tid) { return PC[tid]; } + /** Sets the PC of a specific thread. */ void setPC(uint64_t val, unsigned tid) { PC[tid] = val; } + /** Reads the PC of a specific thread. */ uint64_t readNextPC(unsigned tid) { return nextPC[tid]; } + /** Sets the next PC of a specific thread. 
*/ void setNextPC(uint64_t val, unsigned tid) { nextPC[tid] = val; } private: @@ -304,6 +322,7 @@ class DefaultCommit /** Memory interface. Used for d-cache accesses. */ MemInterface *dcacheInterface; + /** Vector of all of the threads. */ std::vector<Thread *> thread; Fault fetchFault; @@ -362,17 +381,27 @@ class DefaultCommit /** Number of Active Threads */ unsigned numThreads; + /** Is a switch out pending. */ bool switchPending; + + /** Is commit switched out. */ bool switchedOut; + /** The latency to handle a trap. Used when scheduling trap + * squash event. + */ Tick trapLatency; Tick fetchTrapLatency; Tick fetchFaultTick; + /** The commit PC of each thread. Refers to the instruction that + * is currently being processed/committed. + */ Addr PC[Impl::MaxThreads]; + /** The next PC of each thread. */ Addr nextPC[Impl::MaxThreads]; /** The sequence number of the youngest valid instruction in the ROB. */ @@ -384,6 +413,7 @@ class DefaultCommit /** Rename map interface. */ RenameMap *renameMap[Impl::MaxThreads]; + /** Updates commit stats based on this instruction. */ void updateComInstStats(DynInstPtr &inst); /** Stat for the total number of committed instructions. */ @@ -417,7 +447,9 @@ class DefaultCommit /** Total number of committed branches. */ Stats::Vector<> statComBranches; + /** Number of cycles where the commit bandwidth limit is reached. */ Stats::Scalar<> commitEligibleSamples; + /** Number of instructions not committed due to bandwidth limits. */ Stats::Vector<> commitEligible; }; diff --git a/cpu/o3/commit_impl.hh b/cpu/o3/commit_impl.hh index 346a8bc1c..9409697eb 100644 --- a/cpu/o3/commit_impl.hh +++ b/cpu/o3/commit_impl.hh @@ -691,7 +691,7 @@ DefaultCommit<Impl>::commit() while (threads != (*activeThreads).end()) { unsigned tid = *threads++; - +/* if (fromFetch->fetchFault && commitStatus[0] != TrapPending) { // Record the fault. Wait until it's empty in the ROB. // Then handle the trap. 
Ignore it if there's already a @@ -713,7 +713,7 @@ DefaultCommit<Impl>::commit() commitStatus[0] = Running; } } - +*/ // Not sure which one takes priority. I think if we have // both, that's a bad sign. if (trapSquash[tid] == true) { @@ -925,7 +925,7 @@ DefaultCommit<Impl>::commitInsts() numCommittedDist.sample(num_committed); if (num_committed == commitWidth) { - commitEligible[0]++; + commitEligibleSamples[0]++; } } @@ -947,6 +947,7 @@ DefaultCommit<Impl>::commitHead(DynInstPtr &head_inst, unsigned inst_num) head_inst->reachedCommit = true; if (head_inst->isNonSpeculative() || + head_inst->isStoreConditional() || head_inst->isMemBarrier() || head_inst->isWriteBarrier()) { diff --git a/cpu/o3/cpu.hh b/cpu/o3/cpu.hh index 8db65d501..f4b19bfb3 100644 --- a/cpu/o3/cpu.hh +++ b/cpu/o3/cpu.hh @@ -67,6 +67,11 @@ class BaseFullCPU : public BaseCPU int cpu_id; }; +/** + * FullO3CPU class, has each of the stages (fetch through commit) + * within it, as well as all of the time buffers between stages. The + * tick() function for the CPU is defined here. + */ template <class Impl> class FullO3CPU : public BaseFullCPU { @@ -194,17 +199,13 @@ class FullO3CPU : public BaseFullCPU */ virtual void syscall(int tid) { panic("Unimplemented!"); } - /** Check if there are any system calls pending. */ - void checkSyscalls(); - - /** Switches out this CPU. - */ + /** Switches out this CPU. */ void switchOut(Sampler *sampler); + /** Signals to this CPU that a stage has completed switching out. */ void signalSwitched(); - /** Takes over from another CPU. - */ + /** Takes over from another CPU. */ void takeOverFrom(BaseCPU *oldCPU); /** Get the current instruction sequence number, and increment it. */ @@ -244,9 +245,7 @@ class FullO3CPU : public BaseFullCPU #endif - // - // New accessors for new decoder. - // + /** Register accessors. Index refers to the physical register index. 
*/ uint64_t readIntReg(int reg_idx); float readFloatRegSingle(int reg_idx); @@ -271,6 +270,11 @@ class FullO3CPU : public BaseFullCPU uint64_t readArchFloatRegInt(int reg_idx, unsigned tid); + /** Architectural register accessors. Looks up in the commit + * rename table to obtain the true physical index of the + * architected register first, then accesses that physical + * register. + */ void setArchIntReg(int reg_idx, uint64_t val, unsigned tid); void setArchFloatRegSingle(int reg_idx, float val, unsigned tid); @@ -279,13 +283,17 @@ class FullO3CPU : public BaseFullCPU void setArchFloatRegInt(int reg_idx, uint64_t val, unsigned tid); + /** Reads the commit PC of a specific thread. */ uint64_t readPC(unsigned tid); - void setPC(Addr new_PC,unsigned tid); + /** Sets the commit PC of a specific thread. */ + void setPC(Addr new_PC, unsigned tid); + /** Reads the next PC of a specific thread. */ uint64_t readNextPC(unsigned tid); - void setNextPC(uint64_t val,unsigned tid); + /** Sets the next PC of a specific thread. */ + void setNextPC(uint64_t val, unsigned tid); /** Function to add instruction onto the head of the list of the * instructions. Used when new instructions are fetched. @@ -309,21 +317,15 @@ class FullO3CPU : public BaseFullCPU /** Remove all instructions younger than the given sequence number. */ void removeInstsUntil(const InstSeqNum &seq_num,unsigned tid); + /** Removes the instruction pointed to by the iterator. */ inline void squashInstIt(const ListIt &instIt, const unsigned &tid); + /** Cleans up all instructions on the remove list. */ void cleanUpRemovedInsts(); - /** Remove all instructions from the list. */ -// void removeAllInsts(); - + /** Debug function to print all instructions on the list. */ void dumpInsts(); - /** Basically a wrapper function so that instructions executed at - * commit can tell the instruction queue that they have - * completed. Eventually this hack should be removed. 
- */ -// void wakeDependents(DynInstPtr &inst); - public: /** List of all the instructions in flight. */ std::list<DynInstPtr> instList; @@ -334,6 +336,9 @@ class FullO3CPU : public BaseFullCPU std::queue<ListIt> removeList; #ifdef DEBUG + /** Debug structure to keep track of the sequence numbers still in + * flight. + */ std::set<InstSeqNum> snList; #endif @@ -420,14 +425,22 @@ class FullO3CPU : public BaseFullCPU /** The IEW stage's instruction queue. */ TimeBuffer<IEWStruct> iewQueue; - public: + private: + /** The activity recorder; used to tell if the CPU has any + * activity remaining or if it can go to idle and deschedule + * itself. + */ ActivityRecorder activityRec; + public: + /** Records that there was time buffer activity this cycle. */ void activityThisCycle() { activityRec.activity(); } + /** Changes a stage's status to active within the activity recorder. */ void activateStage(const StageIdx idx) { activityRec.activateStage(idx); } + /** Changes a stage's status to inactive within the activity recorder. */ void deactivateStage(const StageIdx idx) { activityRec.deactivateStage(idx); } @@ -438,7 +451,7 @@ class FullO3CPU : public BaseFullCPU int getFreeTid(); public: - /** Temporary function to get pointer to exec context. */ + /** Returns a pointer to a thread's exec context. */ ExecContext *xcBase(unsigned tid) { return thread[tid]->getXCProxy(); @@ -447,6 +460,10 @@ class FullO3CPU : public BaseFullCPU /** The global sequence number counter. */ InstSeqNum globalSeqNum; + /** Pointer to the checker, which can dynamically verify + * instruction results at run time. This can be set to NULL if it + * is not being used. + */ Checker<DynInstPtr> *checker; #if FULL_SYSTEM @@ -462,11 +479,13 @@ class FullO3CPU : public BaseFullCPU /** Pointer to memory. */ FunctionalMemory *mem; + /** Pointer to the sampler */ Sampler *sampler; + /** Counter of how many stages have completed switching out. */ int switchCount; - // List of all ExecContexts. 
+ /** Pointers to all of the threads in the CPU. */ std::vector<Thread *> thread; #if 0 diff --git a/cpu/o3/cpu_policy.hh b/cpu/o3/cpu_policy.hh index b4249b12d..c30e58389 100644 --- a/cpu/o3/cpu_policy.hh +++ b/cpu/o3/cpu_policy.hh @@ -48,24 +48,50 @@ #include "cpu/o3/comm.hh" +/** + * Struct that defines the key classes to be used by the CPU. All + * classes use the typedefs defined here to determine what are the + * classes of the other stages and communication buffers. In order to + * change a structure such as the IQ, simply change the typedef here + * to use the desired class instead, and recompile. In order to + * create a different CPU to be used simultaneously with this one, see + * the alpha_impl.hh file for instructions. + */ template<class Impl> struct SimpleCPUPolicy { + /** Typedef for the branch prediction unit (which includes the BP, + * RAS, and BTB). + */ typedef BPredUnit<Impl> BPredUnit; + /** Typedef for the register file. Most classes assume a unified + * physical register file. + */ typedef PhysRegFile<Impl> RegFile; + /** Typedef for the freelist of registers. */ typedef SimpleFreeList FreeList; + /** Typedef for the rename map. */ typedef SimpleRenameMap RenameMap; + /** Typedef for the ROB. */ typedef ROB<Impl> ROB; + /** Typedef for the instruction queue/scheduler. */ typedef InstructionQueue<Impl> IQ; + /** Typedef for the memory dependence unit. */ typedef MemDepUnit<StoreSet, Impl> MemDepUnit; + /** Typedef for the LSQ. */ typedef LSQ<Impl> LSQ; + /** Typedef for the thread-specific LSQ units. */ typedef LSQUnit<Impl> LSQUnit; - + /** Typedef for fetch. */ typedef DefaultFetch<Impl> Fetch; + /** Typedef for decode. */ typedef DefaultDecode<Impl> Decode; + /** Typedef for rename. */ typedef DefaultRename<Impl> Rename; + /** Typedef for Issue/Execute/Writeback. */ typedef DefaultIEW<Impl> IEW; + /** Typedef for commit. */ typedef DefaultCommit<Impl> Commit; /** The struct for communication between fetch and decode. 
*/ diff --git a/cpu/o3/decode.hh b/cpu/o3/decode.hh index 3035b3387..b336575a8 100644 --- a/cpu/o3/decode.hh +++ b/cpu/o3/decode.hh @@ -107,9 +107,12 @@ class DefaultDecode /** Sets pointer to list of active threads. */ void setActiveThreads(std::list<unsigned> *at_ptr); + /** Switches out the decode stage. */ void switchOut(); + /** Takes over from another CPU's thread. */ void takeOverFrom(); + /** Ticks decode, processing all input signals and decoding as many * instructions as possible. */ diff --git a/cpu/o3/decode_impl.hh b/cpu/o3/decode_impl.hh index 8d84d46c8..0b686375e 100644 --- a/cpu/o3/decode_impl.hh +++ b/cpu/o3/decode_impl.hh @@ -41,6 +41,7 @@ DefaultDecode<Impl>::DefaultDecode(Params *params) { _status = Inactive; + // Setup status, make sure stall signals are clear. for (int i = 0; i < numThreads; ++i) { decodeStatus[i] = Idle; @@ -165,6 +166,7 @@ template <class Impl> void DefaultDecode<Impl>::switchOut() { + // Decode can immediately switch out. cpu->signalSwitched(); } @@ -174,6 +176,7 @@ DefaultDecode<Impl>::takeOverFrom() { _status = Inactive; + // Be sure to reset state and clear out any old instructions. for (int i = 0; i < numThreads; ++i) { decodeStatus[i] = Idle; @@ -222,22 +225,22 @@ DefaultDecode<Impl>::block(unsigned tid) { DPRINTF(Decode, "[tid:%u]: Blocking.\n", tid); - // If the decode status is blocked or unblocking then decode has not yet - // signalled fetch to unblock. In that case, there is no need to tell - // fetch to block. - if (decodeStatus[tid] != Blocked && - decodeStatus[tid] != Unblocking) { - toFetch->decodeBlock[tid] = true; - wroteToTimeBuffer = true; - } - // Add the current inputs to the skid buffer so they can be // reprocessed when this stage unblocks. skidInsert(tid); + // If the decode status is blocked or unblocking then decode has not yet + // signalled fetch to unblock. In that case, there is no need to tell + // fetch to block. if (decodeStatus[tid] != Blocked) { // Set the status to Blocked. 
decodeStatus[tid] = Blocked; + + if (decodeStatus[tid] != Unblocking) { + toFetch->decodeBlock[tid] = true; + wroteToTimeBuffer = true; + } + return true; } @@ -270,13 +273,16 @@ DefaultDecode<Impl>::squash(DynInstPtr &inst, unsigned tid) DPRINTF(Decode, "[tid:%i]: Squashing due to incorrect branch prediction " "detected at decode.\n", tid); + // Send back mispredict information. toFetch->decodeInfo[tid].branchMispredict = true; toFetch->decodeInfo[tid].doneSeqNum = inst->seqNum; toFetch->decodeInfo[tid].predIncorrect = true; toFetch->decodeInfo[tid].squash = true; toFetch->decodeInfo[tid].nextPC = inst->readNextPC(); - toFetch->decodeInfo[tid].branchTaken = true; + toFetch->decodeInfo[tid].branchTaken = + inst->readNextPC() != (inst->readPC() + sizeof(TheISA::MachInst)); + // Might have to tell fetch to unblock. if (decodeStatus[tid] == Blocked || decodeStatus[tid] == Unblocking) { toFetch->decodeUnblock[tid] = 1; @@ -292,11 +298,12 @@ DefaultDecode<Impl>::squash(DynInstPtr &inst, unsigned tid) } } + // Clear the instruction list and skid buffer in case they have any + // insts in them. while (!insts[tid].empty()) { insts[tid].pop(); } - // Clear the skid buffer in case it has any data in it. while (!skidBuffer[tid].empty()) { skidBuffer[tid].pop(); } @@ -341,11 +348,12 @@ DefaultDecode<Impl>::squash(unsigned tid) } } + // Clear the instruction list and skid buffer in case they have any + // insts in them. while (!insts[tid].empty()) { insts[tid].pop(); } - // Clear the skid buffer in case it has any data in it. while (!skidBuffer[tid].empty()) { skidBuffer[tid].pop(); } diff --git a/cpu/o3/dep_graph.hh b/cpu/o3/dep_graph.hh index f8ae38da4..b6c5f1ab1 100644 --- a/cpu/o3/dep_graph.hh +++ b/cpu/o3/dep_graph.hh @@ -4,6 +4,7 @@ #include "cpu/o3/comm.hh" +/** Node in a linked list. 
*/ template <class DynInstPtr> class DependencyEntry { @@ -18,32 +19,50 @@ class DependencyEntry DependencyEntry<DynInstPtr> *next; }; +/** Array of linked list that maintains the dependencies between + * producing instructions and consuming instructions. Each linked + * list represents a single physical register, having the future + * producer of the register's value, and all consumers waiting on that + * value on the list. The head node of each linked list represents + * the producing instruction of that register. Instructions are put + * on the list upon reaching the IQ, and are removed from the list + * either when the producer completes, or the instruction is squashed. +*/ template <class DynInstPtr> class DependencyGraph { public: typedef DependencyEntry<DynInstPtr> DepEntry; + /** Default construction. Must call resize() prior to use. */ DependencyGraph() : numEntries(0), memAllocCounter(0), nodesTraversed(0), nodesRemoved(0) { } + /** Resize the dependency graph to have num_entries registers. */ void resize(int num_entries); + /** Clears all of the linked lists. */ void reset(); + /** Inserts an instruction to be dependent on the given index. */ void insert(PhysRegIndex idx, DynInstPtr &new_inst); + /** Sets the producing instruction of a given register. */ void setInst(PhysRegIndex idx, DynInstPtr &new_inst) { dependGraph[idx].inst = new_inst; } + /** Clears the producing instruction. */ void clearInst(PhysRegIndex idx) { dependGraph[idx].inst = NULL; } + /** Removes an instruction from a single linked list. */ void remove(PhysRegIndex idx, DynInstPtr &inst_to_remove); + /** Removes and returns the newest dependent of a specific register. */ DynInstPtr pop(PhysRegIndex idx); + /** Checks if there are any dependents on a specific register. */ bool empty(PhysRegIndex idx) { return !dependGraph[idx].next; } /** Debugging function to dump out the dependency graph. 
@@ -59,13 +78,16 @@ class DependencyGraph */ DepEntry *dependGraph; + /** Number of linked lists; identical to the number of registers. */ int numEntries; // Debug variable, remove when done testing. unsigned memAllocCounter; public: + // Debug variable, remove when done testing. uint64_t nodesTraversed; + // Debug variable, remove when done testing. uint64_t nodesRemoved; }; diff --git a/cpu/o3/fetch.hh b/cpu/o3/fetch.hh index 3fcfdc3a1..92a87ab54 100644 --- a/cpu/o3/fetch.hh +++ b/cpu/o3/fetch.hh @@ -42,7 +42,7 @@ class Sampler; * width is specified by the parameters; each cycle it tries to fetch * that many instructions. It supports using a branch predictor to * predict direction and targets. - * It supports the idling functionalitiy of the CPU by indicating to + * It supports the idling functionality of the CPU by indicating to * the CPU when it is active and inactive. */ template <class Impl> @@ -163,14 +163,19 @@ class DefaultFetch /** Processes cache completion event. */ void processCacheCompletion(MemReqPtr &req); + /** Begins the switch out of the fetch stage. */ void switchOut(); + /** Completes the switch out of the fetch stage. */ void doSwitchOut(); + /** Takes over from another CPU's thread. */ void takeOverFrom(); + /** Checks if the fetch stage is switched out. */ bool isSwitchedOut() { return switchedOut; } + /** Tells fetch to wake up from a quiesce instruction. */ void wakeFromQuiesce(); private: @@ -301,8 +306,10 @@ class DefaultFetch /** BPredUnit. */ BPredUnit branchPred; + /** Per-thread fetch PC. */ Addr PC[Impl::MaxThreads]; + /** Per-thread next PC. */ Addr nextPC[Impl::MaxThreads]; /** Memory request used to access cache. */ @@ -369,8 +376,12 @@ class DefaultFetch /** Thread ID being fetched. */ int threadFetched; + /** Checks if there is an interrupt pending. If there is, fetch + * must stop once it is not fetching PAL instructions. + */ bool interruptPending; + /** Records if fetch is switched out. 
*/ bool switchedOut; #if !FULL_SYSTEM @@ -394,17 +405,23 @@ class DefaultFetch * the pipeline. */ Stats::Scalar<> fetchIdleCycles; + /** Total number of cycles spent blocked. */ Stats::Scalar<> fetchBlockedCycles; - + /** Total number of cycles spent in any other state. */ Stats::Scalar<> fetchMiscStallCycles; /** Stat for total number of fetched cache lines. */ Stats::Scalar<> fetchedCacheLines; - + /** Total number of outstanding icache accesses that were dropped + * due to a squash. + */ Stats::Scalar<> fetchIcacheSquashes; /** Distribution of number of instructions fetched each cycle. */ Stats::Distribution<> fetchNisnDist; + /** Rate of how often fetch was idle. */ Stats::Formula idleRate; + /** Number of branch fetches per cycle. */ Stats::Formula branchRate; + /** Number of instruction fetched per cycle. */ Stats::Formula fetchRate; }; diff --git a/cpu/o3/fetch_impl.hh b/cpu/o3/fetch_impl.hh index 1c5e508f6..a309bd49a 100644 --- a/cpu/o3/fetch_impl.hh +++ b/cpu/o3/fetch_impl.hh @@ -161,59 +161,59 @@ void DefaultFetch<Impl>::regStats() { icacheStallCycles - .name(name() + ".FETCH:icacheStallCycles") + .name(name() + ".icacheStallCycles") .desc("Number of cycles fetch is stalled on an Icache miss") .prereq(icacheStallCycles); fetchedInsts - .name(name() + ".FETCH:Insts") + .name(name() + ".Insts") .desc("Number of instructions fetch has processed") .prereq(fetchedInsts); fetchedBranches - .name(name() + ".FETCH:Branches") + .name(name() + ".Branches") .desc("Number of branches that fetch encountered") .prereq(fetchedBranches); predictedBranches - .name(name() + ".FETCH:predictedBranches") + .name(name() + ".predictedBranches") .desc("Number of branches that fetch has predicted taken") .prereq(predictedBranches); fetchCycles - .name(name() + ".FETCH:Cycles") + .name(name() + ".Cycles") .desc("Number of cycles fetch has run and was not squashing or" " blocked") .prereq(fetchCycles); fetchSquashCycles - .name(name() + ".FETCH:SquashCycles") + .name(name() + 
".SquashCycles") .desc("Number of cycles fetch has spent squashing") .prereq(fetchSquashCycles); fetchIdleCycles - .name(name() + ".FETCH:IdleCycles") + .name(name() + ".IdleCycles") .desc("Number of cycles fetch was idle") .prereq(fetchIdleCycles); fetchBlockedCycles - .name(name() + ".FETCH:BlockedCycles") + .name(name() + ".BlockedCycles") .desc("Number of cycles fetch has spent blocked") .prereq(fetchBlockedCycles); fetchedCacheLines - .name(name() + ".FETCH:CacheLines") + .name(name() + ".CacheLines") .desc("Number of cache lines fetched") .prereq(fetchedCacheLines); fetchMiscStallCycles - .name(name() + ".FETCH:MiscStallCycles") + .name(name() + ".MiscStallCycles") .desc("Number of cycles fetch has spent waiting on interrupts, or " "bad addresses, or out of MSHRs") .prereq(fetchMiscStallCycles); fetchIcacheSquashes - .name(name() + ".FETCH:IcacheSquashes") + .name(name() + ".IcacheSquashes") .desc("Number of outstanding Icache misses that were squashed") .prereq(fetchIcacheSquashes); @@ -221,24 +221,24 @@ DefaultFetch<Impl>::regStats() .init(/* base value */ 0, /* last value */ fetchWidth, /* bucket size */ 1) - .name(name() + ".FETCH:rateDist") + .name(name() + ".rateDist") .desc("Number of instructions fetched each cycle (Total)") .flags(Stats::pdf); idleRate - .name(name() + ".FETCH:idleRate") + .name(name() + ".idleRate") .desc("Percent of cycles fetch was idle") .prereq(idleRate); idleRate = fetchIdleCycles * 100 / cpu->numCycles; branchRate - .name(name() + ".FETCH:branchRate") + .name(name() + ".branchRate") .desc("Number of branch fetches per cycle") .flags(Stats::total); - branchRate = predictedBranches / cpu->numCycles; + branchRate = fetchedBranches / cpu->numCycles; fetchRate - .name(name() + ".FETCH:rate") + .name(name() + ".rate") .desc("Number of inst fetches per cycle") .flags(Stats::total); fetchRate = fetchedInsts / cpu->numCycles; @@ -307,6 +307,7 @@ template<class Impl> void DefaultFetch<Impl>::initStage() { + // Setup PC and nextPC with 
initial state. for (int tid = 0; tid < numThreads; tid++) { PC[tid] = cpu->readPC(tid); nextPC[tid] = cpu->readNextPC(tid); @@ -323,8 +324,6 @@ DefaultFetch<Impl>::processCacheCompletion(MemReqPtr &req) // Only change the status if it's still waiting on the icache access // to return. - // Can keep track of how many cache accesses go unused due to - // misspeculation here. if (fetchStatus[tid] != IcacheMissStall || req != memReq[tid] || isSwitchedOut()) { @@ -358,6 +357,7 @@ template <class Impl> void DefaultFetch<Impl>::switchOut() { + // Fetch is ready to switch out at any time. switchedOut = true; cpu->signalSwitched(); } @@ -366,6 +366,7 @@ template <class Impl> void DefaultFetch<Impl>::doSwitchOut() { + // Branch predictor needs to have its state cleared. branchPred.switchOut(); } @@ -396,6 +397,7 @@ DefaultFetch<Impl>::wakeFromQuiesce() { DPRINTF(Fetch, "Waking up from quiesce\n"); // Hopefully this is safe + // @todo: Allow other threads to wake from quiesce. fetchStatus[0] = Running; } @@ -831,7 +833,7 @@ DefaultFetch<Impl>::checkSignalsAndUpdate(unsigned tid) } } - if (checkStall(tid) && fetchStatus[tid] != IcacheMissStall) { + if (fetchStatus[tid] != IcacheMissStall && checkStall(tid)) { DPRINTF(Fetch, "[tid:%i]: Setting to blocked\n",tid); fetchStatus[tid] = Blocked; @@ -1199,7 +1201,7 @@ DefaultFetch<Impl>::lsqCount() if (fetchStatus[high_pri] == Running || fetchStatus[high_pri] == IcacheMissComplete || - fetchStatus[high_pri] == Idle) + fetchStatus[high_pri] == Idle) return high_pri; else PQ.pop(); diff --git a/cpu/o3/fu_pool.cc b/cpu/o3/fu_pool.cc index fb2b5c00d..b28b5d37f 100644 --- a/cpu/o3/fu_pool.cc +++ b/cpu/o3/fu_pool.cc @@ -183,6 +183,8 @@ FUPool::getUnit(OpClass capability) } } + assert(fu_idx < numFU); + unitBusy[fu_idx] = true; return fu_idx; diff --git a/cpu/o3/fu_pool.hh b/cpu/o3/fu_pool.hh index da6fdc802..052e4832d 100644 --- a/cpu/o3/fu_pool.hh +++ b/cpu/o3/fu_pool.hh @@ -155,7 +155,10 @@ class FUPool : public SimObject return 
maxIssueLatencies[capability]; } + /** Switches out functional unit pool. */ void switchOut(); + + /** Takes over from another CPU's thread. */ void takeOverFrom(); }; diff --git a/cpu/o3/iew.hh b/cpu/o3/iew.hh index 935320628..eda6a6bc0 100644 --- a/cpu/o3/iew.hh +++ b/cpu/o3/iew.hh @@ -160,12 +160,16 @@ class DefaultIEW /** Sets pointer to the scoreboard. */ void setScoreboard(Scoreboard *sb_ptr); + /** Starts switch out of IEW stage. */ void switchOut(); + /** Completes switch out of IEW stage. */ void doSwitchOut(); + /** Takes over from another CPU's thread. */ void takeOverFrom(); + /** Returns if IEW is switched out. */ bool isSwitchedOut() { return switchedOut; } /** Sets page table pointer within LSQ. */ @@ -287,6 +291,7 @@ class DefaultIEW void tick(); private: + /** Updates execution stats based on the instruction. */ void updateExeInstStats(DynInstPtr &inst); /** Pointer to main time buffer used for backwards communication. */ @@ -429,6 +434,7 @@ class DefaultIEW /** Maximum size of the skid buffer. */ unsigned skidBufferMax; + /** Is this stage switched out. */ bool switchedOut; /** Stat for total number of idle cycles. */ @@ -470,9 +476,13 @@ class DefaultIEW /** Stat for total number of mispredicted branches detected at execute. */ Stats::Formula branchMispredicts; + /** Number of executed software prefetches. */ Stats::Vector<> exeSwp; + /** Number of executed nops. */ Stats::Vector<> exeNop; + /** Number of executed meomory references. */ Stats::Vector<> exeRefs; + /** Number of executed branches. */ Stats::Vector<> exeBranches; // Stats::Vector<> issued_ops; @@ -482,19 +492,30 @@ class DefaultIEW Stats::Vector<> dist_unissued; Stats::Vector2d<> stat_issued_inst_type; */ + /** Number of instructions issued per cycle. */ Stats::Formula issueRate; + /** Number of executed store instructions. */ Stats::Formula iewExecStoreInsts; // Stats::Formula issue_op_rate; // Stats::Formula fu_busy_rate; - + /** Number of instructions sent to commit. 
*/ Stats::Vector<> iewInstsToCommit; + /** Number of instructions that writeback. */ Stats::Vector<> writebackCount; + /** Number of instructions that wake consumers. */ Stats::Vector<> producerInst; + /** Number of instructions that wake up from producers. */ Stats::Vector<> consumerInst; + /** Number of instructions that were delayed in writing back due + * to resource contention. + */ Stats::Vector<> wbPenalized; + /** Number of instructions per cycle written back. */ Stats::Formula wbRate; + /** Average number of woken instructions per writeback. */ Stats::Formula wbFanout; + /** Number of instructions per cycle delayed in writing back . */ Stats::Formula wbPenalizedRate; }; diff --git a/cpu/o3/iew_impl.hh b/cpu/o3/iew_impl.hh index b0137d7fc..3ed20cb75 100644 --- a/cpu/o3/iew_impl.hh +++ b/cpu/o3/iew_impl.hh @@ -433,6 +433,7 @@ template <class Impl> void DefaultIEW<Impl>::switchOut() { + // IEW is ready to switch out at any time. cpu->signalSwitched(); } @@ -440,6 +441,7 @@ template <class Impl> void DefaultIEW<Impl>::doSwitchOut() { + // Clear any state. switchedOut = true; instQueue.switchOut(); @@ -458,6 +460,7 @@ template <class Impl> void DefaultIEW<Impl>::takeOverFrom() { + // Reset all state. _status = Active; exeStatus = Running; wbStatus = Idle; @@ -571,6 +574,7 @@ DefaultIEW<Impl>::squashDueToMemBlocked(DynInstPtr &inst, unsigned tid) toCommit->squashedSeqNum[tid] = inst->seqNum; toCommit->nextPC[tid] = inst->readPC(); + // Must include the broadcasted SN in the squash. toCommit->includeSquashInst[tid] = true; ldstQueue.setLoadBlockedHandled(tid); @@ -1104,6 +1108,7 @@ DefaultIEW<Impl>::dispatchInsts(unsigned tid) // Store conditionals need to be set as "canCommit()" // so that commit can process them when they reach the // head of commit. + // @todo: This is somewhat specific to Alpha. 
inst->setCanCommit(); instQueue.insertNonSpec(inst); add_to_iq = false; @@ -1363,6 +1368,7 @@ DefaultIEW<Impl>::executeInsts() } } + // Update and record activity if we processed any instructions. if (inst_num) { if (exeStatus == Idle) { exeStatus = Running; @@ -1413,8 +1419,10 @@ DefaultIEW<Impl>::writebackInsts() scoreboard->setReg(inst->renamedDestRegIdx(i)); } - producerInst[tid]++; - consumerInst[tid]+= dependents; + if (dependents) { + producerInst[tid]++; + consumerInst[tid]+= dependents; + } writebackCount[tid]++; } } @@ -1485,6 +1493,7 @@ DefaultIEW<Impl>::tick() DPRINTF(IEW,"Processing [tid:%i]\n",tid); + // Update structures based on instructions committed. if (fromCommit->commitInfo[tid].doneSeqNum != 0 && !fromCommit->commitInfo[tid].squash && !fromCommit->commitInfo[tid].robSquashing) { diff --git a/cpu/o3/inst_queue.hh b/cpu/o3/inst_queue.hh index 518de73d9..4802cbaf4 100644 --- a/cpu/o3/inst_queue.hh +++ b/cpu/o3/inst_queue.hh @@ -92,6 +92,9 @@ class InstructionQueue /** Pointer back to the instruction queue. */ InstructionQueue<Impl> *iqPtr; + /** Should the FU be added to the list to be freed upon + * completing this event. + */ bool freeFU; public: @@ -116,6 +119,7 @@ class InstructionQueue /** Registers statistics. */ void regStats(); + /** Resets all instruction queue state. */ void resetState(); /** Sets CPU pointer. */ @@ -133,10 +137,13 @@ class InstructionQueue /** Sets the global time buffer. */ void setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr); + /** Switches out the instruction queue. */ void switchOut(); + /** Takes over execution from another CPU's thread. */ void takeOverFrom(); + /** Returns if the IQ is switched out. */ bool isSwitchedOut() { return switchedOut; } /** Number of entries needed for given amount of threads. */ @@ -171,6 +178,9 @@ class InstructionQueue */ void insertBarrier(DynInstPtr &barr_inst); + /** Returns the oldest scheduled instruction, and removes it from + * the list of instructions waiting to execute. 
+ */ DynInstPtr getInstToExecute(); /** @@ -274,13 +284,15 @@ class InstructionQueue /** List of all the instructions in the IQ (some of which may be issued). */ std::list<DynInstPtr> instList[Impl::MaxThreads]; + /** List of instructions that are ready to be executed. */ std::list<DynInstPtr> instsToExecute; /** - * Struct for comparing entries to be added to the priority queue. This - * gives reverse ordering to the instructions in terms of sequence - * numbers: the instructions with smaller sequence numbers (and hence - * are older) will be at the top of the priority queue. + * Struct for comparing entries to be added to the priority queue. + * This gives reverse ordering to the instructions in terms of + * sequence numbers: the instructions with smaller sequence + * numbers (and hence are older) will be at the top of the + * priority queue. */ struct pqCompare { bool operator() (const DynInstPtr &lhs, const DynInstPtr &rhs) const @@ -393,6 +405,7 @@ class InstructionQueue */ unsigned commitToIEWDelay; + /** Is the IQ switched out. */ bool switchedOut; /** The sequence number of the squashed instruction. */ @@ -460,19 +473,28 @@ class InstructionQueue */ Stats::Scalar<> iqSquashedNonSpecRemoved; + /** Distribution of number of instructions in the queue. */ Stats::VectorDistribution<> queueResDist; + /** Distribution of the number of instructions issued. */ Stats::Distribution<> numIssuedDist; + /** Distribution of the cycles it takes to issue an instruction. */ Stats::VectorDistribution<> issueDelayDist; + /** Number of times an instruction could not be issued because a + * FU was busy. + */ Stats::Vector<> statFuBusy; // Stats::Vector<> dist_unissued; + /** Stat for total number issued for each instruction type. */ Stats::Vector2d<> statIssuedInstType; + /** Number of instructions issued per cycle. 
*/ Stats::Formula issueRate; // Stats::Formula issue_stores; // Stats::Formula issue_op_rate; - Stats::Vector<> fuBusy; //cumulative fu busy - + /** Number of times the FU was busy. */ + Stats::Vector<> fuBusy; + /** Number of times the FU was busy per instruction issued. */ Stats::Formula fuBusyRate; }; diff --git a/cpu/o3/inst_queue_impl.hh b/cpu/o3/inst_queue_impl.hh index f1dc4e01f..d677a259c 100644 --- a/cpu/o3/inst_queue_impl.hh +++ b/cpu/o3/inst_queue_impl.hh @@ -151,8 +151,10 @@ template <class Impl> InstructionQueue<Impl>::~InstructionQueue() { dependGraph.reset(); +#ifdef DEBUG cprintf("Nodes traversed: %i, removed: %i\n", dependGraph.nodesTraversed, dependGraph.nodesRemoved); +#endif } template <class Impl> @@ -669,14 +671,8 @@ InstructionQueue<Impl>::processFUCompletion(DynInstPtr &inst, int fu_idx) // @todo: Ensure that these FU Completions happen at the beginning // of a cycle, otherwise they could add too many instructions to // the queue. - // @todo: This could break if there's multiple multi-cycle ops - // finishing on this cycle. Maybe implement something like - // instToCommit in iew_impl.hh. issueToExecuteQueue->access(0)->size++; instsToExecute.push_back(inst); -// int &size = issueToExecuteQueue->access(0)->size; - -// issueToExecuteQueue->access(0)->insts[size++] = inst; } // @todo: Figure out a better way to remove the squashed items from the @@ -742,9 +738,10 @@ InstructionQueue<Impl>::scheduleReadyInsts() } } + // If we have an instruction that doesn't require a FU, or a + // valid FU, then schedule for execution. if (idx == -2 || idx != -1) { if (op_latency == 1) { -// i2e_info->insts[exec_queue_slot++] = issuing_inst; i2e_info->size++; instsToExecute.push_back(issuing_inst); @@ -762,14 +759,10 @@ InstructionQueue<Impl>::scheduleReadyInsts() // @todo: Enforce that issue_latency == 1 or op_latency if (issue_latency > 1) { + // If FU isn't pipelined, then it must be freed + // upon the execution completing. 
execution->setFreeFU(); } else { - // @todo: Not sure I'm accounting for the - // multi-cycle op in a pipelined FU properly, or - // the number of instructions issued in one cycle. -// i2e_info->insts[exec_queue_slot++] = issuing_inst; -// i2e_info->size++; - // Add the FU onto the list of FU's to be freed next cycle. fuPool->freeUnitNextCycle(idx); } @@ -814,6 +807,7 @@ InstructionQueue<Impl>::scheduleReadyInsts() numIssuedDist.sample(total_issued); iqInstsIssued+= total_issued; + // If we issued any instructions, tell the CPU we had activity. if (total_issued) { cpu->activityThisCycle(); } else { @@ -1364,4 +1358,45 @@ InstructionQueue<Impl>::dumpInsts() ++num; } } + + cprintf("Insts to Execute list:\n"); + + int num = 0; + int valid_num = 0; + ListIt inst_list_it = instsToExecute.begin(); + + while (inst_list_it != instsToExecute.end()) + { + cprintf("Instruction:%i\n", + num); + if (!(*inst_list_it)->isSquashed()) { + if (!(*inst_list_it)->isIssued()) { + ++valid_num; + cprintf("Count:%i\n", valid_num); + } else if ((*inst_list_it)->isMemRef() && + !(*inst_list_it)->memOpDone) { + // Loads that have not been marked as executed + // still count towards the total instructions. + ++valid_num; + cprintf("Count:%i\n", valid_num); + } + } + + cprintf("PC:%#x\n[sn:%lli]\n[tid:%i]\n" + "Issued:%i\nSquashed:%i\n", + (*inst_list_it)->readPC(), + (*inst_list_it)->seqNum, + (*inst_list_it)->threadNumber, + (*inst_list_it)->isIssued(), + (*inst_list_it)->isSquashed()); + + if ((*inst_list_it)->isMemRef()) { + cprintf("MemOpDone:%i\n", (*inst_list_it)->memOpDone); + } + + cprintf("\n"); + + inst_list_it++; + ++num; + } } diff --git a/cpu/o3/lsq.hh b/cpu/o3/lsq.hh index a1eeccbe7..b321d4590 100644 --- a/cpu/o3/lsq.hh +++ b/cpu/o3/lsq.hh @@ -49,6 +49,7 @@ class LSQ { typedef typename Impl::CPUPol::IEW IEW; typedef typename Impl::CPUPol::LSQUnit LSQUnit; + /** SMT policy. 
*/ enum LSQPolicy { Dynamic, Partitioned, @@ -69,8 +70,9 @@ class LSQ { void setIEW(IEW *iew_ptr); /** Sets the page table pointer. */ // void setPageTable(PageTable *pt_ptr); - + /** Switches out the LSQ. */ void switchOut(); + /** Takes over execution from another CPU's thread. */ void takeOverFrom(); /** Number of entries needed for the given amount of threads.*/ @@ -95,9 +97,6 @@ class LSQ { /** Executes a load. */ Fault executeLoad(DynInstPtr &inst); - Fault executeLoad(int lq_idx, unsigned tid) - { return thread[tid].executeLoad(lq_idx); } - /** Executes a store. */ Fault executeStore(DynInstPtr &inst); diff --git a/cpu/o3/lsq_unit.hh b/cpu/o3/lsq_unit.hh index 942b4583d..a6afff743 100644 --- a/cpu/o3/lsq_unit.hh +++ b/cpu/o3/lsq_unit.hh @@ -112,10 +112,13 @@ class LSQUnit { /** Sets the page table pointer. */ // void setPageTable(PageTable *pt_ptr); + /** Switches out LSQ unit. */ void switchOut(); + /** Takes over from another CPU's thread. */ void takeOverFrom(); + /** Returns if the LSQ is switched out. */ bool isSwitchedOut() { return switchedOut; } /** Ticks the LSQ unit, which in this case only resets the number of @@ -180,12 +183,15 @@ class LSQUnit { bool loadBlocked() { return isLoadBlocked; } + /** Clears the signal that a load became blocked. */ void clearLoadBlocked() { isLoadBlocked = false; } + /** Returns if the blocked load was handled. */ bool isLoadBlockedHandled() { return loadBlockedHandled; } + /** Records the blocked load as being handled. */ void setLoadBlockedHandled() { loadBlockedHandled = true; } @@ -331,6 +337,7 @@ class LSQUnit { /** The number of used cache ports in this cycle. */ int usedPorts; + /** Is the LSQ switched out. */ bool switchedOut; //list<InstSeqNum> mshrSeqNums; @@ -350,8 +357,10 @@ class LSQUnit { /** Whether or not a load is blocked due to the memory system. */ bool isLoadBlocked; + /** Has the blocked load been handled. */ bool loadBlockedHandled; + /** The sequence number of the blocked load. 
*/ InstSeqNum blockedLoadSeqNum; /** The oldest load that caused a memory ordering violation. */ @@ -452,10 +461,10 @@ LSQUnit<Impl>::read(MemReqPtr &req, T &data, int load_idx) cpu->lockFlag = true; } #endif - req->cmd = Read; - assert(!req->completionEvent); - req->completionEvent = NULL; - req->time = curTick; + req->cmd = Read; + assert(!req->completionEvent); + req->completionEvent = NULL; + req->time = curTick; while (store_idx != -1) { // End once we've reached the top of the LSQ diff --git a/cpu/o3/lsq_unit_impl.hh b/cpu/o3/lsq_unit_impl.hh index 10f2b5572..4ee8bb234 100644 --- a/cpu/o3/lsq_unit_impl.hh +++ b/cpu/o3/lsq_unit_impl.hh @@ -477,7 +477,6 @@ LSQUnit<Impl>::commitLoad() DPRINTF(LSQUnit, "Committing head load instruction, PC %#x\n", loadQueue[loadHead]->readPC()); - loadQueue[loadHead] = NULL; incrLdIdx(loadHead); diff --git a/cpu/o3/mem_dep_unit.hh b/cpu/o3/mem_dep_unit.hh index acbe08ec2..bb0406de1 100644 --- a/cpu/o3/mem_dep_unit.hh +++ b/cpu/o3/mem_dep_unit.hh @@ -84,8 +84,10 @@ class MemDepUnit { /** Registers statistics. */ void regStats(); + /** Switches out the memory dependence predictor. */ void switchOut(); + /** Takes over from another CPU's thread. */ void takeOverFrom(); /** Sets the pointer to the IQ. */ @@ -155,10 +157,12 @@ class MemDepUnit { : inst(new_inst), regsReady(false), memDepReady(false), completed(false), squashed(false) { +#ifdef DEBUG ++memdep_count; DPRINTF(MemDepUnit, "Memory dependency entry created. " "memdep_count=%i\n", memdep_count); +#endif } /** Frees any pointers. */ @@ -167,11 +171,12 @@ class MemDepUnit { for (int i = 0; i < dependInsts.size(); ++i) { dependInsts[i] = NULL; } - +#ifdef DEBUG --memdep_count; DPRINTF(MemDepUnit, "Memory dependency entry deleted. " "memdep_count=%i\n", memdep_count); +#endif } /** Returns the name of the memory dependence entry. */ @@ -196,9 +201,11 @@ class MemDepUnit { bool squashed; /** For debugging. 
*/ +#ifdef DEBUG static int memdep_count; static int memdep_insert; static int memdep_erase; +#endif }; /** Finds the memory dependence entry in the hash map. */ @@ -227,9 +234,13 @@ class MemDepUnit { */ MemDepPred depPred; + /** Is there an outstanding load barrier that loads must wait on. */ bool loadBarrier; + /** The sequence number of the load barrier. */ InstSeqNum loadBarrierSN; + /** Is there an outstanding store barrier that loads must wait on. */ bool storeBarrier; + /** The sequence number of the store barrier. */ InstSeqNum storeBarrierSN; /** Pointer to the IQ. */ diff --git a/cpu/o3/mem_dep_unit_impl.hh b/cpu/o3/mem_dep_unit_impl.hh index 8b195baab..595e9293f 100644 --- a/cpu/o3/mem_dep_unit_impl.hh +++ b/cpu/o3/mem_dep_unit_impl.hh @@ -105,6 +105,7 @@ template <class MemDepPred, class Impl> void MemDepUnit<MemDepPred, Impl>::switchOut() { + // Clear any state. for (int i = 0; i < Impl::MaxThreads; ++i) { instList[i].clear(); } @@ -116,6 +117,7 @@ template <class MemDepPred, class Impl> void MemDepUnit<MemDepPred, Impl>::takeOverFrom() { + // Be sure to reset all state. loadBarrier = storeBarrier = false; loadBarrierSN = storeBarrierSN = 0; depPred.clear(); @@ -146,7 +148,7 @@ MemDepUnit<MemDepPred, Impl>::insert(DynInstPtr &inst) inst_entry->listIt = --(instList[tid].end()); // Check any barriers and the dependence predictor for any - // producing stores. + // producing memrefs/stores. InstSeqNum producing_store; if (inst->isLoad() && loadBarrier) { producing_store = loadBarrierSN; @@ -253,6 +255,7 @@ void MemDepUnit<MemDepPred, Impl>::insertBarrier(DynInstPtr &barr_inst) { InstSeqNum barr_sn = barr_inst->seqNum; + // Memory barriers block loads and stores, write barriers only stores. if (barr_inst->isMemBarrier()) { loadBarrier = true; loadBarrierSN = barr_sn; @@ -330,6 +333,7 @@ MemDepUnit<MemDepPred, Impl>::replay(DynInstPtr &inst) DynInstPtr temp_inst; bool found_inst = false; + // For now this replay function replays all waiting memory ops. 
while (!instsToReplay.empty()) { temp_inst = instsToReplay.front(); diff --git a/cpu/o3/rename.hh b/cpu/o3/rename.hh index 3f1a27bb5..4912431ad 100644 --- a/cpu/o3/rename.hh +++ b/cpu/o3/rename.hh @@ -155,10 +155,13 @@ class DefaultRename /** Sets pointer to the scoreboard. */ void setScoreboard(Scoreboard *_scoreboard); + /** Switches out the rename stage. */ void switchOut(); + /** Completes the switch out. */ void doSwitchOut(); + /** Takes over from another CPU's thread. */ void takeOverFrom(); /** Squashes all instructions in a thread. */ @@ -243,8 +246,10 @@ class DefaultRename /** Checks if any stages are telling rename to block. */ bool checkStall(unsigned tid); + /** Gets the number of free entries for a specific thread. */ void readFreeEntries(unsigned tid); + /** Checks the signals and updates the status. */ bool checkSignalsAndUpdate(unsigned tid); /** Either serializes on the next instruction available in the InstQueue, @@ -454,8 +459,11 @@ class DefaultRename Stats::Scalar<> renameCommittedMaps; /** Stat for total number of mappings that were undone due to a squash. */ Stats::Scalar<> renameUndoneMaps; + /** Number of serialize instructions handled. */ Stats::Scalar<> renamedSerializing; + /** Number of instructions marked as temporarily serializing. */ Stats::Scalar<> renamedTempSerializing; + /** Number of instructions inserted into skid buffers. */ Stats::Scalar<> renameSkidInsts; }; diff --git a/cpu/o3/rename_impl.hh b/cpu/o3/rename_impl.hh index b4f1077d1..829c99584 100644 --- a/cpu/o3/rename_impl.hh +++ b/cpu/o3/rename_impl.hh @@ -258,6 +258,7 @@ template <class Impl> void DefaultRename<Impl>::switchOut() { + // Rename is ready to switch out at any time. cpu->signalSwitched(); } @@ -265,6 +266,7 @@ template <class Impl> void DefaultRename<Impl>::doSwitchOut() { + // Clear any state, fix up the rename map. 
for (int i = 0; i < numThreads; i++) { typename list<RenameHistory>::iterator hb_it = historyBuffer[i].begin(); diff --git a/cpu/o3/rename_map.hh b/cpu/o3/rename_map.hh index d7e49ae83..1ac627264 100644 --- a/cpu/o3/rename_map.hh +++ b/cpu/o3/rename_map.hh @@ -62,12 +62,13 @@ class SimpleRenameMap typedef std::pair<PhysRegIndex, PhysRegIndex> RenameInfo; public: - //Constructor - SimpleRenameMap() {}; + /** Default constructor. init() must be called prior to use. */ + SimpleRenameMap() {}; /** Destructor. */ ~SimpleRenameMap(); + /** Initializes rename map with given parameters. */ void init(unsigned _numLogicalIntRegs, unsigned _numPhysicalIntRegs, PhysRegIndex &_int_reg_start, @@ -84,6 +85,7 @@ class SimpleRenameMap int id, bool bindRegs); + /** Sets the free list used with this rename map. */ void setFreeList(SimpleFreeList *fl_ptr); //Tell rename map to get a free physical register for a given @@ -149,7 +151,6 @@ class SimpleRenameMap { } }; - //Change this to private private: /** Integer rename map. */ std::vector<RenameEntry> intRenameMap; diff --git a/cpu/o3/rob.hh b/cpu/o3/rob.hh index e05eebe5a..bdbdde32f 100644 --- a/cpu/o3/rob.hh +++ b/cpu/o3/rob.hh @@ -95,8 +95,10 @@ class ROB */ void setActiveThreads(std::list<unsigned>* at_ptr); + /** Switches out the ROB. */ void switchOut(); + /** Takes over another CPU's thread. */ void takeOverFrom(); /** Function to insert an instruction into the ROB. Note that whatever @@ -298,6 +300,7 @@ class ROB /** Number of instructions in the ROB. */ int numInstsInROB; + /** Dummy instruction returned if there are no insts left. */ DynInstPtr dummyInst; private: diff --git a/cpu/o3/store_set.cc b/cpu/o3/store_set.cc index 0c957c8c7..67ccf1b55 100644 --- a/cpu/o3/store_set.cc +++ b/cpu/o3/store_set.cc @@ -26,6 +26,7 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ +#include "base/intmath.hh" #include "base/trace.hh" #include "cpu/o3/store_set.hh" @@ -36,6 +37,10 @@ StoreSet::StoreSet(int _SSIT_size, int _LFST_size) DPRINTF(StoreSet, "StoreSet: SSIT size: %i, LFST size: %i.\n", SSITSize, LFSTSize); + if (!isPowerOf2(SSITSize)) { + fatal("Invalid SSIT size!\n"); + } + SSIT.resize(SSITSize); validSSIT.resize(SSITSize); @@ -43,6 +48,10 @@ StoreSet::StoreSet(int _SSIT_size, int _LFST_size) for (int i = 0; i < SSITSize; ++i) validSSIT[i] = false; + if (!isPowerOf2(LFSTSize)) { + fatal("Invalid LFST size!\n"); + } + LFST.resize(LFSTSize); validLFST.resize(LFSTSize); @@ -318,3 +327,19 @@ StoreSet::clear() storeList.clear(); } + +void +StoreSet::dump() +{ + cprintf("storeList.size(): %i\n", storeList.size()); + SeqNumMapIt store_list_it = storeList.begin(); + + int num = 0; + + while (store_list_it != storeList.end()) { + cprintf("%i: [sn:%lli] SSID:%i\n", + num, (*store_list_it).first, (*store_list_it).second); + num++; + store_list_it++; + } +} diff --git a/cpu/o3/store_set.hh b/cpu/o3/store_set.hh index 7189db3ab..5f875131c 100644 --- a/cpu/o3/store_set.hh +++ b/cpu/o3/store_set.hh @@ -44,58 +44,98 @@ struct ltseqnum { } }; +/** + * Implements a store set predictor for determining if memory + * instructions are dependent upon each other. See paper "Memory + * Dependence Prediction using Store Sets" by Chrysos and Emer. SSID + * stands for Store Set ID, SSIT stands for Store Set ID Table, and + * LFST is Last Fetched Store Table. + */ class StoreSet { public: typedef unsigned SSID; public: + /** Default constructor. init() must be called prior to use. */ StoreSet() { }; + /** Creates store set predictor with given table sizes. */ StoreSet(int SSIT_size, int LFST_size); + /** Default destructor. */ ~StoreSet(); + /** Initializes the store set predictor with the given table sizes. */ void init(int SSIT_size, int LFST_size); + /** Records a memory ordering violation between the younger load + * and the older store. 
*/ void violation(Addr store_PC, Addr load_PC); + /** Inserts a load into the store set predictor. This does nothing but + * is included in case other predictors require a similar function. + */ void insertLoad(Addr load_PC, InstSeqNum load_seq_num); + /** Inserts a store into the store set predictor. Updates the + * LFST if the store has a valid SSID. */ void insertStore(Addr store_PC, InstSeqNum store_seq_num, unsigned tid); + /** Checks if the instruction with the given PC is dependent upon + * any store. @return Returns the sequence number of the store + * instruction this PC is dependent upon. Returns 0 if none. + */ InstSeqNum checkInst(Addr PC); + /** Records this PC/sequence number as issued. */ void issued(Addr issued_PC, InstSeqNum issued_seq_num, bool is_store); + /** Squashes for a specific thread until the given sequence number. */ void squash(InstSeqNum squashed_num, unsigned tid); + /** Resets all tables. */ void clear(); + /** Debug function to dump the contents of the store list. */ + void dump(); + private: + /** Calculates the index into the SSIT based on the PC. */ inline int calcIndex(Addr PC) { return (PC >> offsetBits) & indexMask; } + /** Calculates a Store Set ID based on the PC. */ inline SSID calcSSID(Addr PC) { return ((PC ^ (PC >> 10)) % LFSTSize); } + /** The Store Set ID Table. */ std::vector<SSID> SSIT; + /** Bit vector to tell if the SSIT has a valid entry. */ std::vector<bool> validSSIT; + /** Last Fetched Store Table. */ std::vector<InstSeqNum> LFST; + /** Bit vector to tell if the LFST has a valid entry. */ std::vector<bool> validLFST; + /** Map of stores that have been inserted into the store set, but + * not yet issued or squashed. + */ std::map<InstSeqNum, int, ltseqnum> storeList; typedef std::map<InstSeqNum, int, ltseqnum>::iterator SeqNumMapIt; + /** Store Set ID Table size, in entries. */ int SSITSize; + /** Last Fetched Store Table size, in entries. */ int LFSTSize; + /** Mask to obtain the index. 
*/ int indexMask; // HACK: Hardcoded for now. diff --git a/cpu/o3/thread_state.hh b/cpu/o3/thread_state.hh index 2c9788e4b..3f1208ea0 100644 --- a/cpu/o3/thread_state.hh +++ b/cpu/o3/thread_state.hh @@ -58,16 +58,26 @@ struct O3ThreadState : public ThreadState { typedef ExecContext::Status Status; typedef typename Impl::FullCPU FullCPU; + /** Current status of the thread. */ Status _status; - // Current instruction + /** Current instruction the thread is committing. Only set and + * used for DTB faults currently. + */ TheISA::MachInst inst; + private: + /** Pointer to the CPU. */ FullCPU *cpu; public: - + /** Whether or not the thread is currently in syscall mode, and + * thus able to be externally updated without squashing. + */ bool inSyscall; + /** Whether or not the thread is currently waiting on a trap, and + * thus able to be externally updated without squashing. + */ bool trapPending; #if FULL_SYSTEM @@ -88,31 +98,44 @@ struct O3ThreadState : public ThreadState { { } #endif + /** Pointer to the ExecContext of this thread. @todo: Don't call + this a proxy.*/ ExecContext *xcProxy; + /** Returns a pointer to the XC of this thread. */ ExecContext *getXCProxy() { return xcProxy; } + /** Returns the status of this thread. */ Status status() const { return _status; } + /** Sets the status of this thread. */ void setStatus(Status new_status) { _status = new_status; } #if !FULL_SYSTEM + /** Returns if this address is a valid instruction address. */ bool validInstAddr(Addr addr) { return process->validInstAddr(addr); } + /** Returns if this address is a valid data address. */ bool validDataAddr(Addr addr) { return process->validDataAddr(addr); } #endif - bool misspeculating() { return false; } - + /** Sets the current instruction being committed. */ void setInst(TheISA::MachInst _inst) { inst = _inst; } + /** Reads the number of instructions functionally executed and + * committed. 
+ */ Counter readFuncExeInst() { return funcExeInst; } + /** Sets the total number of instructions functionally executed + * and committed. + */ void setFuncExeInst(Counter new_val) { funcExeInst = new_val; } #if !FULL_SYSTEM + /** Handles the syscall. */ void syscall() { process->syscall(xcProxy); } #endif }; |