From 7940c10ace28d5b93a61d4d278e6647e0c497149 Mon Sep 17 00:00:00 2001 From: Kevin Lim Date: Fri, 2 Jun 2006 18:15:20 -0400 Subject: Fixes to get compiling to work. This is mainly fixing up some includes; changing functions within the XCs; changing MemReqPtrs to Requests or Packets where appropriate. Currently the O3 and Ozone CPUs do not work in the new memory system; I still need to fix up the ports to work and handle responses properly. This check-in is so that the merge between m5 and newmem is no longer outstanding. src/SConscript: Need to include FU Pool for new CPU model. I'll try to figure out a cleaner way to handle this in the future. src/base/traceflags.py: Include new traces flags, fix up merge mess up. src/cpu/SConscript: Include the base_dyn_inst.cc as one of othe sources. Don't compile the Ozone CPU for now. src/cpu/base.cc: Remove an extra } from the merge. src/cpu/base_dyn_inst.cc: Fixes to make compiling work. Don't instantiate the OzoneCPU for now. src/cpu/base_dyn_inst.hh: src/cpu/o3/2bit_local_pred.cc: src/cpu/o3/alpha_cpu_builder.cc: src/cpu/o3/alpha_cpu_impl.hh: src/cpu/o3/alpha_dyn_inst.hh: src/cpu/o3/alpha_params.hh: src/cpu/o3/bpred_unit.cc: src/cpu/o3/btb.hh: src/cpu/o3/commit.hh: src/cpu/o3/commit_impl.hh: src/cpu/o3/cpu.cc: src/cpu/o3/cpu.hh: src/cpu/o3/fetch.hh: src/cpu/o3/fetch_impl.hh: src/cpu/o3/free_list.hh: src/cpu/o3/iew.hh: src/cpu/o3/iew_impl.hh: src/cpu/o3/inst_queue.hh: src/cpu/o3/inst_queue_impl.hh: src/cpu/o3/regfile.hh: src/cpu/o3/sat_counter.hh: src/cpu/op_class.hh: src/cpu/ozone/cpu.hh: src/cpu/checker/cpu.cc: src/cpu/checker/cpu.hh: src/cpu/checker/exec_context.hh: src/cpu/checker/o3_cpu_builder.cc: src/cpu/ozone/cpu_impl.hh: src/mem/request.hh: src/cpu/o3/fu_pool.hh: src/cpu/o3/lsq.hh: src/cpu/o3/lsq_unit.hh: src/cpu/o3/lsq_unit_impl.hh: src/cpu/o3/thread_state.hh: src/cpu/ozone/back_end.hh: src/cpu/ozone/dyn_inst.cc: src/cpu/ozone/dyn_inst.hh: src/cpu/ozone/front_end.hh: src/cpu/ozone/inorder_back_end.hh: src/cpu/ozone/lw_back_end.hh: src/cpu/ozone/lw_lsq.hh: src/cpu/ozone/ozone_impl.hh: src/cpu/ozone/thread_state.hh: Fixes to get compiling to work. src/cpu/o3/alpha_cpu.hh: Fixes to get compiling to work. Float reg accessors have changed, as well as MemReqPtrs to RequestPtrs. src/cpu/o3/alpha_dyn_inst_impl.hh: Fixes to get compiling to work. Pass in the packet to the completeAcc function. Fix up syscall function. --HG-- rename : cpu/activity.cc => src/cpu/activity.cc rename : cpu/activity.hh => src/cpu/activity.hh rename : cpu/checker/cpu.cc => src/cpu/checker/cpu.cc rename : cpu/checker/cpu.hh => src/cpu/checker/cpu.hh rename : cpu/checker/cpu_builder.cc => src/cpu/checker/cpu_builder.cc rename : cpu/checker/exec_context.hh => src/cpu/checker/exec_context.hh rename : cpu/checker/o3_cpu_builder.cc => src/cpu/checker/o3_cpu_builder.cc rename : cpu/o3/dep_graph.hh => src/cpu/o3/dep_graph.hh rename : cpu/o3/fu_pool.cc => src/cpu/o3/fu_pool.cc rename : cpu/o3/fu_pool.hh => src/cpu/o3/fu_pool.hh rename : cpu/o3/lsq.cc => src/cpu/o3/lsq.cc rename : cpu/o3/lsq.hh => src/cpu/o3/lsq.hh rename : cpu/o3/lsq_impl.hh => src/cpu/o3/lsq_impl.hh rename : cpu/o3/lsq_unit.cc => src/cpu/o3/lsq_unit.cc rename : cpu/o3/lsq_unit.hh => src/cpu/o3/lsq_unit.hh rename : cpu/o3/lsq_unit_impl.hh => src/cpu/o3/lsq_unit_impl.hh rename : cpu/o3/scoreboard.cc => src/cpu/o3/scoreboard.cc rename : cpu/o3/scoreboard.hh => src/cpu/o3/scoreboard.hh rename : cpu/o3/thread_state.hh => src/cpu/o3/thread_state.hh rename : cpu/ozone/back_end.cc => src/cpu/ozone/back_end.cc rename : cpu/ozone/back_end.hh => src/cpu/ozone/back_end.hh rename : cpu/ozone/back_end_impl.hh => src/cpu/ozone/back_end_impl.hh rename : cpu/ozone/cpu_builder.cc => src/cpu/ozone/cpu_builder.cc rename : cpu/ozone/dyn_inst.cc => src/cpu/ozone/dyn_inst.cc rename : cpu/ozone/dyn_inst.hh => src/cpu/ozone/dyn_inst.hh rename : cpu/ozone/dyn_inst_impl.hh => src/cpu/ozone/dyn_inst_impl.hh rename : cpu/ozone/front_end.cc => src/cpu/ozone/front_end.cc rename : cpu/ozone/front_end.hh => src/cpu/ozone/front_end.hh rename : cpu/ozone/front_end_impl.hh => src/cpu/ozone/front_end_impl.hh rename : cpu/ozone/inorder_back_end.cc => src/cpu/ozone/inorder_back_end.cc rename : cpu/ozone/inorder_back_end.hh => src/cpu/ozone/inorder_back_end.hh rename : cpu/ozone/inorder_back_end_impl.hh => src/cpu/ozone/inorder_back_end_impl.hh rename : cpu/ozone/inst_queue.cc => src/cpu/ozone/inst_queue.cc rename : cpu/ozone/inst_queue.hh => src/cpu/ozone/inst_queue.hh rename : cpu/ozone/inst_queue_impl.hh => src/cpu/ozone/inst_queue_impl.hh rename : cpu/ozone/lsq_unit.cc => src/cpu/ozone/lsq_unit.cc rename : cpu/ozone/lsq_unit.hh => src/cpu/ozone/lsq_unit.hh rename : cpu/ozone/lsq_unit_impl.hh => src/cpu/ozone/lsq_unit_impl.hh rename : cpu/ozone/lw_back_end.cc => src/cpu/ozone/lw_back_end.cc rename : cpu/ozone/lw_back_end.hh => src/cpu/ozone/lw_back_end.hh rename : cpu/ozone/lw_back_end_impl.hh => src/cpu/ozone/lw_back_end_impl.hh rename : cpu/ozone/lw_lsq.cc => src/cpu/ozone/lw_lsq.cc rename : cpu/ozone/lw_lsq.hh => src/cpu/ozone/lw_lsq.hh rename : cpu/ozone/lw_lsq_impl.hh => src/cpu/ozone/lw_lsq_impl.hh rename : cpu/ozone/null_predictor.hh => src/cpu/ozone/null_predictor.hh rename : cpu/ozone/ozone_impl.hh => src/cpu/ozone/ozone_impl.hh rename : cpu/ozone/rename_table.cc => src/cpu/ozone/rename_table.cc rename : cpu/ozone/rename_table.hh => src/cpu/ozone/rename_table.hh rename : cpu/ozone/rename_table_impl.hh => src/cpu/ozone/rename_table_impl.hh rename : cpu/ozone/simple_impl.hh => src/cpu/ozone/simple_impl.hh rename : cpu/ozone/simple_params.hh => src/cpu/ozone/simple_params.hh rename : cpu/ozone/thread_state.hh => src/cpu/ozone/thread_state.hh rename : cpu/quiesce_event.cc => src/cpu/quiesce_event.cc rename : cpu/quiesce_event.hh => src/cpu/quiesce_event.hh rename : cpu/thread_state.hh => src/cpu/thread_state.hh rename : python/m5/objects/FUPool.py => src/python/m5/objects/FUPool.py rename : python/m5/objects/OzoneCPU.py => src/python/m5/objects/OzoneCPU.py rename : python/m5/objects/SimpleOzoneCPU.py => src/python/m5/objects/SimpleOzoneCPU.py extra : convert_revision : ca7f0fbf65ee1a70d482fb4eda9a1840c7f9b8f8 --- src/cpu/o3/2bit_local_pred.cc | 1 + src/cpu/o3/alpha_cpu.hh | 80 ++-- src/cpu/o3/alpha_cpu_builder.cc | 13 +- src/cpu/o3/alpha_cpu_impl.hh | 86 ++-- src/cpu/o3/alpha_dyn_inst.hh | 9 +- src/cpu/o3/alpha_dyn_inst_impl.hh | 12 +- src/cpu/o3/alpha_params.hh | 9 +- src/cpu/o3/bpred_unit.cc | 4 +- src/cpu/o3/btb.hh | 1 + src/cpu/o3/commit.hh | 4 - src/cpu/o3/commit_impl.hh | 3 +- src/cpu/o3/cpu.cc | 21 +- src/cpu/o3/cpu.hh | 22 +- src/cpu/o3/dep_graph.hh | 213 ++++++++++ src/cpu/o3/fetch.hh | 67 +-- src/cpu/o3/fetch_impl.hh | 180 ++++---- src/cpu/o3/free_list.hh | 1 + src/cpu/o3/fu_pool.cc | 295 +++++++++++++ src/cpu/o3/fu_pool.hh | 162 +++++++ src/cpu/o3/iew.hh | 19 - src/cpu/o3/iew_impl.hh | 56 +-- src/cpu/o3/inst_queue.hh | 2 +- src/cpu/o3/inst_queue_impl.hh | 3 +- src/cpu/o3/lsq.cc | 36 ++ src/cpu/o3/lsq.hh | 324 ++++++++++++++ src/cpu/o3/lsq_impl.hh | 538 +++++++++++++++++++++++ src/cpu/o3/lsq_unit.cc | 36 ++ src/cpu/o3/lsq_unit.hh | 629 +++++++++++++++++++++++++++ src/cpu/o3/lsq_unit_impl.hh | 866 ++++++++++++++++++++++++++++++++++++++ src/cpu/o3/regfile.hh | 36 +- src/cpu/o3/sat_counter.hh | 1 + src/cpu/o3/scoreboard.cc | 106 +++++ src/cpu/o3/scoreboard.hh | 114 +++++ src/cpu/o3/thread_state.hh | 112 +++++ 34 files changed, 3743 insertions(+), 318 deletions(-) create mode 100644 src/cpu/o3/dep_graph.hh create mode 100644 src/cpu/o3/fu_pool.cc create mode 100644 src/cpu/o3/fu_pool.hh create mode 100644 src/cpu/o3/lsq.cc create mode 100644 src/cpu/o3/lsq.hh create mode 100644 src/cpu/o3/lsq_impl.hh create mode 100644 src/cpu/o3/lsq_unit.cc create mode 100644 src/cpu/o3/lsq_unit.hh create mode 100644 src/cpu/o3/lsq_unit_impl.hh create mode 100644 src/cpu/o3/scoreboard.cc create mode 100644 src/cpu/o3/scoreboard.hh create mode 100644 src/cpu/o3/thread_state.hh (limited to 'src/cpu/o3') diff --git a/src/cpu/o3/2bit_local_pred.cc b/src/cpu/o3/2bit_local_pred.cc index c3fb2fdb8..2f768fd34 100644 --- a/src/cpu/o3/2bit_local_pred.cc +++ b/src/cpu/o3/2bit_local_pred.cc @@ -27,6 +27,7 @@ */ #include "base/intmath.hh" +#include "base/misc.hh" #include "base/trace.hh" #include "cpu/o3/2bit_local_pred.hh" diff --git a/src/cpu/o3/alpha_cpu.hh b/src/cpu/o3/alpha_cpu.hh index 1bab0703e..fe88a1acc 100644 --- a/src/cpu/o3/alpha_cpu.hh +++ b/src/cpu/o3/alpha_cpu.hh @@ -39,11 +39,15 @@ namespace Kernel { class Statistics; }; +class TranslatingPort; + template class AlphaFullCPU : public FullO3CPU { protected: typedef TheISA::IntReg IntReg; + typedef TheISA::FloatReg FloatReg; + typedef TheISA::FloatRegBits FloatRegBits; typedef TheISA::MiscReg MiscReg; typedef TheISA::RegFile RegFile; typedef TheISA::MiscRegFile MiscRegFile; @@ -69,7 +73,7 @@ class AlphaFullCPU : public FullO3CPU virtual int readCpuId() { return cpu->cpu_id; } - virtual FunctionalMemory *getMemPtr() { return thread->mem; } + virtual TranslatingPort *getMemPort() { return /*thread->port*/ NULL; } #if FULL_SYSTEM virtual System *getSystemPtr() { return cpu->system; } @@ -135,19 +139,23 @@ class AlphaFullCPU : public FullO3CPU virtual uint64_t readIntReg(int reg_idx); - virtual float readFloatRegSingle(int reg_idx); + virtual FloatReg readFloatReg(int reg_idx, int width); + + virtual FloatReg readFloatReg(int reg_idx); - virtual double readFloatRegDouble(int reg_idx); + virtual FloatRegBits readFloatRegBits(int reg_idx, int width); - virtual uint64_t readFloatRegInt(int reg_idx); + virtual FloatRegBits readFloatRegBits(int reg_idx); virtual void setIntReg(int reg_idx, uint64_t val); - virtual void setFloatRegSingle(int reg_idx, float val); + virtual void setFloatReg(int reg_idx, FloatReg val, int width); + + virtual void setFloatReg(int reg_idx, FloatReg val); - virtual void setFloatRegDouble(int reg_idx, double val); + virtual void setFloatRegBits(int reg_idx, FloatRegBits val, int width); - virtual void setFloatRegInt(int reg_idx, uint64_t val); + virtual void setFloatRegBits(int reg_idx, FloatRegBits val); virtual uint64_t readPC() { return cpu->readPC(thread->tid); } @@ -159,6 +167,15 @@ class AlphaFullCPU : public FullO3CPU virtual void setNextPC(uint64_t val); + virtual uint64_t readNextNPC() + { + panic("Alpha has no NextNPC!"); + return 0; + } + + virtual void setNextNPC(uint64_t val) + { panic("Alpha has no NextNPC!"); } + virtual MiscReg readMiscReg(int misc_reg) { return cpu->readMiscReg(misc_reg, thread->tid); } @@ -193,10 +210,14 @@ class AlphaFullCPU : public FullO3CPU virtual void setSyscallReturn(SyscallReturn return_value); - virtual void syscall() { return cpu->syscall(thread->tid); } + virtual void syscall(int64_t callnum) + { return cpu->syscall(callnum, thread->tid); } virtual Counter readFuncExeInst() { return thread->funcExeInst; } #endif + virtual void changeRegFileContext(TheISA::RegFile::ContextParam param, + TheISA::RegFile::ContextVal val) + { panic("Not supported on Alpha!"); } }; #if FULL_SYSTEM @@ -211,52 +232,43 @@ class AlphaFullCPU : public FullO3CPU #if FULL_SYSTEM /** Translates instruction requestion. */ - Fault translateInstReq(MemReqPtr &req) + Fault translateInstReq(RequestPtr &req) { return itb->translate(req); } /** Translates data read request. */ - Fault translateDataReadReq(MemReqPtr &req) + Fault translateDataReadReq(RequestPtr &req) { return dtb->translate(req, false); } /** Translates data write request. */ - Fault translateDataWriteReq(MemReqPtr &req) + Fault translateDataWriteReq(RequestPtr &req) { return dtb->translate(req, true); } #else - Fault dummyTranslation(MemReqPtr &req) - { -#if 0 - assert((req->vaddr >> 48 & 0xffff) == 0); -#endif - - // put the asid in the upper 16 bits of the paddr - req->paddr = req->vaddr & ~((Addr)0xffff << sizeof(Addr) * 8 - 16); - req->paddr = req->paddr | (Addr)req->asid << sizeof(Addr) * 8 - 16; - return NoFault; - } - /** Translates instruction requestion in syscall emulation mode. */ - Fault translateInstReq(MemReqPtr &req) + Fault translateInstReq(RequestPtr &req) { - return dummyTranslation(req); + int tid = req->getThreadNum(); + return this->thread[tid]->process->pTable->translate(req); } /** Translates data read request in syscall emulation mode. */ - Fault translateDataReadReq(MemReqPtr &req) + Fault translateDataReadReq(RequestPtr &req) { - return dummyTranslation(req); + int tid = req->getThreadNum(); + return this->thread[tid]->process->pTable->translate(req); } /** Translates data write request in syscall emulation mode. */ - Fault translateDataWriteReq(MemReqPtr &req) + Fault translateDataWriteReq(RequestPtr &req) { - return dummyTranslation(req); + int tid = req->getThreadNum(); + return this->thread[tid]->process->pTable->translate(req); } #endif @@ -298,7 +310,7 @@ class AlphaFullCPU : public FullO3CPU /** Executes a syscall. * @todo: Determine if this needs to be virtual. */ - void syscall(int thread_num); + void syscall(int64_t callnum, int thread_num); /** Gets a syscall argument. */ IntReg getSyscallArg(int i, int tid); @@ -311,7 +323,7 @@ class AlphaFullCPU : public FullO3CPU /** Read from memory function. */ template - Fault read(MemReqPtr &req, T &data) + Fault read(RequestPtr &req, T &data) { #if 0 #if FULL_SYSTEM && THE_ISA == ALPHA_ISA @@ -338,14 +350,14 @@ class AlphaFullCPU : public FullO3CPU /** CPU read function, forwards read to LSQ. */ template - Fault read(MemReqPtr &req, T &data, int load_idx) + Fault read(RequestPtr &req, T &data, int load_idx) { return this->iew.ldstQueue.read(req, data, load_idx); } /** Write to memory function. */ template - Fault write(MemReqPtr &req, T &data) + Fault write(RequestPtr &req, T &data) { #if 0 #if FULL_SYSTEM && THE_ISA == ALPHA_ISA @@ -417,7 +429,7 @@ class AlphaFullCPU : public FullO3CPU /** CPU write function, forwards write to LSQ. */ template - Fault write(MemReqPtr &req, T &data, int store_idx) + Fault write(RequestPtr &req, T &data, int store_idx) { return this->iew.ldstQueue.write(req, data, store_idx); } diff --git a/src/cpu/o3/alpha_cpu_builder.cc b/src/cpu/o3/alpha_cpu_builder.cc index b0d812edc..6ac408364 100644 --- a/src/cpu/o3/alpha_cpu_builder.cc +++ b/src/cpu/o3/alpha_cpu_builder.cc @@ -33,7 +33,6 @@ #include "cpu/o3/alpha_impl.hh" #include "cpu/o3/alpha_params.hh" #include "cpu/o3/fu_pool.hh" -#include "mem/cache/base_cache.hh" #include "sim/builder.hh" class DerivAlphaFullCPU : public AlphaFullCPU @@ -60,7 +59,7 @@ SimObjectVectorParam workload; //SimObjectParam page_table; #endif // FULL_SYSTEM -SimObjectParam mem; +SimObjectParam mem; SimObjectParam checker; @@ -69,9 +68,6 @@ Param max_insts_all_threads; Param max_loads_any_thread; Param max_loads_all_threads; -SimObjectParam icache; -SimObjectParam dcache; - Param cachePorts; Param decodeToFetchDelay; @@ -169,7 +165,7 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(DerivAlphaFullCPU) // INIT_PARAM(page_table, "Page table"), #endif // FULL_SYSTEM - INIT_PARAM_DFLT(mem, "Memory", NULL), + INIT_PARAM(mem, "Memory"), INIT_PARAM_DFLT(checker, "Checker CPU", NULL), @@ -188,9 +184,6 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(DerivAlphaFullCPU) "count", 0), - INIT_PARAM_DFLT(icache, "L1 instruction cache", NULL), - INIT_PARAM_DFLT(dcache, "L1 data cache", NULL), - INIT_PARAM_DFLT(cachePorts, "Cache Ports", 200), INIT_PARAM(decodeToFetchDelay, "Decode to fetch delay"), @@ -327,8 +320,6 @@ CREATE_SIM_OBJECT(DerivAlphaFullCPU) // // Caches // - params->icacheInterface = icache ? icache->getInterface() : NULL; - params->dcacheInterface = dcache ? dcache->getInterface() : NULL; params->cachePorts = cachePorts; params->decodeToFetchDelay = decodeToFetchDelay; diff --git a/src/cpu/o3/alpha_cpu_impl.hh b/src/cpu/o3/alpha_cpu_impl.hh index f7f0a3842..a890cfd90 100644 --- a/src/cpu/o3/alpha_cpu_impl.hh +++ b/src/cpu/o3/alpha_cpu_impl.hh @@ -31,7 +31,6 @@ #include "base/statistics.hh" #include "base/timebuf.hh" #include "cpu/checker/exec_context.hh" -#include "mem/mem_interface.hh" #include "sim/sim_events.hh" #include "sim/stats.hh" @@ -68,11 +67,9 @@ AlphaFullCPU::AlphaFullCPU(Params *params) this->thread[i]->setStatus(ExecContext::Suspended); #else if (i < params->workload.size()) { - DPRINTF(FullCPU, "FullCPU: Workload[%i]'s starting PC is %#x, " - "process is %#x", - i, params->workload[i]->prog_entry, this->thread[i]); + DPRINTF(FullCPU, "FullCPU: Workload[%i] process is %#x", + i, this->thread[i]); this->thread[i] = new Thread(this, i, params->workload[i], i); - assert(params->workload[i]->getMemory() != NULL); this->thread[i]->setStatus(ExecContext::Suspended); //usedTids[i] = true; @@ -160,7 +157,7 @@ void AlphaFullCPU::AlphaXC::takeOverFrom(ExecContext *old_context) { // some things should already be set up - assert(getMemPtr() == old_context->getMemPtr()); + assert(getMemPort() == old_context->getMemPort()); #if FULL_SYSTEM assert(getSystemPtr() == old_context->getSystemPtr()); #else @@ -366,15 +363,14 @@ AlphaFullCPU::AlphaXC::copyArchRegs(ExecContext *xc) } // Then loop through the floating point registers. - for (int i = 0; i < AlphaISA::NumFloatRegs; ++i) - { - renamed_reg = this->renameMap.lookup(i + AlphaISA::FP_Base_DepTag); - this->cpuXC->setFloatRegBits(i, - this->regFile.readFloatRegBits(renamed_reg)); + for (int i = 0; i < AlphaISA::NumFloatRegs; ++i) { + renamed_reg = cpu->renameMap[tid].lookup(i + AlphaISA::FP_Base_DepTag); + cpu->setFloatRegBits(renamed_reg, + xc->readFloatRegBits(i)); } // Copy the misc regs. - cpu->regFile.miscRegs[tid].copyMiscRegs(xc); + copyMiscRegs(xc, this); // Then finally set the PC and the next PC. cpu->setPC(xc->readPC(), tid); @@ -398,24 +394,40 @@ AlphaFullCPU::AlphaXC::readIntReg(int reg_idx) } template -float -AlphaFullCPU::AlphaXC::readFloatRegSingle(int reg_idx) +FloatReg +AlphaFullCPU::AlphaXC::readFloatReg(int reg_idx, int width) { DPRINTF(Fault, "Reading float register through the XC!\n"); - return cpu->readArchFloatRegSingle(reg_idx, thread->tid); + switch(width) { + case 32: + return cpu->readArchFloatRegSingle(reg_idx, thread->tid); + case 64: + return cpu->readArchFloatRegDouble(reg_idx, thread->tid); + default: + panic("Unsupported width!"); + return 0; + } } template -double -AlphaFullCPU::AlphaXC::readFloatRegDouble(int reg_idx) +FloatReg +AlphaFullCPU::AlphaXC::readFloatReg(int reg_idx) { DPRINTF(Fault, "Reading float register through the XC!\n"); - return cpu->readArchFloatRegDouble(reg_idx, thread->tid); + return cpu->readArchFloatRegSingle(reg_idx, thread->tid); } template -uint64_t -AlphaFullCPU::AlphaXC::readFloatRegInt(int reg_idx) +FloatRegBits +AlphaFullCPU::AlphaXC::readFloatRegBits(int reg_idx, int width) +{ + DPRINTF(Fault, "Reading floatint register through the XC!\n"); + return cpu->readArchFloatRegInt(reg_idx, thread->tid); +} + +template +FloatRegBits +AlphaFullCPU::AlphaXC::readFloatRegBits(int reg_idx) { DPRINTF(Fault, "Reading floatint register through the XC!\n"); return cpu->readArchFloatRegInt(reg_idx, thread->tid); @@ -435,10 +447,17 @@ AlphaFullCPU::AlphaXC::setIntReg(int reg_idx, uint64_t val) template void -AlphaFullCPU::AlphaXC::setFloatRegSingle(int reg_idx, float val) +AlphaFullCPU::AlphaXC::setFloatReg(int reg_idx, FloatReg val, int width) { DPRINTF(Fault, "Setting float register through the XC!\n"); - cpu->setArchFloatRegSingle(reg_idx, val, thread->tid); + switch(width) { + case 32: + cpu->setArchFloatRegSingle(reg_idx, val, thread->tid); + break; + case 64: + cpu->setArchFloatRegDouble(reg_idx, val, thread->tid); + break; + } if (!thread->trapPending && !thread->inSyscall) { cpu->squashFromXC(thread->tid); @@ -447,10 +466,23 @@ AlphaFullCPU::AlphaXC::setFloatRegSingle(int reg_idx, float val) template void -AlphaFullCPU::AlphaXC::setFloatRegDouble(int reg_idx, double val) +AlphaFullCPU::AlphaXC::setFloatReg(int reg_idx, FloatReg val) { DPRINTF(Fault, "Setting float register through the XC!\n"); - cpu->setArchFloatRegDouble(reg_idx, val, thread->tid); + cpu->setArchFloatRegSingle(reg_idx, val, thread->tid); + + if (!thread->trapPending && !thread->inSyscall) { + cpu->squashFromXC(thread->tid); + } +} + +template +void +AlphaFullCPU::AlphaXC::setFloatRegBits(int reg_idx, FloatRegBits val, + int width) +{ + DPRINTF(Fault, "Setting floatint register through the XC!\n"); + cpu->setArchFloatRegInt(reg_idx, val, thread->tid); if (!thread->trapPending && !thread->inSyscall) { cpu->squashFromXC(thread->tid); @@ -459,7 +491,7 @@ AlphaFullCPU::AlphaXC::setFloatRegDouble(int reg_idx, double val) template void -AlphaFullCPU::AlphaXC::setFloatRegInt(int reg_idx, uint64_t val) +AlphaFullCPU::AlphaXC::setFloatRegBits(int reg_idx, FloatRegBits val) { DPRINTF(Fault, "Setting floatint register through the XC!\n"); cpu->setArchFloatRegInt(reg_idx, val, thread->tid); @@ -723,7 +755,7 @@ AlphaFullCPU::processInterrupts() template void -AlphaFullCPU::syscall(int tid) +AlphaFullCPU::syscall(int64_t callnum, int tid) { DPRINTF(FullCPU, "AlphaFullCPU: [tid:%i] Executing syscall().\n\n", tid); @@ -734,7 +766,7 @@ AlphaFullCPU::syscall(int tid) ++(this->thread[tid]->funcExeInst); // Execute the actual syscall. - this->thread[tid]->syscall(); + this->thread[tid]->syscall(callnum); // Decrease funcExeInst by one as the normal commit will handle // incrementing it. diff --git a/src/cpu/o3/alpha_dyn_inst.hh b/src/cpu/o3/alpha_dyn_inst.hh index b03c8c337..f289bbf0d 100644 --- a/src/cpu/o3/alpha_dyn_inst.hh +++ b/src/cpu/o3/alpha_dyn_inst.hh @@ -29,11 +29,14 @@ #ifndef __CPU_O3_ALPHA_DYN_INST_HH__ #define __CPU_O3_ALPHA_DYN_INST_HH__ +#include "arch/isa_traits.hh" #include "cpu/base_dyn_inst.hh" #include "cpu/inst_seq.hh" #include "cpu/o3/alpha_cpu.hh" #include "cpu/o3/alpha_impl.hh" +class Packet; + /** * Mostly implementation & ISA specific AlphaDynInst. As with most * other classes in the new CPU model, it is templated on the Impl to @@ -56,6 +59,8 @@ class AlphaDynInst : public BaseDynInst typedef TheISA::RegIndex RegIndex; /** Integer register index type. */ typedef TheISA::IntReg IntReg; + typedef TheISA::FloatReg FloatReg; + typedef TheISA::FloatRegBits FloatRegBits; /** Misc register index type. */ typedef TheISA::MiscReg MiscReg; @@ -79,7 +84,7 @@ class AlphaDynInst : public BaseDynInst Fault initiateAcc(); /** Completes the access. Only valid for memory operations. */ - Fault completeAcc(); + Fault completeAcc(Packet *pkt); private: /** Initializes variables. */ @@ -123,7 +128,7 @@ class AlphaDynInst : public BaseDynInst bool simPalCheck(int palFunc); #else /** Calls a syscall. */ - void syscall(); + void syscall(int64_t callnum); #endif private: diff --git a/src/cpu/o3/alpha_dyn_inst_impl.hh b/src/cpu/o3/alpha_dyn_inst_impl.hh index 541d5ab82..16c236b4c 100644 --- a/src/cpu/o3/alpha_dyn_inst_impl.hh +++ b/src/cpu/o3/alpha_dyn_inst_impl.hh @@ -96,15 +96,13 @@ AlphaDynInst::initiateAcc() template Fault -AlphaDynInst::completeAcc() +AlphaDynInst::completeAcc(Packet *pkt) { if (this->isLoad()) { - this->fault = this->staticInst->completeAcc(this->req->data, - this, + this->fault = this->staticInst->completeAcc(pkt, this, this->traceData); } else if (this->isStore()) { - this->fault = this->staticInst->completeAcc((uint8_t*)&this->req->result, - this, + this->fault = this->staticInst->completeAcc(pkt, this, this->traceData); } else { panic("Unknown type!"); @@ -168,9 +166,9 @@ AlphaDynInst::simPalCheck(int palFunc) #else template void -AlphaDynInst::syscall() +AlphaDynInst::syscall(int64_t callnum) { - this->cpu->syscall(this->threadNumber); + this->cpu->syscall(callnum, this->threadNumber); } #endif diff --git a/src/cpu/o3/alpha_params.hh b/src/cpu/o3/alpha_params.hh index e3acf2c05..04366e8dd 100644 --- a/src/cpu/o3/alpha_params.hh +++ b/src/cpu/o3/alpha_params.hh @@ -35,8 +35,7 @@ class AlphaDTB; class AlphaITB; class FUPool; -class FunctionalMemory; -class MemInterface; +class MemObject; class Process; class System; @@ -60,7 +59,7 @@ class AlphaSimpleParams : public BaseFullCPU::Params //Page Table // PageTable *pTable; - FunctionalMemory *mem; + MemObject *mem; BaseCPU *checker; @@ -69,8 +68,8 @@ class AlphaSimpleParams : public BaseFullCPU::Params // // Caches // - MemInterface *icacheInterface; - MemInterface *dcacheInterface; +// MemInterface *icacheInterface; +// MemInterface *dcacheInterface; unsigned cachePorts; diff --git a/src/cpu/o3/bpred_unit.cc b/src/cpu/o3/bpred_unit.cc index 92344111f..dcc5ceb80 100644 --- a/src/cpu/o3/bpred_unit.cc +++ b/src/cpu/o3/bpred_unit.cc @@ -30,8 +30,8 @@ #include "cpu/o3/alpha_impl.hh" #include "cpu/o3/alpha_dyn_inst.hh" #include "cpu/ozone/ozone_impl.hh" -#include "cpu/ozone/simple_impl.hh" +//#include "cpu/ozone/simple_impl.hh" template class TwobitBPredUnit; template class TwobitBPredUnit; -template class TwobitBPredUnit; +//template class TwobitBPredUnit; diff --git a/src/cpu/o3/btb.hh b/src/cpu/o3/btb.hh index b9ff42573..c7dc1808b 100644 --- a/src/cpu/o3/btb.hh +++ b/src/cpu/o3/btb.hh @@ -31,6 +31,7 @@ // For Addr type. #include "arch/isa_traits.hh" +#include "base/misc.hh" class DefaultBTB { diff --git a/src/cpu/o3/commit.hh b/src/cpu/o3/commit.hh index 66abf8dc6..c019ef4c7 100644 --- a/src/cpu/o3/commit.hh +++ b/src/cpu/o3/commit.hh @@ -34,7 +34,6 @@ #include "base/timebuf.hh" #include "cpu/exetrace.hh" #include "cpu/inst_seq.hh" -#include "mem/memory_interface.hh" template class O3ThreadState; @@ -301,9 +300,6 @@ class DefaultCommit /** Pointer to FullCPU. */ FullCPU *cpu; - /** Memory interface. Used for d-cache accesses. */ - MemInterface *dcacheInterface; - std::vector thread; Fault fetchFault; diff --git a/src/cpu/o3/commit_impl.hh b/src/cpu/o3/commit_impl.hh index 346a8bc1c..97703c430 100644 --- a/src/cpu/o3/commit_impl.hh +++ b/src/cpu/o3/commit_impl.hh @@ -64,8 +64,7 @@ DefaultCommit::TrapEvent::description() template DefaultCommit::DefaultCommit(Params *params) - : dcacheInterface(params->dcacheInterface), - squashCounter(0), + : squashCounter(0), iewToCommitDelay(params->iewToCommitDelay), commitToIEWDelay(params->commitToIEWDelay), renameToROBDelay(params->renameToROBDelay), diff --git a/src/cpu/o3/cpu.cc b/src/cpu/o3/cpu.cc index ed02a845b..4e0bb2d2d 100644 --- a/src/cpu/o3/cpu.cc +++ b/src/cpu/o3/cpu.cc @@ -46,6 +46,7 @@ #include "sim/stat_control.hh" using namespace std; +using namespace TheISA; BaseFullCPU::BaseFullCPU(Params *params) : BaseCPU(params), cpu_id(0) @@ -121,14 +122,9 @@ FullO3CPU::FullO3CPU(Params *params) system(params->system), memCtrl(system->memctrl), physmem(system->physmem), - mem(params->mem), -#else -// pTable(params->pTable), - mem(params->workload[0]->getMemory()), #endif // FULL_SYSTEM + mem(params->mem), switchCount(0), - icacheInterface(params->icacheInterface), - dcacheInterface(params->dcacheInterface), deferRegistration(params->deferRegistration), numThreads(number_of_threads) { @@ -782,6 +778,7 @@ FullO3CPU::readFloatReg(int reg_idx) template FloatRegBits FullO3CPU::readFloatRegBits(int reg_idx, int width) +{ return regFile.readFloatRegBits(reg_idx, width); } @@ -843,7 +840,7 @@ FullO3CPU::readArchFloatRegSingle(int reg_idx, unsigned tid) int idx = reg_idx + TheISA::FP_Base_DepTag; PhysRegIndex phys_reg = commitRenameMap[tid].lookup(idx); - return regFile.readFloatRegSingle(phys_reg); + return regFile.readFloatReg(phys_reg); } template @@ -853,7 +850,7 @@ FullO3CPU::readArchFloatRegDouble(int reg_idx, unsigned tid) int idx = reg_idx + TheISA::FP_Base_DepTag; PhysRegIndex phys_reg = commitRenameMap[tid].lookup(idx); - return regFile.readFloatRegDouble(phys_reg); + return regFile.readFloatReg(phys_reg, 64); } template @@ -863,7 +860,7 @@ FullO3CPU::readArchFloatRegInt(int reg_idx, unsigned tid) int idx = reg_idx + TheISA::FP_Base_DepTag; PhysRegIndex phys_reg = commitRenameMap[tid].lookup(idx); - return regFile.readFloatRegInt(phys_reg); + return regFile.readFloatRegBits(phys_reg); } template @@ -881,7 +878,7 @@ FullO3CPU::setArchFloatRegSingle(int reg_idx, float val, unsigned tid) { PhysRegIndex phys_reg = commitRenameMap[tid].lookup(reg_idx); - regFile.setFloatRegSingle(phys_reg, val); + regFile.setFloatReg(phys_reg, val); } template @@ -890,7 +887,7 @@ FullO3CPU::setArchFloatRegDouble(int reg_idx, double val, unsigned tid) { PhysRegIndex phys_reg = commitRenameMap[tid].lookup(reg_idx); - regFile.setFloatRegDouble(phys_reg, val); + regFile.setFloatReg(phys_reg, val, 64); } template @@ -899,7 +896,7 @@ FullO3CPU::setArchFloatRegInt(int reg_idx, uint64_t val, unsigned tid) { PhysRegIndex phys_reg = commitRenameMap[tid].lookup(reg_idx); - regFile.setFloatRegInt(phys_reg, val); + regFile.setFloatRegBits(phys_reg, val); } template diff --git a/src/cpu/o3/cpu.hh b/src/cpu/o3/cpu.hh index bed95ad54..c791b2948 100644 --- a/src/cpu/o3/cpu.hh +++ b/src/cpu/o3/cpu.hh @@ -35,6 +35,7 @@ #include #include +#include "arch/isa_traits.hh" #include "base/statistics.hh" #include "base/timebuf.hh" #include "config/full_system.hh" @@ -50,7 +51,7 @@ template class Checker; class ExecContext; -class MemInterface; +class MemObject; class Process; class BaseFullCPU : public BaseCPU @@ -63,6 +64,8 @@ class BaseFullCPU : public BaseCPU void regStats(); + int readCpuId() { return cpu_id; } + protected: int cpu_id; }; @@ -71,6 +74,9 @@ template class FullO3CPU : public BaseFullCPU { public: + typedef TheISA::FloatReg FloatReg; + typedef TheISA::FloatRegBits FloatRegBits; + // Typedefs from the Impl here. typedef typename Impl::CPUPol CPUPolicy; typedef typename Impl::Params Params; @@ -226,14 +232,6 @@ class FullO3CPU : public BaseFullCPU int getDataAsid(unsigned tid) { return regFile.miscRegs[tid].getDataAsid(); } #else - /** Check if this address is a valid instruction address. */ - bool validInstAddr(Addr addr,unsigned tid) - { return thread[tid]->validInstAddr(addr); } - - /** Check if this address is a valid data address. */ - bool validDataAddr(Addr addr,unsigned tid) - { return thread[tid]->validDataAddr(addr); } - /** Get instruction asid. */ int getInstAsid(unsigned tid) { return thread[tid]->asid; } @@ -259,13 +257,13 @@ class FullO3CPU : public BaseFullCPU void setIntReg(int reg_idx, uint64_t val); - void setFloatReg(int reg_idx, FloatReg val, int width); + void setFloatReg(int reg_idx, FloatReg val); void setFloatReg(int reg_idx, FloatReg val, int width); void setFloatRegBits(int reg_idx, FloatRegBits val); - void setFloatRegBits(int reg_idx, FloatRegBits val); + void setFloatRegBits(int reg_idx, FloatRegBits val, int width); uint64_t readArchIntReg(int reg_idx, unsigned tid); @@ -464,7 +462,7 @@ class FullO3CPU : public BaseFullCPU #endif /** Pointer to memory. */ - FunctionalMemory *mem; + MemObject *mem; Sampler *sampler; diff --git a/src/cpu/o3/dep_graph.hh b/src/cpu/o3/dep_graph.hh new file mode 100644 index 000000000..f8ae38da4 --- /dev/null +++ b/src/cpu/o3/dep_graph.hh @@ -0,0 +1,213 @@ + +#ifndef __CPU_O3_DEP_GRAPH_HH__ +#define __CPU_O3_DEP_GRAPH_HH__ + +#include "cpu/o3/comm.hh" + +template +class DependencyEntry +{ + public: + DependencyEntry() + : inst(NULL), next(NULL) + { } + + DynInstPtr inst; + //Might want to include data about what arch. register the + //dependence is waiting on. + DependencyEntry *next; +}; + +template +class DependencyGraph +{ + public: + typedef DependencyEntry DepEntry; + + DependencyGraph() + : numEntries(0), memAllocCounter(0), nodesTraversed(0), nodesRemoved(0) + { } + + void resize(int num_entries); + + void reset(); + + void insert(PhysRegIndex idx, DynInstPtr &new_inst); + + void setInst(PhysRegIndex idx, DynInstPtr &new_inst) + { dependGraph[idx].inst = new_inst; } + + void clearInst(PhysRegIndex idx) + { dependGraph[idx].inst = NULL; } + + void remove(PhysRegIndex idx, DynInstPtr &inst_to_remove); + + DynInstPtr pop(PhysRegIndex idx); + + bool empty(PhysRegIndex idx) { return !dependGraph[idx].next; } + + /** Debugging function to dump out the dependency graph. + */ + void dump(); + + private: + /** Array of linked lists. Each linked list is a list of all the + * instructions that depend upon a given register. The actual + * register's index is used to index into the graph; ie all + * instructions in flight that are dependent upon r34 will be + * in the linked list of dependGraph[34]. + */ + DepEntry *dependGraph; + + int numEntries; + + // Debug variable, remove when done testing. + unsigned memAllocCounter; + + public: + uint64_t nodesTraversed; + uint64_t nodesRemoved; +}; + +template +void +DependencyGraph::resize(int num_entries) +{ + numEntries = num_entries; + dependGraph = new DepEntry[numEntries]; +} + +template +void +DependencyGraph::reset() +{ + // Clear the dependency graph + DepEntry *curr; + DepEntry *prev; + + for (int i = 0; i < numEntries; ++i) { + curr = dependGraph[i].next; + + while (curr) { + memAllocCounter--; + + prev = curr; + curr = prev->next; + prev->inst = NULL; + + delete prev; + } + + if (dependGraph[i].inst) { + dependGraph[i].inst = NULL; + } + + dependGraph[i].next = NULL; + } +} + +template +void +DependencyGraph::insert(PhysRegIndex idx, DynInstPtr &new_inst) +{ + //Add this new, dependent instruction at the head of the dependency + //chain. + + // First create the entry that will be added to the head of the + // dependency chain. + DepEntry *new_entry = new DepEntry; + new_entry->next = dependGraph[idx].next; + new_entry->inst = new_inst; + + // Then actually add it to the chain. + dependGraph[idx].next = new_entry; + + ++memAllocCounter; +} + + +template +void +DependencyGraph::remove(PhysRegIndex idx, + DynInstPtr &inst_to_remove) +{ + DepEntry *prev = &dependGraph[idx]; + DepEntry *curr = dependGraph[idx].next; + + // Make sure curr isn't NULL. Because this instruction is being + // removed from a dependency list, it must have been placed there at + // an earlier time. The dependency chain should not be empty, + // unless the instruction dependent upon it is already ready. + if (curr == NULL) { + return; + } + + nodesRemoved++; + + // Find the instruction to remove within the dependency linked list. + while (curr->inst != inst_to_remove) { + prev = curr; + curr = curr->next; + nodesTraversed++; + + assert(curr != NULL); + } + + // Now remove this instruction from the list. + prev->next = curr->next; + + --memAllocCounter; + + // Could push this off to the destructor of DependencyEntry + curr->inst = NULL; + + delete curr; +} + +template +DynInstPtr +DependencyGraph::pop(PhysRegIndex idx) +{ + DepEntry *node; + node = dependGraph[idx].next; + DynInstPtr inst = NULL; + if (node) { + inst = node->inst; + dependGraph[idx].next = node->next; + node->inst = NULL; + memAllocCounter--; + delete node; + } + return inst; +} + +template +void +DependencyGraph::dump() +{ + DepEntry *curr; + + for (int i = 0; i < numEntries; ++i) + { + curr = &dependGraph[i]; + + if (curr->inst) { + cprintf("dependGraph[%i]: producer: %#x [sn:%lli] consumer: ", + i, curr->inst->readPC(), curr->inst->seqNum); + } else { + cprintf("dependGraph[%i]: No producer. consumer: ", i); + } + + while (curr->next != NULL) { + curr = curr->next; + + cprintf("%#x [sn:%lli] ", + curr->inst->readPC(), curr->inst->seqNum); + } + + cprintf("\n"); + } + cprintf("memAllocCounter: %i\n", memAllocCounter); +} + +#endif // __CPU_O3_DEP_GRAPH_HH__ diff --git a/src/cpu/o3/fetch.hh b/src/cpu/o3/fetch.hh index 3fcfdc3a1..2b1d93cb7 100644 --- a/src/cpu/o3/fetch.hh +++ b/src/cpu/o3/fetch.hh @@ -29,10 +29,12 @@ #ifndef __CPU_O3_FETCH_HH__ #define __CPU_O3_FETCH_HH__ +#include "arch/utility.hh" #include "base/statistics.hh" #include "base/timebuf.hh" #include "cpu/pc_event.hh" -#include "mem/mem_interface.hh" +#include "mem/packet.hh" +#include "mem/port.hh" #include "sim/eventq.hh" class Sampler; @@ -65,6 +67,32 @@ class DefaultFetch typedef TheISA::MachInst MachInst; typedef TheISA::ExtMachInst ExtMachInst; + class IcachePort : public Port + { + protected: + DefaultFetch *fetch; + + public: + IcachePort(DefaultFetch *_fetch) + : Port(_fetch->name() + "-iport"), fetch(_fetch) + { } + + protected: + virtual Tick recvAtomic(PacketPtr pkt); + + virtual void recvFunctional(PacketPtr pkt); + + virtual void recvStatusChange(Status status); + + virtual void getDeviceAddressRanges(AddrRangeList &resp, + AddrRangeList &snoop) + { resp.clear(); snoop.clear(); } + + virtual bool recvTiming(PacketPtr pkt); + + virtual void recvRetry(); + }; + public: /** Overall fetch status. Used to determine if the CPU can * deschedule itsef due to a lack of activity. @@ -84,8 +112,9 @@ class DefaultFetch TrapPending, QuiescePending, SwitchOut, - IcacheMissStall, - IcacheMissComplete + IcacheWaitResponse, + IcacheRetry, + IcacheAccessComplete }; /** Fetching Policy, Add new policies here.*/ @@ -110,28 +139,6 @@ class DefaultFetch /** List that has the threads organized by priority. */ std::list priorityList; - public: - class CacheCompletionEvent : public Event - { - private: - MemReqPtr req; - /** Pointer to fetch. */ - DefaultFetch *fetch; - /** Thread id. */ -// unsigned threadId; - - public: - /** Constructs a cache completion event, which tells fetch when the - * cache miss is complete. - */ - CacheCompletionEvent(MemReqPtr &_req, DefaultFetch *_fetch); - - /** Processes cache completion event. */ - virtual void process(); - /** Returns the description of the cache completion event. */ - virtual const char *description(); - }; - public: /** DefaultFetch constructor. */ DefaultFetch(Params *params); @@ -161,7 +168,7 @@ class DefaultFetch void initStage(); /** Processes cache completion event. */ - void processCacheCompletion(MemReqPtr &req); + void processCacheCompletion(PacketPtr pkt); void switchOut(); @@ -295,8 +302,10 @@ class DefaultFetch /** Wire used to write any information heading to decode. */ typename TimeBuffer::wire toDecode; + MemObject *mem; + /** Icache interface. */ - MemInterface *icacheInterface; + IcachePort *icachePort; /** BPredUnit. */ BPredUnit branchPred; @@ -305,8 +314,8 @@ class DefaultFetch Addr nextPC[Impl::MaxThreads]; - /** Memory request used to access cache. */ - MemReqPtr memReq[Impl::MaxThreads]; + /** Memory packet used to access cache. */ + PacketPtr memPkt[Impl::MaxThreads]; /** Variable that tracks if fetch has written to the time buffer this * cycle. Used to tell CPU if there is activity this cycle. diff --git a/src/cpu/o3/fetch_impl.hh b/src/cpu/o3/fetch_impl.hh index 1c5e508f6..a80afbcf4 100644 --- a/src/cpu/o3/fetch_impl.hh +++ b/src/cpu/o3/fetch_impl.hh @@ -27,12 +27,13 @@ */ #include "arch/isa_traits.hh" +#include "arch/utility.hh" #include "cpu/exetrace.hh" #include "cpu/o3/fetch.hh" -#include "mem/base_mem.hh" -#include "mem/mem_interface.hh" -#include "mem/mem_req.hh" +#include "mem/packet.hh" +#include "mem/request.hh" #include "sim/byteswap.hh" +#include "sim/host.hh" #include "sim/root.hh" #if FULL_SYSTEM @@ -42,42 +43,67 @@ #include "mem/functional/memory_control.hh" #include "mem/functional/physical.hh" #include "sim/system.hh" -#else // !FULL_SYSTEM -#include "mem/functional/functional.hh" #endif // FULL_SYSTEM #include using namespace std; +using namespace TheISA; template -DefaultFetch::CacheCompletionEvent::CacheCompletionEvent(MemReqPtr &_req, - DefaultFetch *_fetch) - : Event(&mainEventQueue, Delayed_Writeback_Pri), - req(_req), - fetch(_fetch) +Tick +DefaultFetch::IcachePort::recvAtomic(PacketPtr pkt) { - this->setFlags(Event::AutoDelete); + panic("DefaultFetch doesn't expect recvAtomic callback!"); + return curTick; } template void -DefaultFetch::CacheCompletionEvent::process() +DefaultFetch::IcachePort::recvFunctional(PacketPtr pkt) +{ + panic("DefaultFetch doesn't expect recvFunctional callback!"); +} + +template +void +DefaultFetch::IcachePort::recvStatusChange(Status status) +{ + if (status == RangeChange) + return; + + panic("DefaultFetch doesn't expect recvStatusChange callback!"); +} + +template +bool +DefaultFetch::IcachePort::recvTiming(Packet *pkt) { - fetch->processCacheCompletion(req); + fetch->processCacheCompletion(pkt); + return true; } template -const char * -DefaultFetch::CacheCompletionEvent::description() +void +DefaultFetch::IcachePort::recvRetry() { - return "DefaultFetch cache completion event"; + panic("DefaultFetch doesn't support retry yet."); + // we shouldn't get a retry unless we have a packet that we're + // waiting to transmit +/* + assert(cpu->dcache_pkt != NULL); + assert(cpu->_status == DcacheRetry); + Packet *tmp = cpu->dcache_pkt; + if (sendTiming(tmp)) { + cpu->_status = DcacheWaitResponse; + cpu->dcache_pkt = NULL; + } +*/ } template DefaultFetch::DefaultFetch(Params *params) - : icacheInterface(params->icacheInterface), - branchPred(params), + : branchPred(params), decodeToFetchDelay(params->decodeToFetchDelay), renameToFetchDelay(params->renameToFetchDelay), iewToFetchDelay(params->iewToFetchDelay), @@ -122,7 +148,7 @@ DefaultFetch::DefaultFetch(Params *params) } // Size of cache block. - cacheBlkSize = icacheInterface ? icacheInterface->getBlockSize() : 64; + cacheBlkSize = 64; // Create mask to get rid of offset bits. cacheBlkMask = (cacheBlkSize - 1); @@ -133,8 +159,7 @@ DefaultFetch::DefaultFetch(Params *params) priorityList.push_back(tid); - // Create a new memory request. - memReq[tid] = NULL; + memPkt[tid] = NULL; // Create space to store a cache line. cacheData[tid] = new uint8_t[cacheBlkSize]; @@ -253,6 +278,9 @@ DefaultFetch::setCPU(FullCPU *cpu_ptr) DPRINTF(Fetch, "Setting the CPU pointer.\n"); cpu = cpu_ptr; + // Name is finally available, so create the port. + icachePort = new IcachePort(this); + // Fetch needs to start fetching instructions at the very beginning, // so it must start up in active state. switchToActive(); @@ -315,9 +343,9 @@ DefaultFetch::initStage() template void -DefaultFetch::processCacheCompletion(MemReqPtr &req) +DefaultFetch::processCacheCompletion(PacketPtr pkt) { - unsigned tid = req->thread_num; + unsigned tid = pkt->req->getThreadNum(); DPRINTF(Fetch, "[tid:%u] Waking up from cache miss.\n",tid); @@ -325,10 +353,11 @@ DefaultFetch::processCacheCompletion(MemReqPtr &req) // to return. // Can keep track of how many cache accesses go unused due to // misspeculation here. - if (fetchStatus[tid] != IcacheMissStall || - req != memReq[tid] || + if (fetchStatus[tid] != IcacheWaitResponse || + pkt != memPkt[tid] || isSwitchedOut()) { ++fetchIcacheSquashes; + delete pkt; return; } @@ -341,17 +370,19 @@ DefaultFetch::processCacheCompletion(MemReqPtr &req) switchToActive(); - // Only switch to IcacheMissComplete if we're not stalled as well. + // Only switch to IcacheAccessComplete if we're not stalled as well. if (checkStall(tid)) { fetchStatus[tid] = Blocked; } else { - fetchStatus[tid] = IcacheMissComplete; + fetchStatus[tid] = IcacheAccessComplete; } // memcpy(cacheData[tid], memReq[tid]->data, memReq[tid]->size); // Reset the mem req to NULL. - memReq[tid] = NULL; + delete pkt->req; + delete pkt; + memPkt[tid] = NULL; } template @@ -475,18 +506,15 @@ DefaultFetch::fetchCacheLine(Addr fetch_PC, Fault &ret_fault, unsigned tid // Setup the memReq to do a read of the first instruction's address. // Set the appropriate read size and flags as well. - memReq[tid] = new MemReq(); + // Build request here. + RequestPtr mem_req = new Request(tid, fetch_PC, cacheBlkSize, flags, + fetch_PC, cpu->readCpuId(), tid); - memReq[tid]->asid = tid; - memReq[tid]->thread_num = tid; - memReq[tid]->data = new uint8_t[64]; - memReq[tid]->xc = cpu->xcBase(tid); - memReq[tid]->cmd = Read; - memReq[tid]->reset(fetch_PC, cacheBlkSize, flags); + memPkt[tid] = NULL; // Translate the instruction request. //#if FULL_SYSTEM - fault = cpu->translateInstReq(memReq[tid]); + fault = cpu->translateInstReq(mem_req); //#else // fault = pTable->translate(memReq[tid]); //#endif @@ -508,48 +536,31 @@ DefaultFetch::fetchCacheLine(Addr fetch_PC, Fault &ret_fault, unsigned tid } #endif + // Build packet here. + PacketPtr data_pkt = new Packet(mem_req, + Packet::ReadReq, Packet::Broadcast); + data_pkt->dataStatic(cacheData[tid]); + DPRINTF(Fetch, "Fetch: Doing instruction read.\n"); - fault = cpu->mem->read(memReq[tid], cacheData[tid]); - // This read may change when the mem interface changes. + + fetchedCacheLines++; // Now do the timing access to see whether or not the instruction // exists within the cache. - if (icacheInterface && !icacheInterface->isBlocked()) { - DPRINTF(Fetch, "Doing cache access.\n"); - - memReq[tid]->completionEvent = NULL; - - memReq[tid]->time = curTick; - - MemAccessResult result = icacheInterface->access(memReq[tid]); - - fetchedCacheLines++; - - // If the cache missed, then schedule an event to wake - // up this stage once the cache miss completes. - // @todo: Possibly allow for longer than 1 cycle cache hits. - if (result != MA_HIT && icacheInterface->doEvents()) { - - memReq[tid]->completionEvent = - new CacheCompletionEvent(memReq[tid], this); - - lastIcacheStall[tid] = curTick; - - DPRINTF(Activity, "[tid:%i]: Activity: Stalling due to I-cache " - "miss.\n", tid); - - fetchStatus[tid] = IcacheMissStall; - } else { - DPRINTF(Fetch, "[tid:%i]: I-Cache hit. Doing Instruction " - "read.\n", tid); - -// memcpy(cacheData[tid], memReq[tid]->data, memReq[tid]->size); - } - } else { + if (!icachePort->sendTiming(data_pkt)) { DPRINTF(Fetch, "[tid:%i] Out of MSHRs!\n", tid); ret_fault = NoFault; return false; } + + DPRINTF(Fetch, "Doing cache access.\n"); + + lastIcacheStall[tid] = curTick; + + DPRINTF(Activity, "[tid:%i]: Activity: Waiting on I-cache " + "response.\n", tid); + + fetchStatus[tid] = IcacheWaitResponse; } ret_fault = fault; @@ -567,10 +578,11 @@ DefaultFetch::doSquash(const Addr &new_PC, unsigned tid) nextPC[tid] = new_PC + instSize; // Clear the icache miss if it's outstanding. - if (fetchStatus[tid] == IcacheMissStall && icacheInterface) { + if (fetchStatus[tid] == IcacheWaitResponse) { DPRINTF(Fetch, "[tid:%i]: Squashing outstanding Icache miss.\n", tid); - memReq[tid] = NULL; + delete memPkt[tid]; + memPkt[tid] = NULL; } fetchStatus[tid] = Squashing; @@ -632,12 +644,12 @@ DefaultFetch::updateFetchStatus() if (fetchStatus[tid] == Running || fetchStatus[tid] == Squashing || - fetchStatus[tid] == IcacheMissComplete) { + fetchStatus[tid] == IcacheAccessComplete) { if (_status == Inactive) { DPRINTF(Activity, "[tid:%i]: Activating stage.\n",tid); - if (fetchStatus[tid] == IcacheMissComplete) { + if (fetchStatus[tid] == IcacheAccessComplete) { DPRINTF(Activity, "[tid:%i]: Activating fetch due to cache" "completion\n",tid); } @@ -831,7 +843,7 @@ DefaultFetch::checkSignalsAndUpdate(unsigned tid) } } - if (checkStall(tid) && fetchStatus[tid] != IcacheMissStall) { + if (checkStall(tid) && fetchStatus[tid] != IcacheWaitResponse) { DPRINTF(Fetch, "[tid:%i]: Setting to blocked\n",tid); fetchStatus[tid] = Blocked; @@ -882,7 +894,7 @@ DefaultFetch::fetch(bool &status_change) // If returning from the delay of a cache miss, then update the status // to running, otherwise do the cache access. Possibly move this up // to tick() function. - if (fetchStatus[tid] == IcacheMissComplete) { + if (fetchStatus[tid] == IcacheAccessComplete) { DPRINTF(Fetch, "[tid:%i]: Icache miss is complete.\n", tid); @@ -905,11 +917,11 @@ DefaultFetch::fetch(bool &status_change) ++fetchBlockedCycles; } else if (fetchStatus[tid] == Squashing) { ++fetchSquashCycles; - } else if (fetchStatus[tid] == IcacheMissStall) { + } else if (fetchStatus[tid] == IcacheWaitResponse) { ++icacheStallCycles; } - // Status is Idle, Squashing, Blocked, or IcacheMissStall, so + // Status is Idle, Squashing, Blocked, or IcacheWaitResponse, so // fetch should do nothing. return; } @@ -917,7 +929,7 @@ DefaultFetch::fetch(bool &status_change) ++fetchCycles; // If we had a stall due to an icache miss, then return. - if (fetchStatus[tid] == IcacheMissStall) { + if (fetchStatus[tid] == IcacheWaitResponse) { ++icacheStallCycles; status_change = true; return; @@ -1026,7 +1038,7 @@ DefaultFetch::fetch(bool &status_change) } else { // We shouldn't be in an icache miss and also have a fault (an ITB // miss) - if (fetchStatus[tid] == IcacheMissStall) { + if (fetchStatus[tid] == IcacheWaitResponse) { panic("Fetch should have exited prior to this!"); } @@ -1107,7 +1119,7 @@ DefaultFetch::getFetchingThread(FetchPriority &fetch_priority) int tid = *((*activeThreads).begin()); if (fetchStatus[tid] == Running || - fetchStatus[tid] == IcacheMissComplete || + fetchStatus[tid] == IcacheAccessComplete || fetchStatus[tid] == Idle) { return tid; } else { @@ -1133,7 +1145,7 @@ DefaultFetch::roundRobin() assert(high_pri <= numThreads); if (fetchStatus[high_pri] == Running || - fetchStatus[high_pri] == IcacheMissComplete || + fetchStatus[high_pri] == IcacheAccessComplete || fetchStatus[high_pri] == Idle) { priorityList.erase(pri_iter); @@ -1167,7 +1179,7 @@ DefaultFetch::iqCount() unsigned high_pri = PQ.top(); if (fetchStatus[high_pri] == Running || - fetchStatus[high_pri] == IcacheMissComplete || + fetchStatus[high_pri] == IcacheAccessComplete || fetchStatus[high_pri] == Idle) return high_pri; else @@ -1198,7 +1210,7 @@ DefaultFetch::lsqCount() unsigned high_pri = PQ.top(); if (fetchStatus[high_pri] == Running || - fetchStatus[high_pri] == IcacheMissComplete || + fetchStatus[high_pri] == IcacheAccessComplete || fetchStatus[high_pri] == Idle) return high_pri; else diff --git a/src/cpu/o3/free_list.hh b/src/cpu/o3/free_list.hh index 29e84cd44..daf1007c1 100644 --- a/src/cpu/o3/free_list.hh +++ b/src/cpu/o3/free_list.hh @@ -33,6 +33,7 @@ #include #include "arch/isa_traits.hh" +#include "base/misc.hh" #include "base/trace.hh" #include "base/traceflags.hh" #include "cpu/o3/comm.hh" diff --git a/src/cpu/o3/fu_pool.cc b/src/cpu/o3/fu_pool.cc new file mode 100644 index 000000000..fb2b5c00d --- /dev/null +++ b/src/cpu/o3/fu_pool.cc @@ -0,0 +1,295 @@ +/* + * Copyright (c) 2002-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include + +#include "cpu/o3/fu_pool.hh" +#include "encumbered/cpu/full/fu_pool.hh" +#include "sim/builder.hh" + +using namespace std; + +//////////////////////////////////////////////////////////////////////////// +// +// A pool of function units +// + +inline void +FUPool::FUIdxQueue::addFU(int fu_idx) +{ + funcUnitsIdx.push_back(fu_idx); + ++size; +} + +inline int +FUPool::FUIdxQueue::getFU() +{ + int retval = funcUnitsIdx[idx++]; + + if (idx == size) + idx = 0; + + return retval; +} + +FUPool::~FUPool() +{ + fuListIterator i = funcUnits.begin(); + fuListIterator end = funcUnits.end(); + for (; i != end; ++i) + delete *i; +} + + +// Constructor +FUPool::FUPool(string name, vector paramList) + : SimObject(name) +{ + numFU = 0; + + funcUnits.clear(); + + for (int i = 0; i < Num_OpClasses; ++i) { + maxOpLatencies[i] = 0; + maxIssueLatencies[i] = 0; + } + + // + // Iterate through the list of FUDescData structures + // + for (FUDDiterator i = paramList.begin(); i != paramList.end(); ++i) { + + // + // Don't bother with this if we're not going to create any FU's + // + if ((*i)->number) { + // + // Create the FuncUnit object from this structure + // - add the capabilities listed in the FU's operation + // description + // + // We create the first unit, then duplicate it as needed + // + FuncUnit *fu = new FuncUnit; + + OPDDiterator j = (*i)->opDescList.begin(); + OPDDiterator end = (*i)->opDescList.end(); + for (; j != end; ++j) { + // indicate that this pool has this capability + capabilityList.set((*j)->opClass); + + // Add each of the FU's that will have this capability to the + // appropriate queue. + for (int k = 0; k < (*i)->number; ++k) + fuPerCapList[(*j)->opClass].addFU(numFU + k); + + // indicate that this FU has the capability + fu->addCapability((*j)->opClass, (*j)->opLat, (*j)->issueLat); + + if ((*j)->opLat > maxOpLatencies[(*j)->opClass]) + maxOpLatencies[(*j)->opClass] = (*j)->opLat; + + if ((*j)->issueLat > maxIssueLatencies[(*j)->opClass]) + maxIssueLatencies[(*j)->opClass] = (*j)->issueLat; + } + + numFU++; + + // Add the appropriate number of copies of this FU to the list + ostringstream s; + + s << (*i)->name() << "(0)"; + fu->name = s.str(); + funcUnits.push_back(fu); + + for (int c = 1; c < (*i)->number; ++c) { + ostringstream s; + numFU++; + FuncUnit *fu2 = new FuncUnit(*fu); + + s << (*i)->name() << "(" << c << ")"; + fu2->name = s.str(); + funcUnits.push_back(fu2); + } + } + } + + unitBusy.resize(numFU); + + for (int i = 0; i < numFU; i++) { + unitBusy[i] = false; + } +} + +void +FUPool::annotateMemoryUnits(unsigned hit_latency) +{ + maxOpLatencies[MemReadOp] = hit_latency; + + fuListIterator i = funcUnits.begin(); + fuListIterator iend = funcUnits.end(); + for (; i != iend; ++i) { + if ((*i)->provides(MemReadOp)) + (*i)->opLatency(MemReadOp) = hit_latency; + + if ((*i)->provides(MemWriteOp)) + (*i)->opLatency(MemWriteOp) = hit_latency; + } +} + +int +FUPool::getUnit(OpClass capability) +{ + // If this pool doesn't have the specified capability, + // return this information to the caller + if (!capabilityList[capability]) + return -2; + + int fu_idx = fuPerCapList[capability].getFU(); + int start_idx = fu_idx; + + // Iterate through the circular queue if needed, stopping if we've reached + // the first element again. + while (unitBusy[fu_idx]) { + fu_idx = fuPerCapList[capability].getFU(); + if (fu_idx == start_idx) { + // No FU available + return -1; + } + } + + unitBusy[fu_idx] = true; + + return fu_idx; +} + +void +FUPool::freeUnitNextCycle(int fu_idx) +{ + assert(unitBusy[fu_idx]); + unitsToBeFreed.push_back(fu_idx); +} + +void +FUPool::processFreeUnits() +{ + while (!unitsToBeFreed.empty()) { + int fu_idx = unitsToBeFreed.back(); + unitsToBeFreed.pop_back(); + + assert(unitBusy[fu_idx]); + + unitBusy[fu_idx] = false; + } +} + +void +FUPool::dump() +{ + cout << "Function Unit Pool (" << name() << ")\n"; + cout << "======================================\n"; + cout << "Free List:\n"; + + for (int i = 0; i < numFU; ++i) { + if (unitBusy[i]) { + continue; + } + + cout << " [" << i << "] : "; + + cout << funcUnits[i]->name << " "; + + cout << "\n"; + } + + cout << "======================================\n"; + cout << "Busy List:\n"; + for (int i = 0; i < numFU; ++i) { + if (!unitBusy[i]) { + continue; + } + + cout << " [" << i << "] : "; + + cout << funcUnits[i]->name << " "; + + cout << "\n"; + } +} + +void +FUPool::switchOut() +{ +} + +void +FUPool::takeOverFrom() +{ + for (int i = 0; i < numFU; i++) { + unitBusy[i] = false; + } + unitsToBeFreed.clear(); +} + +// + +//////////////////////////////////////////////////////////////////////////// +// +// The SimObjects we use to get the FU information into the simulator +// +//////////////////////////////////////////////////////////////////////////// + +// +// FUPool - Contails a list of FUDesc objects to make available +// + +// +// The FuPool object +// + +BEGIN_DECLARE_SIM_OBJECT_PARAMS(FUPool) + + SimObjectVectorParam FUList; + +END_DECLARE_SIM_OBJECT_PARAMS(FUPool) + + +BEGIN_INIT_SIM_OBJECT_PARAMS(FUPool) + + INIT_PARAM(FUList, "list of FU's for this pool") + +END_INIT_SIM_OBJECT_PARAMS(FUPool) + + +CREATE_SIM_OBJECT(FUPool) +{ + return new FUPool(getInstanceName(), FUList); +} + +REGISTER_SIM_OBJECT("FUPool", FUPool) + diff --git a/src/cpu/o3/fu_pool.hh b/src/cpu/o3/fu_pool.hh new file mode 100644 index 000000000..f590c4149 --- /dev/null +++ b/src/cpu/o3/fu_pool.hh @@ -0,0 +1,162 @@ +/* + * Copyright (c) 2002-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __CPU_O3_FU_POOL_HH__ +#define __CPU_O3_FU_POOL_HH__ + +#include +#include +#include +#include + +#include "base/sched_list.hh" +#include "cpu/op_class.hh" +#include "sim/sim_object.hh" + +class FUDesc; +class FuncUnit; + +/** + * Pool of FU's, specific to the new CPU model. The old FU pool had lists of + * free units and busy units, and whenever a FU was needed it would iterate + * through the free units to find a FU that provided the capability. This pool + * has lists of units specific to each of the capabilities, and whenever a FU + * is needed, it iterates through that list to find a free unit. The previous + * FU pool would have to be ticked each cycle to update which units became + * free. This FU pool lets the IEW stage handle freeing units, which frees + * them as their scheduled execution events complete. This limits units in this + * model to either have identical issue and op latencies, or 1 cycle issue + * latencies. + */ +class FUPool : public SimObject +{ + private: + /** Maximum op execution latencies, per op class. */ + unsigned maxOpLatencies[Num_OpClasses]; + /** Maximum issue latencies, per op class. */ + unsigned maxIssueLatencies[Num_OpClasses]; + + /** Bitvector listing capabilities of this FU pool. */ + std::bitset capabilityList; + + /** Bitvector listing which FUs are busy. */ + std::vector unitBusy; + + /** List of units to be freed at the end of this cycle. */ + std::vector unitsToBeFreed; + + /** + * Class that implements a circular queue to hold FU indices. The hope is + * that FUs that have been just used will be moved to the end of the queue + * by iterating through it, thus leaving free units at the head of the + * queue. + */ + class FUIdxQueue { + public: + /** Constructs a circular queue of FU indices. */ + FUIdxQueue() + : idx(0), size(0) + { } + + /** Adds a FU to the queue. */ + inline void addFU(int fu_idx); + + /** Returns the index of the FU at the head of the queue, and changes + * the index to the next element. + */ + inline int getFU(); + + private: + /** Circular queue index. */ + int idx; + + /** Size of the queue. */ + int size; + + /** Queue of FU indices. */ + std::vector funcUnitsIdx; + }; + + /** Per op class queues of FUs that provide that capability. */ + FUIdxQueue fuPerCapList[Num_OpClasses]; + + /** Number of FUs. */ + int numFU; + + /** Functional units. */ + std::vector funcUnits; + + typedef std::vector::iterator fuListIterator; + + public: + + /** Constructs a FU pool. */ + FUPool(std::string name, std::vector l); + ~FUPool(); + + /** Annotates units that provide memory operations. Included only because + * old FU pool provided this function. + */ + void annotateMemoryUnits(unsigned hit_latency); + + /** + * Gets a FU providing the requested capability. Will mark the unit as busy, + * but leaves the freeing of the unit up to the IEW stage. + * @param capability The capability requested. + * @return Returns -2 if the FU pool does not have the capability, -1 if + * there is no free FU, and the FU's index otherwise. + */ + int getUnit(OpClass capability); + + /** Frees a FU at the end of this cycle. */ + void freeUnitNextCycle(int fu_idx); + + /** Frees all FUs on the list. */ + void processFreeUnits(); + + /** Returns the total number of FUs. */ + int size() { return numFU; } + + /** Debugging function used to dump FU information. */ + void dump(); + + /** Returns the operation execution latency of the given capability. */ + unsigned getOpLatency(OpClass capability) { + return maxOpLatencies[capability]; + } + + /** Returns the issue latency of the given capability. */ + unsigned getIssueLatency(OpClass capability) { + return maxIssueLatencies[capability]; + } + + void switchOut(); + void takeOverFrom(); +}; + +#endif // __CPU_O3_FU_POOL_HH__ diff --git a/src/cpu/o3/iew.hh b/src/cpu/o3/iew.hh index 935320628..c931669c6 100644 --- a/src/cpu/o3/iew.hh +++ b/src/cpu/o3/iew.hh @@ -110,25 +110,6 @@ class DefaultIEW /** Writeback status. */ StageStatus wbStatus; - public: - /** LdWriteback event for a load completion. */ - class LdWritebackEvent : public Event { - private: - /** Instruction that is writing back data to the register file. */ - DynInstPtr inst; - /** Pointer to IEW stage. */ - DefaultIEW *iewStage; - - public: - /** Constructs a load writeback event. */ - LdWritebackEvent(DynInstPtr &_inst, DefaultIEW *_iew); - - /** Processes writeback event. */ - virtual void process(); - /** Returns the description of the writeback event. */ - virtual const char *description(); - }; - public: /** Constructs a DefaultIEW with the given parameters. */ DefaultIEW(Params *params); diff --git a/src/cpu/o3/iew_impl.hh b/src/cpu/o3/iew_impl.hh index b0137d7fc..955ebfdf3 100644 --- a/src/cpu/o3/iew_impl.hh +++ b/src/cpu/o3/iew_impl.hh @@ -38,58 +38,6 @@ using namespace std; -template -DefaultIEW::LdWritebackEvent::LdWritebackEvent(DynInstPtr &_inst, - DefaultIEW *_iew) - : Event(&mainEventQueue), inst(_inst), iewStage(_iew) -{ - this->setFlags(Event::AutoDelete); -} - -template -void -DefaultIEW::LdWritebackEvent::process() -{ - DPRINTF(IEW, "Load writeback event [sn:%lli]\n", inst->seqNum); - DPRINTF(Activity, "Activity: Ld Writeback event [sn:%lli]\n", inst->seqNum); - - //iewStage->ldstQueue.removeMSHR(inst->threadNumber,inst->seqNum); - - if (iewStage->isSwitchedOut()) { - inst = NULL; - return; - } else if (inst->isSquashed()) { - iewStage->wakeCPU(); - inst = NULL; - return; - } - - iewStage->wakeCPU(); - - if (!inst->isExecuted()) { - inst->setExecuted(); - - // Complete access to copy data to proper place. - if (inst->isStore()) { - inst->completeAcc(); - } - } - - // Need to insert instruction into queue to commit - iewStage->instToCommit(inst); - - iewStage->activityThisCycle(); - - inst = NULL; -} - -template -const char * -DefaultIEW::LdWritebackEvent::description() -{ - return "Load writeback event"; -} - template DefaultIEW::DefaultIEW(Params *params) : // @todo: Make this into a parameter. @@ -1280,7 +1228,7 @@ DefaultIEW::executeInsts() ldstQueue.executeStore(inst); // If the store had a fault then it may not have a mem req - if (inst->req && !(inst->req->flags & LOCKED)) { + if (inst->req && !(inst->req->getFlags() & LOCKED)) { inst->setExecuted(); instToCommit(inst); @@ -1556,7 +1504,7 @@ DefaultIEW::updateExeInstStats(DynInstPtr &inst) else iewExecutedInsts++; #else - iewExecutedInsts[thread_number]++; + iewExecutedInsts++; #endif // diff --git a/src/cpu/o3/inst_queue.hh b/src/cpu/o3/inst_queue.hh index 518de73d9..843f6a8fe 100644 --- a/src/cpu/o3/inst_queue.hh +++ b/src/cpu/o3/inst_queue.hh @@ -38,7 +38,7 @@ #include "base/timebuf.hh" #include "cpu/inst_seq.hh" #include "cpu/o3/dep_graph.hh" -#include "encumbered/cpu/full/op_class.hh" +#include "cpu/op_class.hh" #include "sim/host.hh" class FUPool; diff --git a/src/cpu/o3/inst_queue_impl.hh b/src/cpu/o3/inst_queue_impl.hh index f1dc4e01f..4fa756cb6 100644 --- a/src/cpu/o3/inst_queue_impl.hh +++ b/src/cpu/o3/inst_queue_impl.hh @@ -64,8 +64,7 @@ InstructionQueue::FUCompletion::description() template InstructionQueue::InstructionQueue(Params *params) - : dcacheInterface(params->dcacheInterface), - fuPool(params->fuPool), + : fuPool(params->fuPool), numEntries(params->numIQEntries), totalWidth(params->issueWidth), numPhysIntRegs(params->numPhysIntRegs), diff --git a/src/cpu/o3/lsq.cc b/src/cpu/o3/lsq.cc new file mode 100644 index 000000000..8991ab8f8 --- /dev/null +++ b/src/cpu/o3/lsq.cc @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "cpu/o3/alpha_dyn_inst.hh" +#include "cpu/o3/alpha_cpu.hh" +#include "cpu/o3/alpha_impl.hh" +#include "cpu/o3/lsq_impl.hh" + +// Force the instantiation of LDSTQ for all the implementations we care about. +template class LSQ; + diff --git a/src/cpu/o3/lsq.hh b/src/cpu/o3/lsq.hh new file mode 100644 index 000000000..51eb23cd7 --- /dev/null +++ b/src/cpu/o3/lsq.hh @@ -0,0 +1,324 @@ +/* + * Copyright (c) 2004-2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __CPU_O3_LSQ_HH__ +#define __CPU_O3_LSQ_HH__ + +#include +#include + +#include "config/full_system.hh" +#include "cpu/inst_seq.hh" +//#include "cpu/o3/cpu_policy.hh" +#include "cpu/o3/lsq_unit.hh" +#include "mem/port.hh" +//#include "mem/page_table.hh" +#include "sim/sim_object.hh" + +template +class LSQ { + public: + typedef typename Impl::Params Params; + typedef typename Impl::FullCPU FullCPU; + typedef typename Impl::DynInstPtr DynInstPtr; + typedef typename Impl::CPUPol::IEW IEW; + typedef typename Impl::CPUPol::LSQUnit LSQUnit; + + enum LSQPolicy { + Dynamic, + Partitioned, + Threshold + }; + + /** Constructs an LSQ with the given parameters. */ + LSQ(Params *params); + + /** Returns the name of the LSQ. */ + std::string name() const; + + /** Sets the pointer to the list of active threads. */ + void setActiveThreads(std::list *at_ptr); + /** Sets the CPU pointer. */ + void setCPU(FullCPU *cpu_ptr); + /** Sets the IEW stage pointer. */ + void setIEW(IEW *iew_ptr); + /** Sets the page table pointer. */ +// void setPageTable(PageTable *pt_ptr); + + void switchOut(); + void takeOverFrom(); + + /** Number of entries needed for the given amount of threads.*/ + int entryAmount(int num_threads); + void removeEntries(unsigned tid); + /** Reset the max entries for each thread. */ + void resetEntries(); + /** Resize the max entries for a thread. */ + void resizeEntries(unsigned size, unsigned tid); + + /** Ticks the LSQ. */ + void tick(); + /** Ticks a specific LSQ Unit. */ + void tick(unsigned tid) + { thread[tid].tick(); } + + /** Inserts a load into the LSQ. */ + void insertLoad(DynInstPtr &load_inst); + /** Inserts a store into the LSQ. */ + void insertStore(DynInstPtr &store_inst); + + /** Executes a load. */ + Fault executeLoad(DynInstPtr &inst); + + Fault executeLoad(int lq_idx, unsigned tid) + { return thread[tid].executeLoad(lq_idx); } + + /** Executes a store. */ + Fault executeStore(DynInstPtr &inst); + + /** + * Commits loads up until the given sequence number for a specific thread. + */ + void commitLoads(InstSeqNum &youngest_inst, unsigned tid) + { thread[tid].commitLoads(youngest_inst); } + + /** + * Commits stores up until the given sequence number for a specific thread. + */ + void commitStores(InstSeqNum &youngest_inst, unsigned tid) + { thread[tid].commitStores(youngest_inst); } + + /** + * Attempts to write back stores until all cache ports are used or the + * interface becomes blocked. + */ + void writebackStores(); + /** Same as above, but only for one thread. */ + void writebackStores(unsigned tid); + + /** + * Squash instructions from a thread until the specified sequence number. + */ + void squash(const InstSeqNum &squashed_num, unsigned tid) + { thread[tid].squash(squashed_num); } + + /** Returns whether or not there was a memory ordering violation. */ + bool violation(); + /** + * Returns whether or not there was a memory ordering violation for a + * specific thread. + */ + bool violation(unsigned tid) + { return thread[tid].violation(); } + + /** Returns if a load is blocked due to the memory system for a specific + * thread. + */ + bool loadBlocked(unsigned tid) + { return thread[tid].loadBlocked(); } + + bool isLoadBlockedHandled(unsigned tid) + { return thread[tid].isLoadBlockedHandled(); } + + void setLoadBlockedHandled(unsigned tid) + { thread[tid].setLoadBlockedHandled(); } + + /** Gets the instruction that caused the memory ordering violation. */ + DynInstPtr getMemDepViolator(unsigned tid) + { return thread[tid].getMemDepViolator(); } + + /** Returns the head index of the load queue for a specific thread. */ + int getLoadHead(unsigned tid) + { return thread[tid].getLoadHead(); } + + /** Returns the sequence number of the head of the load queue. */ + InstSeqNum getLoadHeadSeqNum(unsigned tid) + { + return thread[tid].getLoadHeadSeqNum(); + } + + /** Returns the head index of the store queue. */ + int getStoreHead(unsigned tid) + { return thread[tid].getStoreHead(); } + + /** Returns the sequence number of the head of the store queue. */ + InstSeqNum getStoreHeadSeqNum(unsigned tid) + { + return thread[tid].getStoreHeadSeqNum(); + } + + /** Returns the number of instructions in all of the queues. */ + int getCount(); + /** Returns the number of instructions in the queues of one thread. */ + int getCount(unsigned tid) + { return thread[tid].getCount(); } + + /** Returns the total number of loads in the load queue. */ + int numLoads(); + /** Returns the total number of loads for a single thread. */ + int numLoads(unsigned tid) + { return thread[tid].numLoads(); } + + /** Returns the total number of stores in the store queue. */ + int numStores(); + /** Returns the total number of stores for a single thread. */ + int numStores(unsigned tid) + { return thread[tid].numStores(); } + + /** Returns the total number of loads that are ready. */ + int numLoadsReady(); + /** Returns the number of loads that are ready for a single thread. */ + int numLoadsReady(unsigned tid) + { return thread[tid].numLoadsReady(); } + + /** Returns the number of free entries. */ + unsigned numFreeEntries(); + /** Returns the number of free entries for a specific thread. */ + unsigned numFreeEntries(unsigned tid); + + /** Returns if the LSQ is full (either LQ or SQ is full). */ + bool isFull(); + /** + * Returns if the LSQ is full for a specific thread (either LQ or SQ is + * full). + */ + bool isFull(unsigned tid); + + /** Returns if any of the LQs are full. */ + bool lqFull(); + /** Returns if the LQ of a given thread is full. */ + bool lqFull(unsigned tid); + + /** Returns if any of the SQs are full. */ + bool sqFull(); + /** Returns if the SQ of a given thread is full. */ + bool sqFull(unsigned tid); + + /** + * Returns if the LSQ is stalled due to a memory operation that must be + * replayed. + */ + bool isStalled(); + /** + * Returns if the LSQ of a specific thread is stalled due to a memory + * operation that must be replayed. + */ + bool isStalled(unsigned tid); + + /** Returns whether or not there are any stores to write back to memory. */ + bool hasStoresToWB(); + + /** Returns whether or not a specific thread has any stores to write back + * to memory. + */ + bool hasStoresToWB(unsigned tid) + { return thread[tid].hasStoresToWB(); } + + /** Returns the number of stores a specific thread has to write back. */ + int numStoresToWB(unsigned tid) + { return thread[tid].numStoresToWB(); } + + /** Returns if the LSQ will write back to memory this cycle. */ + bool willWB(); + /** Returns if the LSQ of a specific thread will write back to memory this + * cycle. + */ + bool willWB(unsigned tid) + { return thread[tid].willWB(); } + + /** Debugging function to print out all instructions. */ + void dumpInsts(); + /** Debugging function to print out instructions from a specific thread. */ + void dumpInsts(unsigned tid) + { thread[tid].dumpInsts(); } + + /** Executes a read operation, using the load specified at the load index. */ + template + Fault read(RequestPtr req, T &data, int load_idx); + + /** Executes a store operation, using the store specified at the store + * index. + */ + template + Fault write(RequestPtr req, T &data, int store_idx); + + private: + /** The LSQ policy for SMT mode. */ + LSQPolicy lsqPolicy; + + /** The LSQ units for individual threads. */ + LSQUnit thread[Impl::MaxThreads]; + + /** The CPU pointer. */ + FullCPU *cpu; + + /** The IEW stage pointer. */ + IEW *iewStage; + + /** The pointer to the page table. */ +// PageTable *pTable; + + /** List of Active Threads in System. */ + std::list *activeThreads; + + /** Total Size of LQ Entries. */ + unsigned LQEntries; + /** Total Size of SQ Entries. */ + unsigned SQEntries; + + /** Max LQ Size - Used to Enforce Sharing Policies. */ + unsigned maxLQEntries; + + /** Max SQ Size - Used to Enforce Sharing Policies. */ + unsigned maxSQEntries; + + /** Number of Threads. */ + unsigned numThreads; +}; + +template +template +Fault +LSQ::read(RequestPtr req, T &data, int load_idx) +{ + unsigned tid = req->getThreadNum(); + + return thread[tid].read(req, data, load_idx); +} + +template +template +Fault +LSQ::write(RequestPtr req, T &data, int store_idx) +{ + unsigned tid = req->getThreadNum(); + + return thread[tid].write(req, data, store_idx); +} + +#endif // __CPU_O3_LSQ_HH__ diff --git a/src/cpu/o3/lsq_impl.hh b/src/cpu/o3/lsq_impl.hh new file mode 100644 index 000000000..a6ad27522 --- /dev/null +++ b/src/cpu/o3/lsq_impl.hh @@ -0,0 +1,538 @@ +/* + * Copyright (c) 2004-2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include + +#include "cpu/o3/lsq.hh" + +using namespace std; + +template +LSQ::LSQ(Params *params) + : LQEntries(params->LQEntries), SQEntries(params->SQEntries), + numThreads(params->numberOfThreads) +{ + DPRINTF(LSQ, "Creating LSQ object.\n"); + + //**********************************************/ + //************ Handle SMT Parameters ***********/ + //**********************************************/ + string policy = params->smtLSQPolicy; + + //Convert string to lowercase + std::transform(policy.begin(), policy.end(), policy.begin(), + (int(*)(int)) tolower); + + //Figure out fetch policy + if (policy == "dynamic") { + lsqPolicy = Dynamic; + + maxLQEntries = LQEntries; + maxSQEntries = SQEntries; + + DPRINTF(LSQ, "LSQ sharing policy set to Dynamic\n"); + + } else if (policy == "partitioned") { + lsqPolicy = Partitioned; + + //@todo:make work if part_amt doesnt divide evenly. + maxLQEntries = LQEntries / numThreads; + maxSQEntries = SQEntries / numThreads; + + DPRINTF(Fetch, "LSQ sharing policy set to Partitioned: " + "%i entries per LQ | %i entries per SQ", + maxLQEntries,maxSQEntries); + + } else if (policy == "threshold") { + lsqPolicy = Threshold; + + assert(params->smtLSQThreshold > LQEntries); + assert(params->smtLSQThreshold > SQEntries); + + //Divide up by threshold amount + //@todo: Should threads check the max and the total + //amount of the LSQ + maxLQEntries = params->smtLSQThreshold; + maxSQEntries = params->smtLSQThreshold; + + DPRINTF(LSQ, "LSQ sharing policy set to Threshold: " + "%i entries per LQ | %i entries per SQ", + maxLQEntries,maxSQEntries); + + } else { + assert(0 && "Invalid LSQ Sharing Policy.Options Are:{Dynamic," + "Partitioned, Threshold}"); + } + + //Initialize LSQs + for (int tid=0; tid < numThreads; tid++) { + thread[tid].init(params, maxLQEntries, maxSQEntries, tid); + } +} + + +template +std::string +LSQ::name() const +{ + return iewStage->name() + ".lsq"; +} + +template +void +LSQ::setActiveThreads(list *at_ptr) +{ + activeThreads = at_ptr; + assert(activeThreads != 0); +} + +template +void +LSQ::setCPU(FullCPU *cpu_ptr) +{ + cpu = cpu_ptr; + + for (int tid=0; tid < numThreads; tid++) { + thread[tid].setCPU(cpu_ptr); + } +} + +template +void +LSQ::setIEW(IEW *iew_ptr) +{ + iewStage = iew_ptr; + + for (int tid=0; tid < numThreads; tid++) { + thread[tid].setIEW(iew_ptr); + } +} + +#if 0 +template +void +LSQ::setPageTable(PageTable *pt_ptr) +{ + for (int tid=0; tid < numThreads; tid++) { + thread[tid].setPageTable(pt_ptr); + } +} +#endif + +template +void +LSQ::switchOut() +{ + for (int tid = 0; tid < numThreads; tid++) { + thread[tid].switchOut(); + } +} + +template +void +LSQ::takeOverFrom() +{ + for (int tid = 0; tid < numThreads; tid++) { + thread[tid].takeOverFrom(); + } +} + +template +int +LSQ::entryAmount(int num_threads) +{ + if (lsqPolicy == Partitioned) { + return LQEntries / num_threads; + } else { + return 0; + } +} + +template +void +LSQ::resetEntries() +{ + if (lsqPolicy != Dynamic || numThreads > 1) { + int active_threads = (*activeThreads).size(); + + list::iterator threads = (*activeThreads).begin(); + list::iterator list_end = (*activeThreads).end(); + + int maxEntries; + + if (lsqPolicy == Partitioned) { + maxEntries = LQEntries / active_threads; + } else if (lsqPolicy == Threshold && active_threads == 1) { + maxEntries = LQEntries; + } else { + maxEntries = LQEntries; + } + + while (threads != list_end) { + resizeEntries(maxEntries,*threads++); + } + } +} + +template +void +LSQ::removeEntries(unsigned tid) +{ + thread[tid].clearLQ(); + thread[tid].clearSQ(); +} + +template +void +LSQ::resizeEntries(unsigned size,unsigned tid) +{ + thread[tid].resizeLQ(size); + thread[tid].resizeSQ(size); +} + +template +void +LSQ::tick() +{ + list::iterator active_threads = (*activeThreads).begin(); + + while (active_threads != (*activeThreads).end()) { + unsigned tid = *active_threads++; + + thread[tid].tick(); + } +} + +template +void +LSQ::insertLoad(DynInstPtr &load_inst) +{ + unsigned tid = load_inst->threadNumber; + + thread[tid].insertLoad(load_inst); +} + +template +void +LSQ::insertStore(DynInstPtr &store_inst) +{ + unsigned tid = store_inst->threadNumber; + + thread[tid].insertStore(store_inst); +} + +template +Fault +LSQ::executeLoad(DynInstPtr &inst) +{ + unsigned tid = inst->threadNumber; + + return thread[tid].executeLoad(inst); +} + +template +Fault +LSQ::executeStore(DynInstPtr &inst) +{ + unsigned tid = inst->threadNumber; + + return thread[tid].executeStore(inst); +} + +template +void +LSQ::writebackStores() +{ + list::iterator active_threads = (*activeThreads).begin(); + + while (active_threads != (*activeThreads).end()) { + unsigned tid = *active_threads++; + + if (numStoresToWB(tid) > 0) { + DPRINTF(Writeback,"[tid:%i] Writing back stores. %i stores " + "available for Writeback.\n", tid, numStoresToWB(tid)); + } + + thread[tid].writebackStores(); + } +} + +template +bool +LSQ::violation() +{ + /* Answers: Does Anybody Have a Violation?*/ + list::iterator active_threads = (*activeThreads).begin(); + + while (active_threads != (*activeThreads).end()) { + unsigned tid = *active_threads++; + if (thread[tid].violation()) + return true; + } + + return false; +} + +template +int +LSQ::getCount() +{ + unsigned total = 0; + + list::iterator active_threads = (*activeThreads).begin(); + + while (active_threads != (*activeThreads).end()) { + unsigned tid = *active_threads++; + total += getCount(tid); + } + + return total; +} + +template +int +LSQ::numLoads() +{ + unsigned total = 0; + + list::iterator active_threads = (*activeThreads).begin(); + + while (active_threads != (*activeThreads).end()) { + unsigned tid = *active_threads++; + total += numLoads(tid); + } + + return total; +} + +template +int +LSQ::numStores() +{ + unsigned total = 0; + + list::iterator active_threads = (*activeThreads).begin(); + + while (active_threads != (*activeThreads).end()) { + unsigned tid = *active_threads++; + total += thread[tid].numStores(); + } + + return total; +} + +template +int +LSQ::numLoadsReady() +{ + unsigned total = 0; + + list::iterator active_threads = (*activeThreads).begin(); + + while (active_threads != (*activeThreads).end()) { + unsigned tid = *active_threads++; + total += thread[tid].numLoadsReady(); + } + + return total; +} + +template +unsigned +LSQ::numFreeEntries() +{ + unsigned total = 0; + + list::iterator active_threads = (*activeThreads).begin(); + + while (active_threads != (*activeThreads).end()) { + unsigned tid = *active_threads++; + total += thread[tid].numFreeEntries(); + } + + return total; +} + +template +unsigned +LSQ::numFreeEntries(unsigned tid) +{ + //if( lsqPolicy == Dynamic ) + //return numFreeEntries(); + //else + return thread[tid].numFreeEntries(); +} + +template +bool +LSQ::isFull() +{ + list::iterator active_threads = (*activeThreads).begin(); + + while (active_threads != (*activeThreads).end()) { + unsigned tid = *active_threads++; + if (! (thread[tid].lqFull() || thread[tid].sqFull()) ) + return false; + } + + return true; +} + +template +bool +LSQ::isFull(unsigned tid) +{ + //@todo: Change to Calculate All Entries for + //Dynamic Policy + if( lsqPolicy == Dynamic ) + return isFull(); + else + return thread[tid].lqFull() || thread[tid].sqFull(); +} + +template +bool +LSQ::lqFull() +{ + list::iterator active_threads = (*activeThreads).begin(); + + while (active_threads != (*activeThreads).end()) { + unsigned tid = *active_threads++; + if (!thread[tid].lqFull()) + return false; + } + + return true; +} + +template +bool +LSQ::lqFull(unsigned tid) +{ + //@todo: Change to Calculate All Entries for + //Dynamic Policy + if( lsqPolicy == Dynamic ) + return lqFull(); + else + return thread[tid].lqFull(); +} + +template +bool +LSQ::sqFull() +{ + list::iterator active_threads = (*activeThreads).begin(); + + while (active_threads != (*activeThreads).end()) { + unsigned tid = *active_threads++; + if (!sqFull(tid)) + return false; + } + + return true; +} + +template +bool +LSQ::sqFull(unsigned tid) +{ + //@todo: Change to Calculate All Entries for + //Dynamic Policy + if( lsqPolicy == Dynamic ) + return sqFull(); + else + return thread[tid].sqFull(); +} + +template +bool +LSQ::isStalled() +{ + list::iterator active_threads = (*activeThreads).begin(); + + while (active_threads != (*activeThreads).end()) { + unsigned tid = *active_threads++; + if (!thread[tid].isStalled()) + return false; + } + + return true; +} + +template +bool +LSQ::isStalled(unsigned tid) +{ + if( lsqPolicy == Dynamic ) + return isStalled(); + else + return thread[tid].isStalled(); +} + +template +bool +LSQ::hasStoresToWB() +{ + list::iterator active_threads = (*activeThreads).begin(); + + while (active_threads != (*activeThreads).end()) { + unsigned tid = *active_threads++; + if (!hasStoresToWB(tid)) + return false; + } + + return true; +} + +template +bool +LSQ::willWB() +{ + list::iterator active_threads = (*activeThreads).begin(); + + while (active_threads != (*activeThreads).end()) { + unsigned tid = *active_threads++; + if (!willWB(tid)) + return false; + } + + return true; +} + +template +void +LSQ::dumpInsts() +{ + list::iterator active_threads = (*activeThreads).begin(); + + while (active_threads != (*activeThreads).end()) { + unsigned tid = *active_threads++; + thread[tid].dumpInsts(); + } +} diff --git a/src/cpu/o3/lsq_unit.cc b/src/cpu/o3/lsq_unit.cc new file mode 100644 index 000000000..dd29007bc --- /dev/null +++ b/src/cpu/o3/lsq_unit.cc @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "cpu/o3/alpha_dyn_inst.hh" +#include "cpu/o3/alpha_cpu.hh" +#include "cpu/o3/alpha_impl.hh" +#include "cpu/o3/lsq_unit_impl.hh" + +// Force the instantiation of LDSTQ for all the implementations we care about. +template class LSQUnit; + diff --git a/src/cpu/o3/lsq_unit.hh b/src/cpu/o3/lsq_unit.hh new file mode 100644 index 000000000..b339cea2c --- /dev/null +++ b/src/cpu/o3/lsq_unit.hh @@ -0,0 +1,629 @@ +/* + * Copyright (c) 2004-2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __CPU_O3_LSQ_UNIT_HH__ +#define __CPU_O3_LSQ_UNIT_HH__ + +#include +#include +#include + +#include "arch/faults.hh" +#include "config/full_system.hh" +#include "base/hashmap.hh" +#include "cpu/inst_seq.hh" +#include "mem/packet.hh" +#include "mem/port.hh" +//#include "mem/page_table.hh" +//#include "sim/debug.hh" +//#include "sim/sim_object.hh" + +/** + * Class that implements the actual LQ and SQ for each specific + * thread. Both are circular queues; load entries are freed upon + * committing, while store entries are freed once they writeback. The + * LSQUnit tracks if there are memory ordering violations, and also + * detects partial load to store forwarding cases (a store only has + * part of a load's data) that requires the load to wait until the + * store writes back. In the former case it holds onto the instruction + * until the dependence unit looks at it, and in the latter it stalls + * the LSQ until the store writes back. At that point the load is + * replayed. + */ +template +class LSQUnit { + protected: + typedef TheISA::IntReg IntReg; + public: + typedef typename Impl::Params Params; + typedef typename Impl::FullCPU FullCPU; + typedef typename Impl::DynInstPtr DynInstPtr; + typedef typename Impl::CPUPol::IEW IEW; + typedef typename Impl::CPUPol::IssueStruct IssueStruct; + + public: + /** Constructs an LSQ unit. init() must be called prior to use. */ + LSQUnit(); + + /** Initializes the LSQ unit with the specified number of entries. */ + void init(Params *params, unsigned maxLQEntries, + unsigned maxSQEntries, unsigned id); + + /** Returns the name of the LSQ unit. */ + std::string name() const; + + /** Sets the CPU pointer. */ + void setCPU(FullCPU *cpu_ptr); + + /** Sets the IEW stage pointer. */ + void setIEW(IEW *iew_ptr) + { iewStage = iew_ptr; } + + /** Sets the page table pointer. */ +// void setPageTable(PageTable *pt_ptr); + + void switchOut(); + + void takeOverFrom(); + + bool isSwitchedOut() { return switchedOut; } + + /** Ticks the LSQ unit, which in this case only resets the number of + * used cache ports. + * @todo: Move the number of used ports up to the LSQ level so it can + * be shared by all LSQ units. + */ + void tick() { usedPorts = 0; } + + /** Inserts an instruction. */ + void insert(DynInstPtr &inst); + /** Inserts a load instruction. */ + void insertLoad(DynInstPtr &load_inst); + /** Inserts a store instruction. */ + void insertStore(DynInstPtr &store_inst); + + /** Executes a load instruction. */ + Fault executeLoad(DynInstPtr &inst); + + Fault executeLoad(int lq_idx) { panic("Not implemented"); return NoFault; } + /** Executes a store instruction. */ + Fault executeStore(DynInstPtr &inst); + + /** Commits the head load. */ + void commitLoad(); + /** Commits loads older than a specific sequence number. */ + void commitLoads(InstSeqNum &youngest_inst); + + /** Commits stores older than a specific sequence number. */ + void commitStores(InstSeqNum &youngest_inst); + + /** Writes back stores. */ + void writebackStores(); + + void completeDataAccess(PacketPtr pkt); + + void completeStoreDataAccess(DynInstPtr &inst); + + // @todo: Include stats in the LSQ unit. + //void regStats(); + + /** Clears all the entries in the LQ. */ + void clearLQ(); + + /** Clears all the entries in the SQ. */ + void clearSQ(); + + /** Resizes the LQ to a given size. */ + void resizeLQ(unsigned size); + + /** Resizes the SQ to a given size. */ + void resizeSQ(unsigned size); + + /** Squashes all instructions younger than a specific sequence number. */ + void squash(const InstSeqNum &squashed_num); + + /** Returns if there is a memory ordering violation. Value is reset upon + * call to getMemDepViolator(). + */ + bool violation() { return memDepViolator; } + + /** Returns the memory ordering violator. */ + DynInstPtr getMemDepViolator(); + + /** Returns if a load became blocked due to the memory system. */ + bool loadBlocked() + { return isLoadBlocked; } + + void clearLoadBlocked() + { isLoadBlocked = false; } + + bool isLoadBlockedHandled() + { return loadBlockedHandled; } + + void setLoadBlockedHandled() + { loadBlockedHandled = true; } + + /** Returns the number of free entries (min of free LQ and SQ entries). */ + unsigned numFreeEntries(); + + /** Returns the number of loads ready to execute. */ + int numLoadsReady(); + + /** Returns the number of loads in the LQ. */ + int numLoads() { return loads; } + + /** Returns the number of stores in the SQ. */ + int numStores() { return stores; } + + /** Returns if either the LQ or SQ is full. */ + bool isFull() { return lqFull() || sqFull(); } + + /** Returns if the LQ is full. */ + bool lqFull() { return loads >= (LQEntries - 1); } + + /** Returns if the SQ is full. */ + bool sqFull() { return stores >= (SQEntries - 1); } + + /** Returns the number of instructions in the LSQ. */ + unsigned getCount() { return loads + stores; } + + /** Returns if there are any stores to writeback. */ + bool hasStoresToWB() { return storesToWB; } + + /** Returns the number of stores to writeback. */ + int numStoresToWB() { return storesToWB; } + + /** Returns if the LSQ unit will writeback on this cycle. */ + bool willWB() { return storeQueue[storeWBIdx].canWB && + !storeQueue[storeWBIdx].completed/* && + !dcacheInterface->isBlocked()*/; } + + private: + /** Completes the store at the specified index. */ + void completeStore(int store_idx); + + /** Increments the given store index (circular queue). */ + inline void incrStIdx(int &store_idx); + /** Decrements the given store index (circular queue). */ + inline void decrStIdx(int &store_idx); + /** Increments the given load index (circular queue). */ + inline void incrLdIdx(int &load_idx); + /** Decrements the given load index (circular queue). */ + inline void decrLdIdx(int &load_idx); + + public: + /** Debugging function to dump instructions in the LSQ. */ + void dumpInsts(); + + private: + /** Pointer to the CPU. */ + FullCPU *cpu; + + /** Pointer to the IEW stage. */ + IEW *iewStage; + + MemObject *mem; + + class DcachePort : public Port + { + protected: + FullCPU *cpu; + LSQUnit *lsq; + + public: + DcachePort(FullCPU *_cpu, LSQUnit *_lsq) + : Port(_lsq->name() + "-dport"), cpu(_cpu), lsq(_lsq) + { } + + protected: + virtual Tick recvAtomic(PacketPtr pkt); + + virtual void recvFunctional(PacketPtr pkt); + + virtual void recvStatusChange(Status status); + + virtual void getDeviceAddressRanges(AddrRangeList &resp, + AddrRangeList &snoop) + { resp.clear(); snoop.clear(); } + + virtual bool recvTiming(PacketPtr pkt); + + virtual void recvRetry(); + }; + + /** Pointer to the D-cache. */ + DcachePort *dcachePort; + + /** Pointer to the page table. */ +// PageTable *pTable; + + public: + struct SQEntry { + /** Constructs an empty store queue entry. */ + SQEntry() + : inst(NULL), req(NULL), size(0), data(0), + canWB(0), committed(0), completed(0) + { } + + /** Constructs a store queue entry for a given instruction. */ + SQEntry(DynInstPtr &_inst) + : inst(_inst), req(NULL), size(0), data(0), + canWB(0), committed(0), completed(0) + { } + + /** The store instruction. */ + DynInstPtr inst; + /** The request for the store. */ + RequestPtr req; + /** The size of the store. */ + int size; + /** The store data. */ + IntReg data; + /** Whether or not the store can writeback. */ + bool canWB; + /** Whether or not the store is committed. */ + bool committed; + /** Whether or not the store is completed. */ + bool completed; + }; + + private: + /** The LSQUnit thread id. */ + unsigned lsqID; + + /** The store queue. */ + std::vector storeQueue; + + /** The load queue. */ + std::vector loadQueue; + + /** The number of LQ entries, plus a sentinel entry (circular queue). + * @todo: Consider having var that records the true number of LQ entries. + */ + unsigned LQEntries; + /** The number of SQ entries, plus a sentinel entry (circular queue). + * @todo: Consider having var that records the true number of SQ entries. + */ + unsigned SQEntries; + + /** The number of load instructions in the LQ. */ + int loads; + /** The number of store instructions in the SQ. */ + int stores; + /** The number of store instructions in the SQ waiting to writeback. */ + int storesToWB; + + /** The index of the head instruction in the LQ. */ + int loadHead; + /** The index of the tail instruction in the LQ. */ + int loadTail; + + /** The index of the head instruction in the SQ. */ + int storeHead; + /** The index of the first instruction that may be ready to be + * written back, and has not yet been written back. + */ + int storeWBIdx; + /** The index of the tail instruction in the SQ. */ + int storeTail; + + /// @todo Consider moving to a more advanced model with write vs read ports + /** The number of cache ports available each cycle. */ + int cachePorts; + + /** The number of used cache ports in this cycle. */ + int usedPorts; + + bool switchedOut; + + //list mshrSeqNums; + + /** Wire to read information from the issue stage time queue. */ + typename TimeBuffer::wire fromIssue; + + /** Whether or not the LSQ is stalled. */ + bool stalled; + /** The store that causes the stall due to partial store to load + * forwarding. + */ + InstSeqNum stallingStoreIsn; + /** The index of the above store. */ + int stallingLoadIdx; + + /** Whether or not a load is blocked due to the memory system. */ + bool isLoadBlocked; + + bool loadBlockedHandled; + + InstSeqNum blockedLoadSeqNum; + + /** The oldest load that caused a memory ordering violation. */ + DynInstPtr memDepViolator; + + // Will also need how many read/write ports the Dcache has. Or keep track + // of that in stage that is one level up, and only call executeLoad/Store + // the appropriate number of times. +/* + // total number of loads forwaded from LSQ stores + Stats::Vector<> lsq_forw_loads; + + // total number of loads ignored due to invalid addresses + Stats::Vector<> inv_addr_loads; + + // total number of software prefetches ignored due to invalid addresses + Stats::Vector<> inv_addr_swpfs; + + // total non-speculative bogus addresses seen (debug var) + Counter sim_invalid_addrs; + Stats::Vector<> fu_busy; //cumulative fu busy + + // ready loads blocked due to memory disambiguation + Stats::Vector<> lsq_blocked_loads; + + Stats::Scalar<> lsqInversion; +*/ + public: + /** Executes the load at the given index. */ + template + Fault read(Request *req, T &data, int load_idx); + + /** Executes the store at the given index. */ + template + Fault write(Request *req, T &data, int store_idx); + + /** Returns the index of the head load instruction. */ + int getLoadHead() { return loadHead; } + /** Returns the sequence number of the head load instruction. */ + InstSeqNum getLoadHeadSeqNum() + { + if (loadQueue[loadHead]) { + return loadQueue[loadHead]->seqNum; + } else { + return 0; + } + + } + + /** Returns the index of the head store instruction. */ + int getStoreHead() { return storeHead; } + /** Returns the sequence number of the head store instruction. */ + InstSeqNum getStoreHeadSeqNum() + { + if (storeQueue[storeHead].inst) { + return storeQueue[storeHead].inst->seqNum; + } else { + return 0; + } + + } + + /** Returns whether or not the LSQ unit is stalled. */ + bool isStalled() { return stalled; } +}; + +template +template +Fault +LSQUnit::read(Request *req, T &data, int load_idx) +{ + DynInstPtr load_inst = loadQueue[load_idx]; + + assert(load_inst); + + assert(!load_inst->isExecuted()); + + // Make sure this isn't an uncacheable access + // A bit of a hackish way to get uncached accesses to work only if they're + // at the head of the LSQ and are ready to commit (at the head of the ROB + // too). + if (req->getFlags() & UNCACHEABLE && + (load_idx != loadHead || !load_inst->reachedCommit)) { + iewStage->rescheduleMemInst(load_inst); + return TheISA::genMachineCheckFault(); + } + + // Check the SQ for any previous stores that might lead to forwarding + int store_idx = load_inst->sqIdx; + + int store_size = 0; + + DPRINTF(LSQUnit, "Read called, load idx: %i, store idx: %i, " + "storeHead: %i addr: %#x\n", + load_idx, store_idx, storeHead, req->getPaddr()); + +#if 0 + if (req->getFlags() & LOCKED) { + cpu->lockAddr = req->getPaddr(); + cpu->lockFlag = true; + } +#endif + + while (store_idx != -1) { + // End once we've reached the top of the LSQ + if (store_idx == storeWBIdx) { + break; + } + + // Move the index to one younger + if (--store_idx < 0) + store_idx += SQEntries; + + assert(storeQueue[store_idx].inst); + + store_size = storeQueue[store_idx].size; + + if (store_size == 0) + continue; + + // Check if the store data is within the lower and upper bounds of + // addresses that the request needs. + bool store_has_lower_limit = + req->getVaddr() >= storeQueue[store_idx].inst->effAddr; + bool store_has_upper_limit = + (req->getVaddr() + req->getSize()) <= + (storeQueue[store_idx].inst->effAddr + store_size); + bool lower_load_has_store_part = + req->getVaddr() < (storeQueue[store_idx].inst->effAddr + + store_size); + bool upper_load_has_store_part = + (req->getVaddr() + req->getSize()) > + storeQueue[store_idx].inst->effAddr; + + // If the store's data has all of the data needed, we can forward. + if (store_has_lower_limit && store_has_upper_limit) { + // Get shift amount for offset into the store's data. + int shift_amt = req->getVaddr() & (store_size - 1); + // @todo: Magic number, assumes byte addressing + shift_amt = shift_amt << 3; + + // Cast this to type T? + data = storeQueue[store_idx].data >> shift_amt; + + assert(!load_inst->memData); + load_inst->memData = new uint8_t[64]; + + memcpy(load_inst->memData, &data, req->getSize()); + + DPRINTF(LSQUnit, "Forwarding from store idx %i to load to " + "addr %#x, data %#x\n", + store_idx, req->getVaddr(), *(load_inst->memData)); +/* + typename LdWritebackEvent *wb = + new typename LdWritebackEvent(load_inst, + iewStage); + + // We'll say this has a 1 cycle load-store forwarding latency + // for now. + // @todo: Need to make this a parameter. + wb->schedule(curTick); +*/ + // Should keep track of stat for forwarded data + return NoFault; + } else if ((store_has_lower_limit && lower_load_has_store_part) || + (store_has_upper_limit && upper_load_has_store_part) || + (lower_load_has_store_part && upper_load_has_store_part)) { + // This is the partial store-load forwarding case where a store + // has only part of the load's data. + + // If it's already been written back, then don't worry about + // stalling on it. + if (storeQueue[store_idx].completed) { + continue; + } + + // Must stall load and force it to retry, so long as it's the oldest + // load that needs to do so. + if (!stalled || + (stalled && + load_inst->seqNum < + loadQueue[stallingLoadIdx]->seqNum)) { + stalled = true; + stallingStoreIsn = storeQueue[store_idx].inst->seqNum; + stallingLoadIdx = load_idx; + } + + // Tell IQ/mem dep unit that this instruction will need to be + // rescheduled eventually + iewStage->rescheduleMemInst(load_inst); + + // Do not generate a writeback event as this instruction is not + // complete. + DPRINTF(LSQUnit, "Load-store forwarding mis-match. " + "Store idx %i to load addr %#x\n", + store_idx, req->getVaddr()); + + return NoFault; + } + } + + // If there's no forwarding case, then go access memory + DPRINTF(LSQUnit, "Doing functional access for inst [sn:%lli] PC %#x\n", + load_inst->seqNum, load_inst->readPC()); + + assert(!load_inst->memData); + load_inst->memData = new uint8_t[64]; + + ++usedPorts; + + DPRINTF(LSQUnit, "Doing timing access for inst PC %#x\n", + load_inst->readPC()); + + PacketPtr data_pkt = new Packet(req, Packet::ReadReq, Packet::Broadcast); + data_pkt->dataStatic(load_inst->memData); + + // if we have a cache, do cache access too + if (!dcachePort->sendTiming(data_pkt)) { + // There's an older load that's already going to squash. + if (isLoadBlocked && blockedLoadSeqNum < load_inst->seqNum) + return NoFault; + + // Record that the load was blocked due to memory. This + // load will squash all instructions after it, be + // refetched, and re-executed. + isLoadBlocked = true; + loadBlockedHandled = false; + blockedLoadSeqNum = load_inst->seqNum; + // No fault occurred, even though the interface is blocked. + return NoFault; + } + + if (data_pkt->result != Packet::Success) { + DPRINTF(LSQUnit, "LSQUnit: D-cache miss!\n"); + DPRINTF(Activity, "Activity: ld accessing mem miss [sn:%lli]\n", + load_inst->seqNum); + } else { + DPRINTF(LSQUnit, "LSQUnit: D-cache hit!\n"); + DPRINTF(Activity, "Activity: ld accessing mem hit [sn:%lli]\n", + load_inst->seqNum); + } + + return NoFault; +} + +template +template +Fault +LSQUnit::write(Request *req, T &data, int store_idx) +{ + assert(storeQueue[store_idx].inst); + + DPRINTF(LSQUnit, "Doing write to store idx %i, addr %#x data %#x" + " | storeHead:%i [sn:%i]\n", + store_idx, req->getPaddr(), data, storeHead, + storeQueue[store_idx].inst->seqNum); + + storeQueue[store_idx].req = req; + storeQueue[store_idx].size = sizeof(T); + storeQueue[store_idx].data = data; + + // This function only writes the data to the store queue, so no fault + // can happen here. + return NoFault; +} + +#endif // __CPU_O3_LSQ_UNIT_HH__ diff --git a/src/cpu/o3/lsq_unit_impl.hh b/src/cpu/o3/lsq_unit_impl.hh new file mode 100644 index 000000000..3f6af3d2c --- /dev/null +++ b/src/cpu/o3/lsq_unit_impl.hh @@ -0,0 +1,866 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "cpu/checker/cpu.hh" +#include "cpu/o3/lsq_unit.hh" +#include "base/str.hh" +#include "mem/request.hh" + +template +void +LSQUnit::completeDataAccess(PacketPtr pkt) +{ +/* + DPRINTF(IEW, "Load writeback event [sn:%lli]\n", inst->seqNum); + DPRINTF(Activity, "Activity: Ld Writeback event [sn:%lli]\n", inst->seqNum); + + //iewStage->ldstQueue.removeMSHR(inst->threadNumber,inst->seqNum); + + if (iewStage->isSwitchedOut()) { + inst = NULL; + return; + } else if (inst->isSquashed()) { + iewStage->wakeCPU(); + inst = NULL; + return; + } + + iewStage->wakeCPU(); + + if (!inst->isExecuted()) { + inst->setExecuted(); + + // Complete access to copy data to proper place. + inst->completeAcc(); + } + + // Need to insert instruction into queue to commit + iewStage->instToCommit(inst); + + iewStage->activityThisCycle(); + + inst = NULL; +*/ +} + +template +void +LSQUnit::completeStoreDataAccess(DynInstPtr &inst) +{ +/* + DPRINTF(LSQ, "Cache miss complete for store idx:%i\n", storeIdx); + DPRINTF(Activity, "Activity: st writeback event idx:%i\n", storeIdx); + + //lsqPtr->removeMSHR(lsqPtr->storeQueue[storeIdx].inst->seqNum); + + if (lsqPtr->isSwitchedOut()) + return; + + lsqPtr->cpu->wakeCPU(); + + if (wb) + lsqPtr->completeDataAccess(storeIdx); + lsqPtr->completeStore(storeIdx); +*/ +} + +template +Tick +LSQUnit::DcachePort::recvAtomic(PacketPtr pkt) +{ + panic("O3CPU model does not work with atomic mode!"); + return curTick; +} + +template +void +LSQUnit::DcachePort::recvFunctional(PacketPtr pkt) +{ + panic("O3CPU doesn't expect recvFunctional callback!"); +} + +template +void +LSQUnit::DcachePort::recvStatusChange(Status status) +{ + if (status == RangeChange) + return; + + panic("O3CPU doesn't expect recvStatusChange callback!"); +} + +template +bool +LSQUnit::DcachePort::recvTiming(PacketPtr pkt) +{ + lsq->completeDataAccess(pkt); + return true; +} + +template +void +LSQUnit::DcachePort::recvRetry() +{ + panic("Retry unsupported for now!"); + // we shouldn't get a retry unless we have a packet that we're + // waiting to transmit +/* + assert(cpu->dcache_pkt != NULL); + assert(cpu->_status == DcacheRetry); + PacketPtr tmp = cpu->dcache_pkt; + if (sendTiming(tmp)) { + cpu->_status = DcacheWaitResponse; + cpu->dcache_pkt = NULL; + } +*/ +} + +template +LSQUnit::LSQUnit() + : loads(0), stores(0), storesToWB(0), stalled(false), isLoadBlocked(false), + loadBlockedHandled(false) +{ +} + +template +void +LSQUnit::init(Params *params, unsigned maxLQEntries, + unsigned maxSQEntries, unsigned id) +{ + DPRINTF(LSQUnit, "Creating LSQUnit%i object.\n",id); + + switchedOut = false; + + lsqID = id; + + // Add 1 for the sentinel entry (they are circular queues). + LQEntries = maxLQEntries + 1; + SQEntries = maxSQEntries + 1; + + loadQueue.resize(LQEntries); + storeQueue.resize(SQEntries); + + loadHead = loadTail = 0; + + storeHead = storeWBIdx = storeTail = 0; + + usedPorts = 0; + cachePorts = params->cachePorts; + + Port *mem_dport = params->mem->getPort(""); + dcachePort->setPeer(mem_dport); + mem_dport->setPeer(dcachePort); + + memDepViolator = NULL; + + blockedLoadSeqNum = 0; +} + +template +void +LSQUnit::setCPU(FullCPU *cpu_ptr) +{ + cpu = cpu_ptr; + dcachePort = new DcachePort(cpu, this); +} + +template +std::string +LSQUnit::name() const +{ + if (Impl::MaxThreads == 1) { + return iewStage->name() + ".lsq"; + } else { + return iewStage->name() + ".lsq.thread." + to_string(lsqID); + } +} + +template +void +LSQUnit::clearLQ() +{ + loadQueue.clear(); +} + +template +void +LSQUnit::clearSQ() +{ + storeQueue.clear(); +} + +#if 0 +template +void +LSQUnit::setPageTable(PageTable *pt_ptr) +{ + DPRINTF(LSQUnit, "Setting the page table pointer.\n"); + pTable = pt_ptr; +} +#endif + +template +void +LSQUnit::switchOut() +{ + switchedOut = true; + for (int i = 0; i < loadQueue.size(); ++i) + loadQueue[i] = NULL; + + assert(storesToWB == 0); +} + +template +void +LSQUnit::takeOverFrom() +{ + switchedOut = false; + loads = stores = storesToWB = 0; + + loadHead = loadTail = 0; + + storeHead = storeWBIdx = storeTail = 0; + + usedPorts = 0; + + memDepViolator = NULL; + + blockedLoadSeqNum = 0; + + stalled = false; + isLoadBlocked = false; + loadBlockedHandled = false; +} + +template +void +LSQUnit::resizeLQ(unsigned size) +{ + unsigned size_plus_sentinel = size + 1; + assert(size_plus_sentinel >= LQEntries); + + if (size_plus_sentinel > LQEntries) { + while (size_plus_sentinel > loadQueue.size()) { + DynInstPtr dummy; + loadQueue.push_back(dummy); + LQEntries++; + } + } else { + LQEntries = size_plus_sentinel; + } + +} + +template +void +LSQUnit::resizeSQ(unsigned size) +{ + unsigned size_plus_sentinel = size + 1; + if (size_plus_sentinel > SQEntries) { + while (size_plus_sentinel > storeQueue.size()) { + SQEntry dummy; + storeQueue.push_back(dummy); + SQEntries++; + } + } else { + SQEntries = size_plus_sentinel; + } +} + +template +void +LSQUnit::insert(DynInstPtr &inst) +{ + assert(inst->isMemRef()); + + assert(inst->isLoad() || inst->isStore()); + + if (inst->isLoad()) { + insertLoad(inst); + } else { + insertStore(inst); + } + + inst->setInLSQ(); +} + +template +void +LSQUnit::insertLoad(DynInstPtr &load_inst) +{ + assert((loadTail + 1) % LQEntries != loadHead); + assert(loads < LQEntries); + + DPRINTF(LSQUnit, "Inserting load PC %#x, idx:%i [sn:%lli]\n", + load_inst->readPC(), loadTail, load_inst->seqNum); + + load_inst->lqIdx = loadTail; + + if (stores == 0) { + load_inst->sqIdx = -1; + } else { + load_inst->sqIdx = storeTail; + } + + loadQueue[loadTail] = load_inst; + + incrLdIdx(loadTail); + + ++loads; +} + +template +void +LSQUnit::insertStore(DynInstPtr &store_inst) +{ + // Make sure it is not full before inserting an instruction. + assert((storeTail + 1) % SQEntries != storeHead); + assert(stores < SQEntries); + + DPRINTF(LSQUnit, "Inserting store PC %#x, idx:%i [sn:%lli]\n", + store_inst->readPC(), storeTail, store_inst->seqNum); + + store_inst->sqIdx = storeTail; + store_inst->lqIdx = loadTail; + + storeQueue[storeTail] = SQEntry(store_inst); + + incrStIdx(storeTail); + + ++stores; +} + +template +typename Impl::DynInstPtr +LSQUnit::getMemDepViolator() +{ + DynInstPtr temp = memDepViolator; + + memDepViolator = NULL; + + return temp; +} + +template +unsigned +LSQUnit::numFreeEntries() +{ + unsigned free_lq_entries = LQEntries - loads; + unsigned free_sq_entries = SQEntries - stores; + + // Both the LQ and SQ entries have an extra dummy entry to differentiate + // empty/full conditions. Subtract 1 from the free entries. + if (free_lq_entries < free_sq_entries) { + return free_lq_entries - 1; + } else { + return free_sq_entries - 1; + } +} + +template +int +LSQUnit::numLoadsReady() +{ + int load_idx = loadHead; + int retval = 0; + + while (load_idx != loadTail) { + assert(loadQueue[load_idx]); + + if (loadQueue[load_idx]->readyToIssue()) { + ++retval; + } + } + + return retval; +} + +template +Fault +LSQUnit::executeLoad(DynInstPtr &inst) +{ + // Execute a specific load. + Fault load_fault = NoFault; + + DPRINTF(LSQUnit, "Executing load PC %#x, [sn:%lli]\n", + inst->readPC(),inst->seqNum); + + load_fault = inst->initiateAcc(); + + // If the instruction faulted, then we need to send it along to commit + // without the instruction completing. + if (load_fault != NoFault) { + // Send this instruction to commit, also make sure iew stage + // realizes there is activity. + iewStage->instToCommit(inst); + iewStage->activityThisCycle(); + } + + return load_fault; +} + +template +Fault +LSQUnit::executeStore(DynInstPtr &store_inst) +{ + using namespace TheISA; + // Make sure that a store exists. + assert(stores != 0); + + int store_idx = store_inst->sqIdx; + + DPRINTF(LSQUnit, "Executing store PC %#x [sn:%lli]\n", + store_inst->readPC(), store_inst->seqNum); + + // Check the recently completed loads to see if any match this store's + // address. If so, then we have a memory ordering violation. + int load_idx = store_inst->lqIdx; + + Fault store_fault = store_inst->initiateAcc(); +// Fault store_fault = store_inst->execute(); + + if (storeQueue[store_idx].size == 0) { + DPRINTF(LSQUnit,"Fault on Store PC %#x, [sn:%lli],Size = 0\n", + store_inst->readPC(),store_inst->seqNum); + + return store_fault; + } + + assert(store_fault == NoFault); + + if (store_inst->isStoreConditional()) { + // Store conditionals need to set themselves as able to + // writeback if we haven't had a fault by here. + storeQueue[store_idx].canWB = true; + + ++storesToWB; + } + + if (!memDepViolator) { + while (load_idx != loadTail) { + // Really only need to check loads that have actually executed + // It's safe to check all loads because effAddr is set to + // InvalAddr when the dyn inst is created. + + // @todo: For now this is extra conservative, detecting a + // violation if the addresses match assuming all accesses + // are quad word accesses. + + // @todo: Fix this, magic number being used here + if ((loadQueue[load_idx]->effAddr >> 8) == + (store_inst->effAddr >> 8)) { + // A load incorrectly passed this store. Squash and refetch. + // For now return a fault to show that it was unsuccessful. + memDepViolator = loadQueue[load_idx]; + + return genMachineCheckFault(); + } + + incrLdIdx(load_idx); + } + + // If we've reached this point, there was no violation. + memDepViolator = NULL; + } + + return store_fault; +} + +template +void +LSQUnit::commitLoad() +{ + assert(loadQueue[loadHead]); + + DPRINTF(LSQUnit, "Committing head load instruction, PC %#x\n", + loadQueue[loadHead]->readPC()); + + + loadQueue[loadHead] = NULL; + + incrLdIdx(loadHead); + + --loads; +} + +template +void +LSQUnit::commitLoads(InstSeqNum &youngest_inst) +{ + assert(loads == 0 || loadQueue[loadHead]); + + while (loads != 0 && loadQueue[loadHead]->seqNum <= youngest_inst) { + commitLoad(); + } +} + +template +void +LSQUnit::commitStores(InstSeqNum &youngest_inst) +{ + assert(stores == 0 || storeQueue[storeHead].inst); + + int store_idx = storeHead; + + while (store_idx != storeTail) { + assert(storeQueue[store_idx].inst); + // Mark any stores that are now committed and have not yet + // been marked as able to write back. + if (!storeQueue[store_idx].canWB) { + if (storeQueue[store_idx].inst->seqNum > youngest_inst) { + break; + } + DPRINTF(LSQUnit, "Marking store as able to write back, PC " + "%#x [sn:%lli]\n", + storeQueue[store_idx].inst->readPC(), + storeQueue[store_idx].inst->seqNum); + + storeQueue[store_idx].canWB = true; + + ++storesToWB; + } + + incrStIdx(store_idx); + } +} + +template +void +LSQUnit::writebackStores() +{ + while (storesToWB > 0 && + storeWBIdx != storeTail && + storeQueue[storeWBIdx].inst && + storeQueue[storeWBIdx].canWB && + usedPorts < cachePorts) { + + // Store didn't write any data so no need to write it back to + // memory. + if (storeQueue[storeWBIdx].size == 0) { + completeStore(storeWBIdx); + + incrStIdx(storeWBIdx); + + continue; + } +/* + if (dcacheInterface && dcacheInterface->isBlocked()) { + DPRINTF(LSQUnit, "Unable to write back any more stores, cache" + " is blocked!\n"); + break; + } +*/ + ++usedPorts; + + if (storeQueue[storeWBIdx].inst->isDataPrefetch()) { + incrStIdx(storeWBIdx); + + continue; + } + + assert(storeQueue[storeWBIdx].req); + assert(!storeQueue[storeWBIdx].committed); + + DynInstPtr inst = storeQueue[storeWBIdx].inst; + + Request *req = storeQueue[storeWBIdx].req; + storeQueue[storeWBIdx].committed = true; + + assert(!inst->memData); + inst->memData = new uint8_t[64]; + memcpy(inst->memData, (uint8_t *)&storeQueue[storeWBIdx].data, req->getSize()); + + PacketPtr data_pkt = new Packet(req, Packet::WriteReq, Packet::Broadcast); + data_pkt->dataStatic(inst->memData); + + DPRINTF(LSQUnit, "D-Cache: Writing back store idx:%i PC:%#x " + "to Addr:%#x, data:%#x [sn:%lli]\n", + storeWBIdx, storeQueue[storeWBIdx].inst->readPC(), + req->getPaddr(), *(inst->memData), + storeQueue[storeWBIdx].inst->seqNum); + + if (!dcachePort->sendTiming(data_pkt)) { + // Need to handle becoming blocked on a store. + } else { + /* + StoreCompletionEvent *store_event = new + StoreCompletionEvent(storeWBIdx, NULL, this); + */ + if (isStalled() && + storeQueue[storeWBIdx].inst->seqNum == stallingStoreIsn) { + DPRINTF(LSQUnit, "Unstalling, stalling store [sn:%lli] " + "load idx:%i\n", + stallingStoreIsn, stallingLoadIdx); + stalled = false; + stallingStoreIsn = 0; + iewStage->replayMemInst(loadQueue[stallingLoadIdx]); + } +/* + typename LdWritebackEvent *wb = NULL; + if (req->flags & LOCKED) { + // Stx_C should not generate a system port transaction + // if it misses in the cache, but that might be hard + // to accomplish without explicit cache support. + wb = new typename + LdWritebackEvent(storeQueue[storeWBIdx].inst, + iewStage); + store_event->wbEvent = wb; + } +*/ + if (data_pkt->result != Packet::Success) { + DPRINTF(LSQUnit,"D-Cache Write Miss on idx:%i!\n", + storeWBIdx); + + DPRINTF(Activity, "Active st accessing mem miss [sn:%lli]\n", + storeQueue[storeWBIdx].inst->seqNum); + + //mshrSeqNums.push_back(storeQueue[storeWBIdx].inst->seqNum); + + //DPRINTF(LSQUnit, "Added MSHR. count = %i\n",mshrSeqNums.size()); + + // @todo: Increment stat here. + } else { + DPRINTF(LSQUnit,"D-Cache: Write Hit on idx:%i !\n", + storeWBIdx); + + DPRINTF(Activity, "Active st accessing mem hit [sn:%lli]\n", + storeQueue[storeWBIdx].inst->seqNum); + } + + incrStIdx(storeWBIdx); + } + } + + // Not sure this should set it to 0. + usedPorts = 0; + + assert(stores >= 0 && storesToWB >= 0); +} + +/*template +void +LSQUnit::removeMSHR(InstSeqNum seqNum) +{ + list::iterator mshr_it = find(mshrSeqNums.begin(), + mshrSeqNums.end(), + seqNum); + + if (mshr_it != mshrSeqNums.end()) { + mshrSeqNums.erase(mshr_it); + DPRINTF(LSQUnit, "Removing MSHR. count = %i\n",mshrSeqNums.size()); + } +}*/ + +template +void +LSQUnit::squash(const InstSeqNum &squashed_num) +{ + DPRINTF(LSQUnit, "Squashing until [sn:%lli]!" + "(Loads:%i Stores:%i)\n", squashed_num, loads, stores); + + int load_idx = loadTail; + decrLdIdx(load_idx); + + while (loads != 0 && loadQueue[load_idx]->seqNum > squashed_num) { + DPRINTF(LSQUnit,"Load Instruction PC %#x squashed, " + "[sn:%lli]\n", + loadQueue[load_idx]->readPC(), + loadQueue[load_idx]->seqNum); + + if (isStalled() && load_idx == stallingLoadIdx) { + stalled = false; + stallingStoreIsn = 0; + stallingLoadIdx = 0; + } + + // Clear the smart pointer to make sure it is decremented. + loadQueue[load_idx]->squashed = true; + loadQueue[load_idx] = NULL; + --loads; + + // Inefficient! + loadTail = load_idx; + + decrLdIdx(load_idx); + } + + if (isLoadBlocked) { + if (squashed_num < blockedLoadSeqNum) { + isLoadBlocked = false; + loadBlockedHandled = false; + blockedLoadSeqNum = 0; + } + } + + int store_idx = storeTail; + decrStIdx(store_idx); + + while (stores != 0 && + storeQueue[store_idx].inst->seqNum > squashed_num) { + // Instructions marked as can WB are already committed. + if (storeQueue[store_idx].canWB) { + break; + } + + DPRINTF(LSQUnit,"Store Instruction PC %#x squashed, " + "idx:%i [sn:%lli]\n", + storeQueue[store_idx].inst->readPC(), + store_idx, storeQueue[store_idx].inst->seqNum); + + // I don't think this can happen. It should have been cleared + // by the stalling load. + if (isStalled() && + storeQueue[store_idx].inst->seqNum == stallingStoreIsn) { + panic("Is stalled should have been cleared by stalling load!\n"); + stalled = false; + stallingStoreIsn = 0; + } + + // Clear the smart pointer to make sure it is decremented. + storeQueue[store_idx].inst->squashed = true; + storeQueue[store_idx].inst = NULL; + storeQueue[store_idx].canWB = 0; + + storeQueue[store_idx].req = NULL; + --stores; + + // Inefficient! + storeTail = store_idx; + + decrStIdx(store_idx); + } +} + +template +void +LSQUnit::completeStore(int store_idx) +{ + assert(storeQueue[store_idx].inst); + storeQueue[store_idx].completed = true; + --storesToWB; + // A bit conservative because a store completion may not free up entries, + // but hopefully avoids two store completions in one cycle from making + // the CPU tick twice. + cpu->activityThisCycle(); + + if (store_idx == storeHead) { + do { + incrStIdx(storeHead); + + --stores; + } while (storeQueue[storeHead].completed && + storeHead != storeTail); + + iewStage->updateLSQNextCycle = true; + } + + DPRINTF(LSQUnit, "Completing store [sn:%lli], idx:%i, store head " + "idx:%i\n", + storeQueue[store_idx].inst->seqNum, store_idx, storeHead); + + if (isStalled() && + storeQueue[store_idx].inst->seqNum == stallingStoreIsn) { + DPRINTF(LSQUnit, "Unstalling, stalling store [sn:%lli] " + "load idx:%i\n", + stallingStoreIsn, stallingLoadIdx); + stalled = false; + stallingStoreIsn = 0; + iewStage->replayMemInst(loadQueue[stallingLoadIdx]); + } + + storeQueue[store_idx].inst->setCompleted(); + + // Tell the checker we've completed this instruction. Some stores + // may get reported twice to the checker, but the checker can + // handle that case. + if (cpu->checker) { + cpu->checker->tick(storeQueue[store_idx].inst); + } +} + +template +inline void +LSQUnit::incrStIdx(int &store_idx) +{ + if (++store_idx >= SQEntries) + store_idx = 0; +} + +template +inline void +LSQUnit::decrStIdx(int &store_idx) +{ + if (--store_idx < 0) + store_idx += SQEntries; +} + +template +inline void +LSQUnit::incrLdIdx(int &load_idx) +{ + if (++load_idx >= LQEntries) + load_idx = 0; +} + +template +inline void +LSQUnit::decrLdIdx(int &load_idx) +{ + if (--load_idx < 0) + load_idx += LQEntries; +} + +template +void +LSQUnit::dumpInsts() +{ + cprintf("Load store queue: Dumping instructions.\n"); + cprintf("Load queue size: %i\n", loads); + cprintf("Load queue: "); + + int load_idx = loadHead; + + while (load_idx != loadTail && loadQueue[load_idx]) { + cprintf("%#x ", loadQueue[load_idx]->readPC()); + + incrLdIdx(load_idx); + } + + cprintf("Store queue size: %i\n", stores); + cprintf("Store queue: "); + + int store_idx = storeHead; + + while (store_idx != storeTail && storeQueue[store_idx].inst) { + cprintf("%#x ", storeQueue[store_idx].inst->readPC()); + + incrStIdx(store_idx); + } + + cprintf("\n"); +} diff --git a/src/cpu/o3/regfile.hh b/src/cpu/o3/regfile.hh index 3350903db..45fe490d2 100644 --- a/src/cpu/o3/regfile.hh +++ b/src/cpu/o3/regfile.hh @@ -31,6 +31,7 @@ #include "arch/isa_traits.hh" #include "arch/faults.hh" +#include "arch/types.hh" #include "base/trace.hh" #include "config/full_system.hh" #include "cpu/o3/comm.hh" @@ -44,9 +45,8 @@ /** * Simple physical register file class. - * This really only depends on the ISA, and not the Impl. Things that are - * in the ifdef FULL_SYSTEM are pretty dependent on the ISA, and probably - * should go in the AlphaFullCPU. + * Right now this is specific to Alpha until we decide if/how to make things + * generic enough to support other ISAs. */ template class PhysRegFile @@ -54,8 +54,15 @@ class PhysRegFile protected: typedef TheISA::IntReg IntReg; typedef TheISA::FloatReg FloatReg; + typedef TheISA::FloatRegBits FloatRegBits; typedef TheISA::MiscRegFile MiscRegFile; typedef TheISA::MiscReg MiscReg; + + typedef union { + FloatReg d; + FloatRegBits q; + } PhysFloatReg; + // Note that most of the definitions of the IntReg, FloatReg, etc. exist // within the Impl/ISA class and not within this PhysRegFile class. @@ -97,7 +104,7 @@ class PhysRegFile assert(reg_idx < numPhysicalFloatRegs + numPhysicalIntRegs); - FloatReg floatReg = floatRegFile.readReg(reg_idx, width); + FloatReg floatReg = floatRegFile[reg_idx].d; DPRINTF(IEW, "RegFile: Access to %d byte float register %i, has " "data %8.8d\n", int(reg_idx), (double)floatReg); @@ -113,7 +120,7 @@ class PhysRegFile assert(reg_idx < numPhysicalFloatRegs + numPhysicalIntRegs); - FloatReg floatReg = floatRegFile.readReg(reg_idx); + FloatReg floatReg = floatRegFile[reg_idx].d; DPRINTF(IEW, "RegFile: Access to float register %i, has " "data %8.8d\n", int(reg_idx), (double)floatReg); @@ -129,7 +136,7 @@ class PhysRegFile assert(reg_idx < numPhysicalFloatRegs + numPhysicalIntRegs); - FloatRegBits floatRegBits = floatRegFile.readRegBits(reg_idx, width); + FloatRegBits floatRegBits = floatRegFile[reg_idx].q; DPRINTF(IEW, "RegFile: Access to %d byte float register %i as int, " "has data %lli\n", int(reg_idx), (uint64_t)floatRegBits); @@ -144,7 +151,7 @@ class PhysRegFile assert(reg_idx < numPhysicalFloatRegs + numPhysicalIntRegs); - FloatRegBits floatRegBits = floatRegFile.readRegBits(reg_idx); + FloatRegBits floatRegBits = floatRegFile[reg_idx].q; DPRINTF(IEW, "RegFile: Access to float register %i as int, " "has data %lli\n", int(reg_idx), (uint64_t)floatRegBits); @@ -176,7 +183,7 @@ class PhysRegFile int(reg_idx), (double)val); if (reg_idx != TheISA::ZeroReg) - floatRegFile.setReg(reg_idx, val, width); + floatRegFile[reg_idx].d = width; } /** Sets a double precision floating point register to the given value. */ @@ -191,7 +198,7 @@ class PhysRegFile int(reg_idx), (double)val); if (reg_idx != TheISA::ZeroReg) - floatRegFile.setReg(reg_idx, val); + floatRegFile[reg_idx].d = val; } /** Sets a floating point register to the given integer value. */ @@ -205,7 +212,7 @@ class PhysRegFile DPRINTF(IEW, "RegFile: Setting float register %i to %lli\n", int(reg_idx), (uint64_t)val); - floatRegFile.setRegBits(reg_idx, val, width); + floatRegFile[reg_idx].q = val; } void setFloatRegBits(PhysRegIndex reg_idx, FloatRegBits val) @@ -217,6 +224,13 @@ class PhysRegFile DPRINTF(IEW, "RegFile: Setting float register %i to %lli\n", int(reg_idx), (uint64_t)val); + + floatRegFile[reg_idx].q = val; + } + + MiscReg readMiscReg(int misc_reg, unsigned thread_id) + { + return miscRegs[thread_id].readReg(misc_reg); } MiscReg readMiscRegWithEffect(int misc_reg, Fault &fault, @@ -249,7 +263,7 @@ class PhysRegFile std::vector intRegFile; /** Floating point register file. */ - std::vector floatRegFile; + std::vector floatRegFile; /** Miscellaneous register file. */ MiscRegFile miscRegs[Impl::MaxThreads]; diff --git a/src/cpu/o3/sat_counter.hh b/src/cpu/o3/sat_counter.hh index d01fd93ce..640445407 100644 --- a/src/cpu/o3/sat_counter.hh +++ b/src/cpu/o3/sat_counter.hh @@ -29,6 +29,7 @@ #ifndef __CPU_O3_SAT_COUNTER_HH__ #define __CPU_O3_SAT_COUNTER_HH__ +#include "base/misc.hh" #include "sim/host.hh" /** diff --git a/src/cpu/o3/scoreboard.cc b/src/cpu/o3/scoreboard.cc new file mode 100644 index 000000000..b0e433620 --- /dev/null +++ b/src/cpu/o3/scoreboard.cc @@ -0,0 +1,106 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "cpu/o3/scoreboard.hh" + +Scoreboard::Scoreboard(unsigned activeThreads, + unsigned _numLogicalIntRegs, + unsigned _numPhysicalIntRegs, + unsigned _numLogicalFloatRegs, + unsigned _numPhysicalFloatRegs, + unsigned _numMiscRegs, + unsigned _zeroRegIdx) + : numLogicalIntRegs(_numLogicalIntRegs), + numPhysicalIntRegs(_numPhysicalIntRegs), + numLogicalFloatRegs(_numLogicalFloatRegs), + numPhysicalFloatRegs(_numPhysicalFloatRegs), + numMiscRegs(_numMiscRegs), + zeroRegIdx(_zeroRegIdx) +{ + //Get Register Sizes + numLogicalRegs = numLogicalIntRegs + numLogicalFloatRegs; + numPhysicalRegs = numPhysicalIntRegs + numPhysicalFloatRegs; + + //Resize scoreboard appropriately + regScoreBoard.resize(numPhysicalRegs + (numMiscRegs * activeThreads)); + + //Initialize values + for (int i=0; i < numLogicalIntRegs * activeThreads; i++) { + regScoreBoard[i] = 1; + } + + for (int i= numPhysicalIntRegs; + i < numPhysicalIntRegs + (numLogicalFloatRegs * activeThreads); + i++) { + regScoreBoard[i] = 1; + } + + for (int i = numPhysicalRegs; + i < numPhysicalRegs + (numMiscRegs * activeThreads); + i++) { + regScoreBoard[i] = 1; + } +} + +std::string +Scoreboard::name() const +{ + return "cpu.scoreboard"; +} + +bool +Scoreboard::getReg(PhysRegIndex phys_reg) +{ + // Always ready if int or fp zero reg. + if (phys_reg == zeroRegIdx || + phys_reg == (zeroRegIdx + numPhysicalIntRegs)) { + return 1; + } + + return regScoreBoard[phys_reg]; +} + +void +Scoreboard::setReg(PhysRegIndex phys_reg) +{ + DPRINTF(Scoreboard, "Setting reg %i as ready\n", phys_reg); + + regScoreBoard[phys_reg] = 1; +} + +void +Scoreboard::unsetReg(PhysRegIndex ready_reg) +{ + if (ready_reg == zeroRegIdx || + ready_reg == (zeroRegIdx + numPhysicalIntRegs)) { + // Don't do anything if int or fp zero reg. + return; + } + + regScoreBoard[ready_reg] = 0; +} diff --git a/src/cpu/o3/scoreboard.hh b/src/cpu/o3/scoreboard.hh new file mode 100644 index 000000000..77f2cf157 --- /dev/null +++ b/src/cpu/o3/scoreboard.hh @@ -0,0 +1,114 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __CPU_O3_SCOREBOARD_HH__ +#define __CPU_O3_SCOREBOARD_HH__ + +#include +#include +#include +#include "arch/alpha/isa_traits.hh" +#include "base/trace.hh" +#include "base/traceflags.hh" +#include "cpu/o3/comm.hh" + +/** + * Implements a simple scoreboard to track which registers are ready. + * This class assumes that the fp registers start, index wise, right after + * the integer registers. The misc. registers start, index wise, right after + * the fp registers. + * @todo: Fix up handling of the zero register in case the decoder does not + * automatically make insts that write the zero register into nops. + */ +class Scoreboard +{ + public: + /** Constructs a scoreboard. + * @param activeThreads The number of active threads. + * @param _numLogicalIntRegs Number of logical integer registers. + * @param _numPhysicalIntRegs Number of physical integer registers. + * @param _numLogicalFloatRegs Number of logical fp registers. + * @param _numPhysicalFloatRegs Number of physical fp registers. + * @param _numMiscRegs Number of miscellaneous registers. + * @param _zeroRegIdx Index of the zero register. + */ + Scoreboard(unsigned activeThreads, + unsigned _numLogicalIntRegs, + unsigned _numPhysicalIntRegs, + unsigned _numLogicalFloatRegs, + unsigned _numPhysicalFloatRegs, + unsigned _numMiscRegs, + unsigned _zeroRegIdx); + + /** Destructor. */ + ~Scoreboard() {} + + /** Returns the name of the scoreboard. */ + std::string name() const; + + /** Checks if the register is ready. */ + bool getReg(PhysRegIndex ready_reg); + + /** Sets the register as ready. */ + void setReg(PhysRegIndex phys_reg); + + /** Sets the register as not ready. */ + void unsetReg(PhysRegIndex ready_reg); + + private: + /** Scoreboard of physical integer registers, saying whether or not they + * are ready. + */ + std::vector regScoreBoard; + + /** Number of logical integer registers. */ + int numLogicalIntRegs; + + /** Number of physical integer registers. */ + int numPhysicalIntRegs; + + /** Number of logical floating point registers. */ + int numLogicalFloatRegs; + + /** Number of physical floating point registers. */ + int numPhysicalFloatRegs; + + /** Number of miscellaneous registers. */ + int numMiscRegs; + + /** Number of logical integer + float registers. */ + int numLogicalRegs; + + /** Number of physical integer + float registers. */ + int numPhysicalRegs; + + /** The logical index of the zero register. */ + int zeroRegIdx; +}; + +#endif diff --git a/src/cpu/o3/thread_state.hh b/src/cpu/o3/thread_state.hh new file mode 100644 index 000000000..9101eafb9 --- /dev/null +++ b/src/cpu/o3/thread_state.hh @@ -0,0 +1,112 @@ +/* + * Copyright (c) 2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __CPU_O3_THREAD_STATE_HH__ +#define __CPU_O3_THREAD_STATE_HH__ + +#include "arch/faults.hh" +#include "arch/isa_traits.hh" +#include "cpu/exec_context.hh" +#include "cpu/thread_state.hh" + +class Event; +class Process; + +#if FULL_SYSTEM +class EndQuiesceEvent; +class FunctionProfile; +class ProfileNode; +#else +class FunctionalMemory; +class Process; +#endif + +/** + * Class that has various thread state, such as the status, the + * current instruction being processed, whether or not the thread has + * a trap pending or is being externally updated, the ExecContext + * proxy pointer, etc. It also handles anything related to a specific + * thread's process, such as syscalls and checking valid addresses. + */ +template +struct O3ThreadState : public ThreadState { + typedef ExecContext::Status Status; + typedef typename Impl::FullCPU FullCPU; + + Status _status; + + // Current instruction + TheISA::MachInst inst; + private: + FullCPU *cpu; + public: + + bool inSyscall; + + bool trapPending; + +#if FULL_SYSTEM + O3ThreadState(FullCPU *_cpu, int _thread_num, FunctionalMemory *_mem) + : ThreadState(-1, _thread_num, _mem), + inSyscall(0), trapPending(0) + { } +#else + O3ThreadState(FullCPU *_cpu, int _thread_num, Process *_process, int _asid) + : ThreadState(-1, _thread_num, NULL, _process, _asid), + cpu(_cpu), inSyscall(0), trapPending(0) + { } + + O3ThreadState(FullCPU *_cpu, int _thread_num, FunctionalMemory *_mem, + int _asid) + : ThreadState(-1, _thread_num, _mem, NULL, _asid), + cpu(_cpu), inSyscall(0), trapPending(0) + { } +#endif + + ExecContext *xcProxy; + + ExecContext *getXCProxy() { return xcProxy; } + + Status status() const { return _status; } + + void setStatus(Status new_status) { _status = new_status; } + + bool misspeculating() { return false; } + + void setInst(TheISA::MachInst _inst) { inst = _inst; } + + Counter readFuncExeInst() { return funcExeInst; } + + void setFuncExeInst(Counter new_val) { funcExeInst = new_val; } + +#if !FULL_SYSTEM + void syscall(int64_t callnum) { process->syscall(callnum, xcProxy); } +#endif +}; + +#endif // __CPU_O3_THREAD_STATE_HH__ -- cgit v1.2.3