summaryrefslogtreecommitdiff
path: root/src/cpu/o3
diff options
context:
space:
mode:
authorKevin Lim <ktlim@umich.edu>2006-06-02 18:15:20 -0400
committerKevin Lim <ktlim@umich.edu>2006-06-02 18:15:20 -0400
commit7940c10ace28d5b93a61d4d278e6647e0c497149 (patch)
tree902ac028a61bbcc93485d8efc6e7ba5fe8c47d00 /src/cpu/o3
parentd4b73086b6b0856c28433b55c8dd5c7b56a1b6df (diff)
downloadgem5-7940c10ace28d5b93a61d4d278e6647e0c497149.tar.xz
Fixes to get compiling to work. This is mainly fixing up some includes; changing functions within the XCs; changing MemReqPtrs to Requests or Packets where appropriate.
Currently the O3 and Ozone CPUs do not work in the new memory system; I still need to fix up the ports to work and handle responses properly. This check-in is so that the merge between m5 and newmem is no longer outstanding. src/SConscript: Need to include FU Pool for new CPU model. I'll try to figure out a cleaner way to handle this in the future. src/base/traceflags.py: Include new traces flags, fix up merge mess up. src/cpu/SConscript: Include the base_dyn_inst.cc as one of othe sources. Don't compile the Ozone CPU for now. src/cpu/base.cc: Remove an extra } from the merge. src/cpu/base_dyn_inst.cc: Fixes to make compiling work. Don't instantiate the OzoneCPU for now. src/cpu/base_dyn_inst.hh: src/cpu/o3/2bit_local_pred.cc: src/cpu/o3/alpha_cpu_builder.cc: src/cpu/o3/alpha_cpu_impl.hh: src/cpu/o3/alpha_dyn_inst.hh: src/cpu/o3/alpha_params.hh: src/cpu/o3/bpred_unit.cc: src/cpu/o3/btb.hh: src/cpu/o3/commit.hh: src/cpu/o3/commit_impl.hh: src/cpu/o3/cpu.cc: src/cpu/o3/cpu.hh: src/cpu/o3/fetch.hh: src/cpu/o3/fetch_impl.hh: src/cpu/o3/free_list.hh: src/cpu/o3/iew.hh: src/cpu/o3/iew_impl.hh: src/cpu/o3/inst_queue.hh: src/cpu/o3/inst_queue_impl.hh: src/cpu/o3/regfile.hh: src/cpu/o3/sat_counter.hh: src/cpu/op_class.hh: src/cpu/ozone/cpu.hh: src/cpu/checker/cpu.cc: src/cpu/checker/cpu.hh: src/cpu/checker/exec_context.hh: src/cpu/checker/o3_cpu_builder.cc: src/cpu/ozone/cpu_impl.hh: src/mem/request.hh: src/cpu/o3/fu_pool.hh: src/cpu/o3/lsq.hh: src/cpu/o3/lsq_unit.hh: src/cpu/o3/lsq_unit_impl.hh: src/cpu/o3/thread_state.hh: src/cpu/ozone/back_end.hh: src/cpu/ozone/dyn_inst.cc: src/cpu/ozone/dyn_inst.hh: src/cpu/ozone/front_end.hh: src/cpu/ozone/inorder_back_end.hh: src/cpu/ozone/lw_back_end.hh: src/cpu/ozone/lw_lsq.hh: src/cpu/ozone/ozone_impl.hh: src/cpu/ozone/thread_state.hh: Fixes to get compiling to work. src/cpu/o3/alpha_cpu.hh: Fixes to get compiling to work. Float reg accessors have changed, as well as MemReqPtrs to RequestPtrs. src/cpu/o3/alpha_dyn_inst_impl.hh: Fixes to get compiling to work. Pass in the packet to the completeAcc function. Fix up syscall function. --HG-- rename : cpu/activity.cc => src/cpu/activity.cc rename : cpu/activity.hh => src/cpu/activity.hh rename : cpu/checker/cpu.cc => src/cpu/checker/cpu.cc rename : cpu/checker/cpu.hh => src/cpu/checker/cpu.hh rename : cpu/checker/cpu_builder.cc => src/cpu/checker/cpu_builder.cc rename : cpu/checker/exec_context.hh => src/cpu/checker/exec_context.hh rename : cpu/checker/o3_cpu_builder.cc => src/cpu/checker/o3_cpu_builder.cc rename : cpu/o3/dep_graph.hh => src/cpu/o3/dep_graph.hh rename : cpu/o3/fu_pool.cc => src/cpu/o3/fu_pool.cc rename : cpu/o3/fu_pool.hh => src/cpu/o3/fu_pool.hh rename : cpu/o3/lsq.cc => src/cpu/o3/lsq.cc rename : cpu/o3/lsq.hh => src/cpu/o3/lsq.hh rename : cpu/o3/lsq_impl.hh => src/cpu/o3/lsq_impl.hh rename : cpu/o3/lsq_unit.cc => src/cpu/o3/lsq_unit.cc rename : cpu/o3/lsq_unit.hh => src/cpu/o3/lsq_unit.hh rename : cpu/o3/lsq_unit_impl.hh => src/cpu/o3/lsq_unit_impl.hh rename : cpu/o3/scoreboard.cc => src/cpu/o3/scoreboard.cc rename : cpu/o3/scoreboard.hh => src/cpu/o3/scoreboard.hh rename : cpu/o3/thread_state.hh => src/cpu/o3/thread_state.hh rename : cpu/ozone/back_end.cc => src/cpu/ozone/back_end.cc rename : cpu/ozone/back_end.hh => src/cpu/ozone/back_end.hh rename : cpu/ozone/back_end_impl.hh => src/cpu/ozone/back_end_impl.hh rename : cpu/ozone/cpu_builder.cc => src/cpu/ozone/cpu_builder.cc rename : cpu/ozone/dyn_inst.cc => src/cpu/ozone/dyn_inst.cc rename : cpu/ozone/dyn_inst.hh => src/cpu/ozone/dyn_inst.hh rename : cpu/ozone/dyn_inst_impl.hh => src/cpu/ozone/dyn_inst_impl.hh rename : cpu/ozone/front_end.cc => src/cpu/ozone/front_end.cc rename : cpu/ozone/front_end.hh => src/cpu/ozone/front_end.hh rename : cpu/ozone/front_end_impl.hh => src/cpu/ozone/front_end_impl.hh rename : cpu/ozone/inorder_back_end.cc => src/cpu/ozone/inorder_back_end.cc rename : cpu/ozone/inorder_back_end.hh => src/cpu/ozone/inorder_back_end.hh rename : cpu/ozone/inorder_back_end_impl.hh => src/cpu/ozone/inorder_back_end_impl.hh rename : cpu/ozone/inst_queue.cc => src/cpu/ozone/inst_queue.cc rename : cpu/ozone/inst_queue.hh => src/cpu/ozone/inst_queue.hh rename : cpu/ozone/inst_queue_impl.hh => src/cpu/ozone/inst_queue_impl.hh rename : cpu/ozone/lsq_unit.cc => src/cpu/ozone/lsq_unit.cc rename : cpu/ozone/lsq_unit.hh => src/cpu/ozone/lsq_unit.hh rename : cpu/ozone/lsq_unit_impl.hh => src/cpu/ozone/lsq_unit_impl.hh rename : cpu/ozone/lw_back_end.cc => src/cpu/ozone/lw_back_end.cc rename : cpu/ozone/lw_back_end.hh => src/cpu/ozone/lw_back_end.hh rename : cpu/ozone/lw_back_end_impl.hh => src/cpu/ozone/lw_back_end_impl.hh rename : cpu/ozone/lw_lsq.cc => src/cpu/ozone/lw_lsq.cc rename : cpu/ozone/lw_lsq.hh => src/cpu/ozone/lw_lsq.hh rename : cpu/ozone/lw_lsq_impl.hh => src/cpu/ozone/lw_lsq_impl.hh rename : cpu/ozone/null_predictor.hh => src/cpu/ozone/null_predictor.hh rename : cpu/ozone/ozone_impl.hh => src/cpu/ozone/ozone_impl.hh rename : cpu/ozone/rename_table.cc => src/cpu/ozone/rename_table.cc rename : cpu/ozone/rename_table.hh => src/cpu/ozone/rename_table.hh rename : cpu/ozone/rename_table_impl.hh => src/cpu/ozone/rename_table_impl.hh rename : cpu/ozone/simple_impl.hh => src/cpu/ozone/simple_impl.hh rename : cpu/ozone/simple_params.hh => src/cpu/ozone/simple_params.hh rename : cpu/ozone/thread_state.hh => src/cpu/ozone/thread_state.hh rename : cpu/quiesce_event.cc => src/cpu/quiesce_event.cc rename : cpu/quiesce_event.hh => src/cpu/quiesce_event.hh rename : cpu/thread_state.hh => src/cpu/thread_state.hh rename : python/m5/objects/FUPool.py => src/python/m5/objects/FUPool.py rename : python/m5/objects/OzoneCPU.py => src/python/m5/objects/OzoneCPU.py rename : python/m5/objects/SimpleOzoneCPU.py => src/python/m5/objects/SimpleOzoneCPU.py extra : convert_revision : ca7f0fbf65ee1a70d482fb4eda9a1840c7f9b8f8
Diffstat (limited to 'src/cpu/o3')
-rw-r--r--src/cpu/o3/2bit_local_pred.cc1
-rw-r--r--src/cpu/o3/alpha_cpu.hh80
-rw-r--r--src/cpu/o3/alpha_cpu_builder.cc13
-rw-r--r--src/cpu/o3/alpha_cpu_impl.hh86
-rw-r--r--src/cpu/o3/alpha_dyn_inst.hh9
-rw-r--r--src/cpu/o3/alpha_dyn_inst_impl.hh12
-rw-r--r--src/cpu/o3/alpha_params.hh9
-rw-r--r--src/cpu/o3/bpred_unit.cc4
-rw-r--r--src/cpu/o3/btb.hh1
-rw-r--r--src/cpu/o3/commit.hh4
-rw-r--r--src/cpu/o3/commit_impl.hh3
-rw-r--r--src/cpu/o3/cpu.cc21
-rw-r--r--src/cpu/o3/cpu.hh22
-rw-r--r--src/cpu/o3/dep_graph.hh213
-rw-r--r--src/cpu/o3/fetch.hh67
-rw-r--r--src/cpu/o3/fetch_impl.hh180
-rw-r--r--src/cpu/o3/free_list.hh1
-rw-r--r--src/cpu/o3/fu_pool.cc295
-rw-r--r--src/cpu/o3/fu_pool.hh162
-rw-r--r--src/cpu/o3/iew.hh19
-rw-r--r--src/cpu/o3/iew_impl.hh56
-rw-r--r--src/cpu/o3/inst_queue.hh2
-rw-r--r--src/cpu/o3/inst_queue_impl.hh3
-rw-r--r--src/cpu/o3/lsq.cc36
-rw-r--r--src/cpu/o3/lsq.hh324
-rw-r--r--src/cpu/o3/lsq_impl.hh538
-rw-r--r--src/cpu/o3/lsq_unit.cc36
-rw-r--r--src/cpu/o3/lsq_unit.hh629
-rw-r--r--src/cpu/o3/lsq_unit_impl.hh866
-rw-r--r--src/cpu/o3/regfile.hh36
-rw-r--r--src/cpu/o3/sat_counter.hh1
-rw-r--r--src/cpu/o3/scoreboard.cc106
-rw-r--r--src/cpu/o3/scoreboard.hh114
-rw-r--r--src/cpu/o3/thread_state.hh112
34 files changed, 3743 insertions, 318 deletions
diff --git a/src/cpu/o3/2bit_local_pred.cc b/src/cpu/o3/2bit_local_pred.cc
index c3fb2fdb8..2f768fd34 100644
--- a/src/cpu/o3/2bit_local_pred.cc
+++ b/src/cpu/o3/2bit_local_pred.cc
@@ -27,6 +27,7 @@
*/
#include "base/intmath.hh"
+#include "base/misc.hh"
#include "base/trace.hh"
#include "cpu/o3/2bit_local_pred.hh"
diff --git a/src/cpu/o3/alpha_cpu.hh b/src/cpu/o3/alpha_cpu.hh
index 1bab0703e..fe88a1acc 100644
--- a/src/cpu/o3/alpha_cpu.hh
+++ b/src/cpu/o3/alpha_cpu.hh
@@ -39,11 +39,15 @@ namespace Kernel {
class Statistics;
};
+class TranslatingPort;
+
template <class Impl>
class AlphaFullCPU : public FullO3CPU<Impl>
{
protected:
typedef TheISA::IntReg IntReg;
+ typedef TheISA::FloatReg FloatReg;
+ typedef TheISA::FloatRegBits FloatRegBits;
typedef TheISA::MiscReg MiscReg;
typedef TheISA::RegFile RegFile;
typedef TheISA::MiscRegFile MiscRegFile;
@@ -69,7 +73,7 @@ class AlphaFullCPU : public FullO3CPU<Impl>
virtual int readCpuId() { return cpu->cpu_id; }
- virtual FunctionalMemory *getMemPtr() { return thread->mem; }
+ virtual TranslatingPort *getMemPort() { return /*thread->port*/ NULL; }
#if FULL_SYSTEM
virtual System *getSystemPtr() { return cpu->system; }
@@ -135,19 +139,23 @@ class AlphaFullCPU : public FullO3CPU<Impl>
virtual uint64_t readIntReg(int reg_idx);
- virtual float readFloatRegSingle(int reg_idx);
+ virtual FloatReg readFloatReg(int reg_idx, int width);
+
+ virtual FloatReg readFloatReg(int reg_idx);
- virtual double readFloatRegDouble(int reg_idx);
+ virtual FloatRegBits readFloatRegBits(int reg_idx, int width);
- virtual uint64_t readFloatRegInt(int reg_idx);
+ virtual FloatRegBits readFloatRegBits(int reg_idx);
virtual void setIntReg(int reg_idx, uint64_t val);
- virtual void setFloatRegSingle(int reg_idx, float val);
+ virtual void setFloatReg(int reg_idx, FloatReg val, int width);
+
+ virtual void setFloatReg(int reg_idx, FloatReg val);
- virtual void setFloatRegDouble(int reg_idx, double val);
+ virtual void setFloatRegBits(int reg_idx, FloatRegBits val, int width);
- virtual void setFloatRegInt(int reg_idx, uint64_t val);
+ virtual void setFloatRegBits(int reg_idx, FloatRegBits val);
virtual uint64_t readPC()
{ return cpu->readPC(thread->tid); }
@@ -159,6 +167,15 @@ class AlphaFullCPU : public FullO3CPU<Impl>
virtual void setNextPC(uint64_t val);
+ virtual uint64_t readNextNPC()
+ {
+ panic("Alpha has no NextNPC!");
+ return 0;
+ }
+
+ virtual void setNextNPC(uint64_t val)
+ { panic("Alpha has no NextNPC!"); }
+
virtual MiscReg readMiscReg(int misc_reg)
{ return cpu->readMiscReg(misc_reg, thread->tid); }
@@ -193,10 +210,14 @@ class AlphaFullCPU : public FullO3CPU<Impl>
virtual void setSyscallReturn(SyscallReturn return_value);
- virtual void syscall() { return cpu->syscall(thread->tid); }
+ virtual void syscall(int64_t callnum)
+ { return cpu->syscall(callnum, thread->tid); }
virtual Counter readFuncExeInst() { return thread->funcExeInst; }
#endif
+ virtual void changeRegFileContext(TheISA::RegFile::ContextParam param,
+ TheISA::RegFile::ContextVal val)
+ { panic("Not supported on Alpha!"); }
};
#if FULL_SYSTEM
@@ -211,52 +232,43 @@ class AlphaFullCPU : public FullO3CPU<Impl>
#if FULL_SYSTEM
/** Translates instruction requestion. */
- Fault translateInstReq(MemReqPtr &req)
+ Fault translateInstReq(RequestPtr &req)
{
return itb->translate(req);
}
/** Translates data read request. */
- Fault translateDataReadReq(MemReqPtr &req)
+ Fault translateDataReadReq(RequestPtr &req)
{
return dtb->translate(req, false);
}
/** Translates data write request. */
- Fault translateDataWriteReq(MemReqPtr &req)
+ Fault translateDataWriteReq(RequestPtr &req)
{
return dtb->translate(req, true);
}
#else
- Fault dummyTranslation(MemReqPtr &req)
- {
-#if 0
- assert((req->vaddr >> 48 & 0xffff) == 0);
-#endif
-
- // put the asid in the upper 16 bits of the paddr
- req->paddr = req->vaddr & ~((Addr)0xffff << sizeof(Addr) * 8 - 16);
- req->paddr = req->paddr | (Addr)req->asid << sizeof(Addr) * 8 - 16;
- return NoFault;
- }
-
/** Translates instruction requestion in syscall emulation mode. */
- Fault translateInstReq(MemReqPtr &req)
+ Fault translateInstReq(RequestPtr &req)
{
- return dummyTranslation(req);
+ int tid = req->getThreadNum();
+ return this->thread[tid]->process->pTable->translate(req);
}
/** Translates data read request in syscall emulation mode. */
- Fault translateDataReadReq(MemReqPtr &req)
+ Fault translateDataReadReq(RequestPtr &req)
{
- return dummyTranslation(req);
+ int tid = req->getThreadNum();
+ return this->thread[tid]->process->pTable->translate(req);
}
/** Translates data write request in syscall emulation mode. */
- Fault translateDataWriteReq(MemReqPtr &req)
+ Fault translateDataWriteReq(RequestPtr &req)
{
- return dummyTranslation(req);
+ int tid = req->getThreadNum();
+ return this->thread[tid]->process->pTable->translate(req);
}
#endif
@@ -298,7 +310,7 @@ class AlphaFullCPU : public FullO3CPU<Impl>
/** Executes a syscall.
* @todo: Determine if this needs to be virtual.
*/
- void syscall(int thread_num);
+ void syscall(int64_t callnum, int thread_num);
/** Gets a syscall argument. */
IntReg getSyscallArg(int i, int tid);
@@ -311,7 +323,7 @@ class AlphaFullCPU : public FullO3CPU<Impl>
/** Read from memory function. */
template <class T>
- Fault read(MemReqPtr &req, T &data)
+ Fault read(RequestPtr &req, T &data)
{
#if 0
#if FULL_SYSTEM && THE_ISA == ALPHA_ISA
@@ -338,14 +350,14 @@ class AlphaFullCPU : public FullO3CPU<Impl>
/** CPU read function, forwards read to LSQ. */
template <class T>
- Fault read(MemReqPtr &req, T &data, int load_idx)
+ Fault read(RequestPtr &req, T &data, int load_idx)
{
return this->iew.ldstQueue.read(req, data, load_idx);
}
/** Write to memory function. */
template <class T>
- Fault write(MemReqPtr &req, T &data)
+ Fault write(RequestPtr &req, T &data)
{
#if 0
#if FULL_SYSTEM && THE_ISA == ALPHA_ISA
@@ -417,7 +429,7 @@ class AlphaFullCPU : public FullO3CPU<Impl>
/** CPU write function, forwards write to LSQ. */
template <class T>
- Fault write(MemReqPtr &req, T &data, int store_idx)
+ Fault write(RequestPtr &req, T &data, int store_idx)
{
return this->iew.ldstQueue.write(req, data, store_idx);
}
diff --git a/src/cpu/o3/alpha_cpu_builder.cc b/src/cpu/o3/alpha_cpu_builder.cc
index b0d812edc..6ac408364 100644
--- a/src/cpu/o3/alpha_cpu_builder.cc
+++ b/src/cpu/o3/alpha_cpu_builder.cc
@@ -33,7 +33,6 @@
#include "cpu/o3/alpha_impl.hh"
#include "cpu/o3/alpha_params.hh"
#include "cpu/o3/fu_pool.hh"
-#include "mem/cache/base_cache.hh"
#include "sim/builder.hh"
class DerivAlphaFullCPU : public AlphaFullCPU<AlphaSimpleImpl>
@@ -60,7 +59,7 @@ SimObjectVectorParam<Process *> workload;
//SimObjectParam<PageTable *> page_table;
#endif // FULL_SYSTEM
-SimObjectParam<FunctionalMemory *> mem;
+SimObjectParam<MemObject *> mem;
SimObjectParam<BaseCPU *> checker;
@@ -69,9 +68,6 @@ Param<Counter> max_insts_all_threads;
Param<Counter> max_loads_any_thread;
Param<Counter> max_loads_all_threads;
-SimObjectParam<BaseCache *> icache;
-SimObjectParam<BaseCache *> dcache;
-
Param<unsigned> cachePorts;
Param<unsigned> decodeToFetchDelay;
@@ -169,7 +165,7 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(DerivAlphaFullCPU)
// INIT_PARAM(page_table, "Page table"),
#endif // FULL_SYSTEM
- INIT_PARAM_DFLT(mem, "Memory", NULL),
+ INIT_PARAM(mem, "Memory"),
INIT_PARAM_DFLT(checker, "Checker CPU", NULL),
@@ -188,9 +184,6 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(DerivAlphaFullCPU)
"count",
0),
- INIT_PARAM_DFLT(icache, "L1 instruction cache", NULL),
- INIT_PARAM_DFLT(dcache, "L1 data cache", NULL),
-
INIT_PARAM_DFLT(cachePorts, "Cache Ports", 200),
INIT_PARAM(decodeToFetchDelay, "Decode to fetch delay"),
@@ -327,8 +320,6 @@ CREATE_SIM_OBJECT(DerivAlphaFullCPU)
//
// Caches
//
- params->icacheInterface = icache ? icache->getInterface() : NULL;
- params->dcacheInterface = dcache ? dcache->getInterface() : NULL;
params->cachePorts = cachePorts;
params->decodeToFetchDelay = decodeToFetchDelay;
diff --git a/src/cpu/o3/alpha_cpu_impl.hh b/src/cpu/o3/alpha_cpu_impl.hh
index f7f0a3842..a890cfd90 100644
--- a/src/cpu/o3/alpha_cpu_impl.hh
+++ b/src/cpu/o3/alpha_cpu_impl.hh
@@ -31,7 +31,6 @@
#include "base/statistics.hh"
#include "base/timebuf.hh"
#include "cpu/checker/exec_context.hh"
-#include "mem/mem_interface.hh"
#include "sim/sim_events.hh"
#include "sim/stats.hh"
@@ -68,11 +67,9 @@ AlphaFullCPU<Impl>::AlphaFullCPU(Params *params)
this->thread[i]->setStatus(ExecContext::Suspended);
#else
if (i < params->workload.size()) {
- DPRINTF(FullCPU, "FullCPU: Workload[%i]'s starting PC is %#x, "
- "process is %#x",
- i, params->workload[i]->prog_entry, this->thread[i]);
+ DPRINTF(FullCPU, "FullCPU: Workload[%i] process is %#x",
+ i, this->thread[i]);
this->thread[i] = new Thread(this, i, params->workload[i], i);
- assert(params->workload[i]->getMemory() != NULL);
this->thread[i]->setStatus(ExecContext::Suspended);
//usedTids[i] = true;
@@ -160,7 +157,7 @@ void
AlphaFullCPU<Impl>::AlphaXC::takeOverFrom(ExecContext *old_context)
{
// some things should already be set up
- assert(getMemPtr() == old_context->getMemPtr());
+ assert(getMemPort() == old_context->getMemPort());
#if FULL_SYSTEM
assert(getSystemPtr() == old_context->getSystemPtr());
#else
@@ -366,15 +363,14 @@ AlphaFullCPU<Impl>::AlphaXC::copyArchRegs(ExecContext *xc)
}
// Then loop through the floating point registers.
- for (int i = 0; i < AlphaISA::NumFloatRegs; ++i)
- {
- renamed_reg = this->renameMap.lookup(i + AlphaISA::FP_Base_DepTag);
- this->cpuXC->setFloatRegBits(i,
- this->regFile.readFloatRegBits(renamed_reg));
+ for (int i = 0; i < AlphaISA::NumFloatRegs; ++i) {
+ renamed_reg = cpu->renameMap[tid].lookup(i + AlphaISA::FP_Base_DepTag);
+ cpu->setFloatRegBits(renamed_reg,
+ xc->readFloatRegBits(i));
}
// Copy the misc regs.
- cpu->regFile.miscRegs[tid].copyMiscRegs(xc);
+ copyMiscRegs(xc, this);
// Then finally set the PC and the next PC.
cpu->setPC(xc->readPC(), tid);
@@ -398,24 +394,40 @@ AlphaFullCPU<Impl>::AlphaXC::readIntReg(int reg_idx)
}
template <class Impl>
-float
-AlphaFullCPU<Impl>::AlphaXC::readFloatRegSingle(int reg_idx)
+FloatReg
+AlphaFullCPU<Impl>::AlphaXC::readFloatReg(int reg_idx, int width)
{
DPRINTF(Fault, "Reading float register through the XC!\n");
- return cpu->readArchFloatRegSingle(reg_idx, thread->tid);
+ switch(width) {
+ case 32:
+ return cpu->readArchFloatRegSingle(reg_idx, thread->tid);
+ case 64:
+ return cpu->readArchFloatRegDouble(reg_idx, thread->tid);
+ default:
+ panic("Unsupported width!");
+ return 0;
+ }
}
template <class Impl>
-double
-AlphaFullCPU<Impl>::AlphaXC::readFloatRegDouble(int reg_idx)
+FloatReg
+AlphaFullCPU<Impl>::AlphaXC::readFloatReg(int reg_idx)
{
DPRINTF(Fault, "Reading float register through the XC!\n");
- return cpu->readArchFloatRegDouble(reg_idx, thread->tid);
+ return cpu->readArchFloatRegSingle(reg_idx, thread->tid);
}
template <class Impl>
-uint64_t
-AlphaFullCPU<Impl>::AlphaXC::readFloatRegInt(int reg_idx)
+FloatRegBits
+AlphaFullCPU<Impl>::AlphaXC::readFloatRegBits(int reg_idx, int width)
+{
+ DPRINTF(Fault, "Reading floatint register through the XC!\n");
+ return cpu->readArchFloatRegInt(reg_idx, thread->tid);
+}
+
+template <class Impl>
+FloatRegBits
+AlphaFullCPU<Impl>::AlphaXC::readFloatRegBits(int reg_idx)
{
DPRINTF(Fault, "Reading floatint register through the XC!\n");
return cpu->readArchFloatRegInt(reg_idx, thread->tid);
@@ -435,10 +447,17 @@ AlphaFullCPU<Impl>::AlphaXC::setIntReg(int reg_idx, uint64_t val)
template <class Impl>
void
-AlphaFullCPU<Impl>::AlphaXC::setFloatRegSingle(int reg_idx, float val)
+AlphaFullCPU<Impl>::AlphaXC::setFloatReg(int reg_idx, FloatReg val, int width)
{
DPRINTF(Fault, "Setting float register through the XC!\n");
- cpu->setArchFloatRegSingle(reg_idx, val, thread->tid);
+ switch(width) {
+ case 32:
+ cpu->setArchFloatRegSingle(reg_idx, val, thread->tid);
+ break;
+ case 64:
+ cpu->setArchFloatRegDouble(reg_idx, val, thread->tid);
+ break;
+ }
if (!thread->trapPending && !thread->inSyscall) {
cpu->squashFromXC(thread->tid);
@@ -447,10 +466,23 @@ AlphaFullCPU<Impl>::AlphaXC::setFloatRegSingle(int reg_idx, float val)
template <class Impl>
void
-AlphaFullCPU<Impl>::AlphaXC::setFloatRegDouble(int reg_idx, double val)
+AlphaFullCPU<Impl>::AlphaXC::setFloatReg(int reg_idx, FloatReg val)
{
DPRINTF(Fault, "Setting float register through the XC!\n");
- cpu->setArchFloatRegDouble(reg_idx, val, thread->tid);
+ cpu->setArchFloatRegSingle(reg_idx, val, thread->tid);
+
+ if (!thread->trapPending && !thread->inSyscall) {
+ cpu->squashFromXC(thread->tid);
+ }
+}
+
+template <class Impl>
+void
+AlphaFullCPU<Impl>::AlphaXC::setFloatRegBits(int reg_idx, FloatRegBits val,
+ int width)
+{
+ DPRINTF(Fault, "Setting floatint register through the XC!\n");
+ cpu->setArchFloatRegInt(reg_idx, val, thread->tid);
if (!thread->trapPending && !thread->inSyscall) {
cpu->squashFromXC(thread->tid);
@@ -459,7 +491,7 @@ AlphaFullCPU<Impl>::AlphaXC::setFloatRegDouble(int reg_idx, double val)
template <class Impl>
void
-AlphaFullCPU<Impl>::AlphaXC::setFloatRegInt(int reg_idx, uint64_t val)
+AlphaFullCPU<Impl>::AlphaXC::setFloatRegBits(int reg_idx, FloatRegBits val)
{
DPRINTF(Fault, "Setting floatint register through the XC!\n");
cpu->setArchFloatRegInt(reg_idx, val, thread->tid);
@@ -723,7 +755,7 @@ AlphaFullCPU<Impl>::processInterrupts()
template <class Impl>
void
-AlphaFullCPU<Impl>::syscall(int tid)
+AlphaFullCPU<Impl>::syscall(int64_t callnum, int tid)
{
DPRINTF(FullCPU, "AlphaFullCPU: [tid:%i] Executing syscall().\n\n", tid);
@@ -734,7 +766,7 @@ AlphaFullCPU<Impl>::syscall(int tid)
++(this->thread[tid]->funcExeInst);
// Execute the actual syscall.
- this->thread[tid]->syscall();
+ this->thread[tid]->syscall(callnum);
// Decrease funcExeInst by one as the normal commit will handle
// incrementing it.
diff --git a/src/cpu/o3/alpha_dyn_inst.hh b/src/cpu/o3/alpha_dyn_inst.hh
index b03c8c337..f289bbf0d 100644
--- a/src/cpu/o3/alpha_dyn_inst.hh
+++ b/src/cpu/o3/alpha_dyn_inst.hh
@@ -29,11 +29,14 @@
#ifndef __CPU_O3_ALPHA_DYN_INST_HH__
#define __CPU_O3_ALPHA_DYN_INST_HH__
+#include "arch/isa_traits.hh"
#include "cpu/base_dyn_inst.hh"
#include "cpu/inst_seq.hh"
#include "cpu/o3/alpha_cpu.hh"
#include "cpu/o3/alpha_impl.hh"
+class Packet;
+
/**
* Mostly implementation & ISA specific AlphaDynInst. As with most
* other classes in the new CPU model, it is templated on the Impl to
@@ -56,6 +59,8 @@ class AlphaDynInst : public BaseDynInst<Impl>
typedef TheISA::RegIndex RegIndex;
/** Integer register index type. */
typedef TheISA::IntReg IntReg;
+ typedef TheISA::FloatReg FloatReg;
+ typedef TheISA::FloatRegBits FloatRegBits;
/** Misc register index type. */
typedef TheISA::MiscReg MiscReg;
@@ -79,7 +84,7 @@ class AlphaDynInst : public BaseDynInst<Impl>
Fault initiateAcc();
/** Completes the access. Only valid for memory operations. */
- Fault completeAcc();
+ Fault completeAcc(Packet *pkt);
private:
/** Initializes variables. */
@@ -123,7 +128,7 @@ class AlphaDynInst : public BaseDynInst<Impl>
bool simPalCheck(int palFunc);
#else
/** Calls a syscall. */
- void syscall();
+ void syscall(int64_t callnum);
#endif
private:
diff --git a/src/cpu/o3/alpha_dyn_inst_impl.hh b/src/cpu/o3/alpha_dyn_inst_impl.hh
index 541d5ab82..16c236b4c 100644
--- a/src/cpu/o3/alpha_dyn_inst_impl.hh
+++ b/src/cpu/o3/alpha_dyn_inst_impl.hh
@@ -96,15 +96,13 @@ AlphaDynInst<Impl>::initiateAcc()
template <class Impl>
Fault
-AlphaDynInst<Impl>::completeAcc()
+AlphaDynInst<Impl>::completeAcc(Packet *pkt)
{
if (this->isLoad()) {
- this->fault = this->staticInst->completeAcc(this->req->data,
- this,
+ this->fault = this->staticInst->completeAcc(pkt, this,
this->traceData);
} else if (this->isStore()) {
- this->fault = this->staticInst->completeAcc((uint8_t*)&this->req->result,
- this,
+ this->fault = this->staticInst->completeAcc(pkt, this,
this->traceData);
} else {
panic("Unknown type!");
@@ -168,9 +166,9 @@ AlphaDynInst<Impl>::simPalCheck(int palFunc)
#else
template <class Impl>
void
-AlphaDynInst<Impl>::syscall()
+AlphaDynInst<Impl>::syscall(int64_t callnum)
{
- this->cpu->syscall(this->threadNumber);
+ this->cpu->syscall(callnum, this->threadNumber);
}
#endif
diff --git a/src/cpu/o3/alpha_params.hh b/src/cpu/o3/alpha_params.hh
index e3acf2c05..04366e8dd 100644
--- a/src/cpu/o3/alpha_params.hh
+++ b/src/cpu/o3/alpha_params.hh
@@ -35,8 +35,7 @@
class AlphaDTB;
class AlphaITB;
class FUPool;
-class FunctionalMemory;
-class MemInterface;
+class MemObject;
class Process;
class System;
@@ -60,7 +59,7 @@ class AlphaSimpleParams : public BaseFullCPU::Params
//Page Table
// PageTable *pTable;
- FunctionalMemory *mem;
+ MemObject *mem;
BaseCPU *checker;
@@ -69,8 +68,8 @@ class AlphaSimpleParams : public BaseFullCPU::Params
//
// Caches
//
- MemInterface *icacheInterface;
- MemInterface *dcacheInterface;
+// MemInterface *icacheInterface;
+// MemInterface *dcacheInterface;
unsigned cachePorts;
diff --git a/src/cpu/o3/bpred_unit.cc b/src/cpu/o3/bpred_unit.cc
index 92344111f..dcc5ceb80 100644
--- a/src/cpu/o3/bpred_unit.cc
+++ b/src/cpu/o3/bpred_unit.cc
@@ -30,8 +30,8 @@
#include "cpu/o3/alpha_impl.hh"
#include "cpu/o3/alpha_dyn_inst.hh"
#include "cpu/ozone/ozone_impl.hh"
-#include "cpu/ozone/simple_impl.hh"
+//#include "cpu/ozone/simple_impl.hh"
template class TwobitBPredUnit<AlphaSimpleImpl>;
template class TwobitBPredUnit<OzoneImpl>;
-template class TwobitBPredUnit<SimpleImpl>;
+//template class TwobitBPredUnit<SimpleImpl>;
diff --git a/src/cpu/o3/btb.hh b/src/cpu/o3/btb.hh
index b9ff42573..c7dc1808b 100644
--- a/src/cpu/o3/btb.hh
+++ b/src/cpu/o3/btb.hh
@@ -31,6 +31,7 @@
// For Addr type.
#include "arch/isa_traits.hh"
+#include "base/misc.hh"
class DefaultBTB
{
diff --git a/src/cpu/o3/commit.hh b/src/cpu/o3/commit.hh
index 66abf8dc6..c019ef4c7 100644
--- a/src/cpu/o3/commit.hh
+++ b/src/cpu/o3/commit.hh
@@ -34,7 +34,6 @@
#include "base/timebuf.hh"
#include "cpu/exetrace.hh"
#include "cpu/inst_seq.hh"
-#include "mem/memory_interface.hh"
template <class>
class O3ThreadState;
@@ -301,9 +300,6 @@ class DefaultCommit
/** Pointer to FullCPU. */
FullCPU *cpu;
- /** Memory interface. Used for d-cache accesses. */
- MemInterface *dcacheInterface;
-
std::vector<Thread *> thread;
Fault fetchFault;
diff --git a/src/cpu/o3/commit_impl.hh b/src/cpu/o3/commit_impl.hh
index 346a8bc1c..97703c430 100644
--- a/src/cpu/o3/commit_impl.hh
+++ b/src/cpu/o3/commit_impl.hh
@@ -64,8 +64,7 @@ DefaultCommit<Impl>::TrapEvent::description()
template <class Impl>
DefaultCommit<Impl>::DefaultCommit(Params *params)
- : dcacheInterface(params->dcacheInterface),
- squashCounter(0),
+ : squashCounter(0),
iewToCommitDelay(params->iewToCommitDelay),
commitToIEWDelay(params->commitToIEWDelay),
renameToROBDelay(params->renameToROBDelay),
diff --git a/src/cpu/o3/cpu.cc b/src/cpu/o3/cpu.cc
index ed02a845b..4e0bb2d2d 100644
--- a/src/cpu/o3/cpu.cc
+++ b/src/cpu/o3/cpu.cc
@@ -46,6 +46,7 @@
#include "sim/stat_control.hh"
using namespace std;
+using namespace TheISA;
BaseFullCPU::BaseFullCPU(Params *params)
: BaseCPU(params), cpu_id(0)
@@ -121,14 +122,9 @@ FullO3CPU<Impl>::FullO3CPU(Params *params)
system(params->system),
memCtrl(system->memctrl),
physmem(system->physmem),
- mem(params->mem),
-#else
-// pTable(params->pTable),
- mem(params->workload[0]->getMemory()),
#endif // FULL_SYSTEM
+ mem(params->mem),
switchCount(0),
- icacheInterface(params->icacheInterface),
- dcacheInterface(params->dcacheInterface),
deferRegistration(params->deferRegistration),
numThreads(number_of_threads)
{
@@ -782,6 +778,7 @@ FullO3CPU<Impl>::readFloatReg(int reg_idx)
template <class Impl>
FloatRegBits
FullO3CPU<Impl>::readFloatRegBits(int reg_idx, int width)
+{
return regFile.readFloatRegBits(reg_idx, width);
}
@@ -843,7 +840,7 @@ FullO3CPU<Impl>::readArchFloatRegSingle(int reg_idx, unsigned tid)
int idx = reg_idx + TheISA::FP_Base_DepTag;
PhysRegIndex phys_reg = commitRenameMap[tid].lookup(idx);
- return regFile.readFloatRegSingle(phys_reg);
+ return regFile.readFloatReg(phys_reg);
}
template <class Impl>
@@ -853,7 +850,7 @@ FullO3CPU<Impl>::readArchFloatRegDouble(int reg_idx, unsigned tid)
int idx = reg_idx + TheISA::FP_Base_DepTag;
PhysRegIndex phys_reg = commitRenameMap[tid].lookup(idx);
- return regFile.readFloatRegDouble(phys_reg);
+ return regFile.readFloatReg(phys_reg, 64);
}
template <class Impl>
@@ -863,7 +860,7 @@ FullO3CPU<Impl>::readArchFloatRegInt(int reg_idx, unsigned tid)
int idx = reg_idx + TheISA::FP_Base_DepTag;
PhysRegIndex phys_reg = commitRenameMap[tid].lookup(idx);
- return regFile.readFloatRegInt(phys_reg);
+ return regFile.readFloatRegBits(phys_reg);
}
template <class Impl>
@@ -881,7 +878,7 @@ FullO3CPU<Impl>::setArchFloatRegSingle(int reg_idx, float val, unsigned tid)
{
PhysRegIndex phys_reg = commitRenameMap[tid].lookup(reg_idx);
- regFile.setFloatRegSingle(phys_reg, val);
+ regFile.setFloatReg(phys_reg, val);
}
template <class Impl>
@@ -890,7 +887,7 @@ FullO3CPU<Impl>::setArchFloatRegDouble(int reg_idx, double val, unsigned tid)
{
PhysRegIndex phys_reg = commitRenameMap[tid].lookup(reg_idx);
- regFile.setFloatRegDouble(phys_reg, val);
+ regFile.setFloatReg(phys_reg, val, 64);
}
template <class Impl>
@@ -899,7 +896,7 @@ FullO3CPU<Impl>::setArchFloatRegInt(int reg_idx, uint64_t val, unsigned tid)
{
PhysRegIndex phys_reg = commitRenameMap[tid].lookup(reg_idx);
- regFile.setFloatRegInt(phys_reg, val);
+ regFile.setFloatRegBits(phys_reg, val);
}
template <class Impl>
diff --git a/src/cpu/o3/cpu.hh b/src/cpu/o3/cpu.hh
index bed95ad54..c791b2948 100644
--- a/src/cpu/o3/cpu.hh
+++ b/src/cpu/o3/cpu.hh
@@ -35,6 +35,7 @@
#include <set>
#include <vector>
+#include "arch/isa_traits.hh"
#include "base/statistics.hh"
#include "base/timebuf.hh"
#include "config/full_system.hh"
@@ -50,7 +51,7 @@
template <class>
class Checker;
class ExecContext;
-class MemInterface;
+class MemObject;
class Process;
class BaseFullCPU : public BaseCPU
@@ -63,6 +64,8 @@ class BaseFullCPU : public BaseCPU
void regStats();
+ int readCpuId() { return cpu_id; }
+
protected:
int cpu_id;
};
@@ -71,6 +74,9 @@ template <class Impl>
class FullO3CPU : public BaseFullCPU
{
public:
+ typedef TheISA::FloatReg FloatReg;
+ typedef TheISA::FloatRegBits FloatRegBits;
+
// Typedefs from the Impl here.
typedef typename Impl::CPUPol CPUPolicy;
typedef typename Impl::Params Params;
@@ -226,14 +232,6 @@ class FullO3CPU : public BaseFullCPU
int getDataAsid(unsigned tid)
{ return regFile.miscRegs[tid].getDataAsid(); }
#else
- /** Check if this address is a valid instruction address. */
- bool validInstAddr(Addr addr,unsigned tid)
- { return thread[tid]->validInstAddr(addr); }
-
- /** Check if this address is a valid data address. */
- bool validDataAddr(Addr addr,unsigned tid)
- { return thread[tid]->validDataAddr(addr); }
-
/** Get instruction asid. */
int getInstAsid(unsigned tid)
{ return thread[tid]->asid; }
@@ -259,13 +257,13 @@ class FullO3CPU : public BaseFullCPU
void setIntReg(int reg_idx, uint64_t val);
- void setFloatReg(int reg_idx, FloatReg val, int width);
+ void setFloatReg(int reg_idx, FloatReg val);
void setFloatReg(int reg_idx, FloatReg val, int width);
void setFloatRegBits(int reg_idx, FloatRegBits val);
- void setFloatRegBits(int reg_idx, FloatRegBits val);
+ void setFloatRegBits(int reg_idx, FloatRegBits val, int width);
uint64_t readArchIntReg(int reg_idx, unsigned tid);
@@ -464,7 +462,7 @@ class FullO3CPU : public BaseFullCPU
#endif
/** Pointer to memory. */
- FunctionalMemory *mem;
+ MemObject *mem;
Sampler *sampler;
diff --git a/src/cpu/o3/dep_graph.hh b/src/cpu/o3/dep_graph.hh
new file mode 100644
index 000000000..f8ae38da4
--- /dev/null
+++ b/src/cpu/o3/dep_graph.hh
@@ -0,0 +1,213 @@
+
+#ifndef __CPU_O3_DEP_GRAPH_HH__
+#define __CPU_O3_DEP_GRAPH_HH__
+
+#include "cpu/o3/comm.hh"
+
+template <class DynInstPtr>
+class DependencyEntry
+{
+ public:
+ DependencyEntry()
+ : inst(NULL), next(NULL)
+ { }
+
+ DynInstPtr inst;
+ //Might want to include data about what arch. register the
+ //dependence is waiting on.
+ DependencyEntry<DynInstPtr> *next;
+};
+
+template <class DynInstPtr>
+class DependencyGraph
+{
+ public:
+ typedef DependencyEntry<DynInstPtr> DepEntry;
+
+ DependencyGraph()
+ : numEntries(0), memAllocCounter(0), nodesTraversed(0), nodesRemoved(0)
+ { }
+
+ void resize(int num_entries);
+
+ void reset();
+
+ void insert(PhysRegIndex idx, DynInstPtr &new_inst);
+
+ void setInst(PhysRegIndex idx, DynInstPtr &new_inst)
+ { dependGraph[idx].inst = new_inst; }
+
+ void clearInst(PhysRegIndex idx)
+ { dependGraph[idx].inst = NULL; }
+
+ void remove(PhysRegIndex idx, DynInstPtr &inst_to_remove);
+
+ DynInstPtr pop(PhysRegIndex idx);
+
+ bool empty(PhysRegIndex idx) { return !dependGraph[idx].next; }
+
+ /** Debugging function to dump out the dependency graph.
+ */
+ void dump();
+
+ private:
+ /** Array of linked lists. Each linked list is a list of all the
+ * instructions that depend upon a given register. The actual
+ * register's index is used to index into the graph; ie all
+ * instructions in flight that are dependent upon r34 will be
+ * in the linked list of dependGraph[34].
+ */
+ DepEntry *dependGraph;
+
+ int numEntries;
+
+ // Debug variable, remove when done testing.
+ unsigned memAllocCounter;
+
+ public:
+ uint64_t nodesTraversed;
+ uint64_t nodesRemoved;
+};
+
+template <class DynInstPtr>
+void
+DependencyGraph<DynInstPtr>::resize(int num_entries)
+{
+ numEntries = num_entries;
+ dependGraph = new DepEntry[numEntries];
+}
+
+template <class DynInstPtr>
+void
+DependencyGraph<DynInstPtr>::reset()
+{
+ // Clear the dependency graph
+ DepEntry *curr;
+ DepEntry *prev;
+
+ for (int i = 0; i < numEntries; ++i) {
+ curr = dependGraph[i].next;
+
+ while (curr) {
+ memAllocCounter--;
+
+ prev = curr;
+ curr = prev->next;
+ prev->inst = NULL;
+
+ delete prev;
+ }
+
+ if (dependGraph[i].inst) {
+ dependGraph[i].inst = NULL;
+ }
+
+ dependGraph[i].next = NULL;
+ }
+}
+
+template <class DynInstPtr>
+void
+DependencyGraph<DynInstPtr>::insert(PhysRegIndex idx, DynInstPtr &new_inst)
+{
+ //Add this new, dependent instruction at the head of the dependency
+ //chain.
+
+ // First create the entry that will be added to the head of the
+ // dependency chain.
+ DepEntry *new_entry = new DepEntry;
+ new_entry->next = dependGraph[idx].next;
+ new_entry->inst = new_inst;
+
+ // Then actually add it to the chain.
+ dependGraph[idx].next = new_entry;
+
+ ++memAllocCounter;
+}
+
+
+template <class DynInstPtr>
+void
+DependencyGraph<DynInstPtr>::remove(PhysRegIndex idx,
+ DynInstPtr &inst_to_remove)
+{
+ DepEntry *prev = &dependGraph[idx];
+ DepEntry *curr = dependGraph[idx].next;
+
+ // Make sure curr isn't NULL. Because this instruction is being
+ // removed from a dependency list, it must have been placed there at
+ // an earlier time. The dependency chain should not be empty,
+ // unless the instruction dependent upon it is already ready.
+ if (curr == NULL) {
+ return;
+ }
+
+ nodesRemoved++;
+
+ // Find the instruction to remove within the dependency linked list.
+ while (curr->inst != inst_to_remove) {
+ prev = curr;
+ curr = curr->next;
+ nodesTraversed++;
+
+ assert(curr != NULL);
+ }
+
+ // Now remove this instruction from the list.
+ prev->next = curr->next;
+
+ --memAllocCounter;
+
+ // Could push this off to the destructor of DependencyEntry
+ curr->inst = NULL;
+
+ delete curr;
+}
+
+template <class DynInstPtr>
+DynInstPtr
+DependencyGraph<DynInstPtr>::pop(PhysRegIndex idx)
+{
+ DepEntry *node;
+ node = dependGraph[idx].next;
+ DynInstPtr inst = NULL;
+ if (node) {
+ inst = node->inst;
+ dependGraph[idx].next = node->next;
+ node->inst = NULL;
+ memAllocCounter--;
+ delete node;
+ }
+ return inst;
+}
+
+template <class DynInstPtr>
+void
+DependencyGraph<DynInstPtr>::dump()
+{
+ DepEntry *curr;
+
+ for (int i = 0; i < numEntries; ++i)
+ {
+ curr = &dependGraph[i];
+
+ if (curr->inst) {
+ cprintf("dependGraph[%i]: producer: %#x [sn:%lli] consumer: ",
+ i, curr->inst->readPC(), curr->inst->seqNum);
+ } else {
+ cprintf("dependGraph[%i]: No producer. consumer: ", i);
+ }
+
+ while (curr->next != NULL) {
+ curr = curr->next;
+
+ cprintf("%#x [sn:%lli] ",
+ curr->inst->readPC(), curr->inst->seqNum);
+ }
+
+ cprintf("\n");
+ }
+ cprintf("memAllocCounter: %i\n", memAllocCounter);
+}
+
+#endif // __CPU_O3_DEP_GRAPH_HH__
diff --git a/src/cpu/o3/fetch.hh b/src/cpu/o3/fetch.hh
index 3fcfdc3a1..2b1d93cb7 100644
--- a/src/cpu/o3/fetch.hh
+++ b/src/cpu/o3/fetch.hh
@@ -29,10 +29,12 @@
#ifndef __CPU_O3_FETCH_HH__
#define __CPU_O3_FETCH_HH__
+#include "arch/utility.hh"
#include "base/statistics.hh"
#include "base/timebuf.hh"
#include "cpu/pc_event.hh"
-#include "mem/mem_interface.hh"
+#include "mem/packet.hh"
+#include "mem/port.hh"
#include "sim/eventq.hh"
class Sampler;
@@ -65,6 +67,32 @@ class DefaultFetch
typedef TheISA::MachInst MachInst;
typedef TheISA::ExtMachInst ExtMachInst;
+ class IcachePort : public Port
+ {
+ protected:
+ DefaultFetch<Impl> *fetch;
+
+ public:
+ IcachePort(DefaultFetch<Impl> *_fetch)
+ : Port(_fetch->name() + "-iport"), fetch(_fetch)
+ { }
+
+ protected:
+ virtual Tick recvAtomic(PacketPtr pkt);
+
+ virtual void recvFunctional(PacketPtr pkt);
+
+ virtual void recvStatusChange(Status status);
+
+ virtual void getDeviceAddressRanges(AddrRangeList &resp,
+ AddrRangeList &snoop)
+ { resp.clear(); snoop.clear(); }
+
+ virtual bool recvTiming(PacketPtr pkt);
+
+ virtual void recvRetry();
+ };
+
public:
/** Overall fetch status. Used to determine if the CPU can
* deschedule itsef due to a lack of activity.
@@ -84,8 +112,9 @@ class DefaultFetch
TrapPending,
QuiescePending,
SwitchOut,
- IcacheMissStall,
- IcacheMissComplete
+ IcacheWaitResponse,
+ IcacheRetry,
+ IcacheAccessComplete
};
/** Fetching Policy, Add new policies here.*/
@@ -111,28 +140,6 @@ class DefaultFetch
std::list<unsigned> priorityList;
public:
- class CacheCompletionEvent : public Event
- {
- private:
- MemReqPtr req;
- /** Pointer to fetch. */
- DefaultFetch *fetch;
- /** Thread id. */
-// unsigned threadId;
-
- public:
- /** Constructs a cache completion event, which tells fetch when the
- * cache miss is complete.
- */
- CacheCompletionEvent(MemReqPtr &_req, DefaultFetch *_fetch);
-
- /** Processes cache completion event. */
- virtual void process();
- /** Returns the description of the cache completion event. */
- virtual const char *description();
- };
-
- public:
/** DefaultFetch constructor. */
DefaultFetch(Params *params);
@@ -161,7 +168,7 @@ class DefaultFetch
void initStage();
/** Processes cache completion event. */
- void processCacheCompletion(MemReqPtr &req);
+ void processCacheCompletion(PacketPtr pkt);
void switchOut();
@@ -295,8 +302,10 @@ class DefaultFetch
/** Wire used to write any information heading to decode. */
typename TimeBuffer<FetchStruct>::wire toDecode;
+ MemObject *mem;
+
/** Icache interface. */
- MemInterface *icacheInterface;
+ IcachePort *icachePort;
/** BPredUnit. */
BPredUnit branchPred;
@@ -305,8 +314,8 @@ class DefaultFetch
Addr nextPC[Impl::MaxThreads];
- /** Memory request used to access cache. */
- MemReqPtr memReq[Impl::MaxThreads];
+ /** Memory packet used to access cache. */
+ PacketPtr memPkt[Impl::MaxThreads];
/** Variable that tracks if fetch has written to the time buffer this
* cycle. Used to tell CPU if there is activity this cycle.
diff --git a/src/cpu/o3/fetch_impl.hh b/src/cpu/o3/fetch_impl.hh
index 1c5e508f6..a80afbcf4 100644
--- a/src/cpu/o3/fetch_impl.hh
+++ b/src/cpu/o3/fetch_impl.hh
@@ -27,12 +27,13 @@
*/
#include "arch/isa_traits.hh"
+#include "arch/utility.hh"
#include "cpu/exetrace.hh"
#include "cpu/o3/fetch.hh"
-#include "mem/base_mem.hh"
-#include "mem/mem_interface.hh"
-#include "mem/mem_req.hh"
+#include "mem/packet.hh"
+#include "mem/request.hh"
#include "sim/byteswap.hh"
+#include "sim/host.hh"
#include "sim/root.hh"
#if FULL_SYSTEM
@@ -42,42 +43,67 @@
#include "mem/functional/memory_control.hh"
#include "mem/functional/physical.hh"
#include "sim/system.hh"
-#else // !FULL_SYSTEM
-#include "mem/functional/functional.hh"
#endif // FULL_SYSTEM
#include <algorithm>
using namespace std;
+using namespace TheISA;
template<class Impl>
-DefaultFetch<Impl>::CacheCompletionEvent::CacheCompletionEvent(MemReqPtr &_req,
- DefaultFetch *_fetch)
- : Event(&mainEventQueue, Delayed_Writeback_Pri),
- req(_req),
- fetch(_fetch)
+Tick
+DefaultFetch<Impl>::IcachePort::recvAtomic(PacketPtr pkt)
{
- this->setFlags(Event::AutoDelete);
+ panic("DefaultFetch doesn't expect recvAtomic callback!");
+ return curTick;
}
template<class Impl>
void
-DefaultFetch<Impl>::CacheCompletionEvent::process()
+DefaultFetch<Impl>::IcachePort::recvFunctional(PacketPtr pkt)
+{
+ panic("DefaultFetch doesn't expect recvFunctional callback!");
+}
+
+template<class Impl>
+void
+DefaultFetch<Impl>::IcachePort::recvStatusChange(Status status)
+{
+ if (status == RangeChange)
+ return;
+
+ panic("DefaultFetch doesn't expect recvStatusChange callback!");
+}
+
+template<class Impl>
+bool
+DefaultFetch<Impl>::IcachePort::recvTiming(Packet *pkt)
{
- fetch->processCacheCompletion(req);
+ fetch->processCacheCompletion(pkt);
+ return true;
}
template<class Impl>
-const char *
-DefaultFetch<Impl>::CacheCompletionEvent::description()
+void
+DefaultFetch<Impl>::IcachePort::recvRetry()
{
- return "DefaultFetch cache completion event";
+ panic("DefaultFetch doesn't support retry yet.");
+ // we shouldn't get a retry unless we have a packet that we're
+ // waiting to transmit
+/*
+ assert(cpu->dcache_pkt != NULL);
+ assert(cpu->_status == DcacheRetry);
+ Packet *tmp = cpu->dcache_pkt;
+ if (sendTiming(tmp)) {
+ cpu->_status = DcacheWaitResponse;
+ cpu->dcache_pkt = NULL;
+ }
+*/
}
template<class Impl>
DefaultFetch<Impl>::DefaultFetch(Params *params)
- : icacheInterface(params->icacheInterface),
- branchPred(params),
+ : branchPred(params),
decodeToFetchDelay(params->decodeToFetchDelay),
renameToFetchDelay(params->renameToFetchDelay),
iewToFetchDelay(params->iewToFetchDelay),
@@ -122,7 +148,7 @@ DefaultFetch<Impl>::DefaultFetch(Params *params)
}
// Size of cache block.
- cacheBlkSize = icacheInterface ? icacheInterface->getBlockSize() : 64;
+ cacheBlkSize = 64;
// Create mask to get rid of offset bits.
cacheBlkMask = (cacheBlkSize - 1);
@@ -133,8 +159,7 @@ DefaultFetch<Impl>::DefaultFetch(Params *params)
priorityList.push_back(tid);
- // Create a new memory request.
- memReq[tid] = NULL;
+ memPkt[tid] = NULL;
// Create space to store a cache line.
cacheData[tid] = new uint8_t[cacheBlkSize];
@@ -253,6 +278,9 @@ DefaultFetch<Impl>::setCPU(FullCPU *cpu_ptr)
DPRINTF(Fetch, "Setting the CPU pointer.\n");
cpu = cpu_ptr;
+ // Name is finally available, so create the port.
+ icachePort = new IcachePort(this);
+
// Fetch needs to start fetching instructions at the very beginning,
// so it must start up in active state.
switchToActive();
@@ -315,9 +343,9 @@ DefaultFetch<Impl>::initStage()
template<class Impl>
void
-DefaultFetch<Impl>::processCacheCompletion(MemReqPtr &req)
+DefaultFetch<Impl>::processCacheCompletion(PacketPtr pkt)
{
- unsigned tid = req->thread_num;
+ unsigned tid = pkt->req->getThreadNum();
DPRINTF(Fetch, "[tid:%u] Waking up from cache miss.\n",tid);
@@ -325,10 +353,11 @@ DefaultFetch<Impl>::processCacheCompletion(MemReqPtr &req)
// to return.
// Can keep track of how many cache accesses go unused due to
// misspeculation here.
- if (fetchStatus[tid] != IcacheMissStall ||
- req != memReq[tid] ||
+ if (fetchStatus[tid] != IcacheWaitResponse ||
+ pkt != memPkt[tid] ||
isSwitchedOut()) {
++fetchIcacheSquashes;
+ delete pkt;
return;
}
@@ -341,17 +370,19 @@ DefaultFetch<Impl>::processCacheCompletion(MemReqPtr &req)
switchToActive();
- // Only switch to IcacheMissComplete if we're not stalled as well.
+ // Only switch to IcacheAccessComplete if we're not stalled as well.
if (checkStall(tid)) {
fetchStatus[tid] = Blocked;
} else {
- fetchStatus[tid] = IcacheMissComplete;
+ fetchStatus[tid] = IcacheAccessComplete;
}
// memcpy(cacheData[tid], memReq[tid]->data, memReq[tid]->size);
// Reset the mem req to NULL.
- memReq[tid] = NULL;
+ delete pkt->req;
+ delete pkt;
+ memPkt[tid] = NULL;
}
template <class Impl>
@@ -475,18 +506,15 @@ DefaultFetch<Impl>::fetchCacheLine(Addr fetch_PC, Fault &ret_fault, unsigned tid
// Setup the memReq to do a read of the first instruction's address.
// Set the appropriate read size and flags as well.
- memReq[tid] = new MemReq();
+ // Build request here.
+ RequestPtr mem_req = new Request(tid, fetch_PC, cacheBlkSize, flags,
+ fetch_PC, cpu->readCpuId(), tid);
- memReq[tid]->asid = tid;
- memReq[tid]->thread_num = tid;
- memReq[tid]->data = new uint8_t[64];
- memReq[tid]->xc = cpu->xcBase(tid);
- memReq[tid]->cmd = Read;
- memReq[tid]->reset(fetch_PC, cacheBlkSize, flags);
+ memPkt[tid] = NULL;
// Translate the instruction request.
//#if FULL_SYSTEM
- fault = cpu->translateInstReq(memReq[tid]);
+ fault = cpu->translateInstReq(mem_req);
//#else
// fault = pTable->translate(memReq[tid]);
//#endif
@@ -508,48 +536,31 @@ DefaultFetch<Impl>::fetchCacheLine(Addr fetch_PC, Fault &ret_fault, unsigned tid
}
#endif
+ // Build packet here.
+ PacketPtr data_pkt = new Packet(mem_req,
+ Packet::ReadReq, Packet::Broadcast);
+ data_pkt->dataStatic(cacheData[tid]);
+
DPRINTF(Fetch, "Fetch: Doing instruction read.\n");
- fault = cpu->mem->read(memReq[tid], cacheData[tid]);
- // This read may change when the mem interface changes.
+
+ fetchedCacheLines++;
// Now do the timing access to see whether or not the instruction
// exists within the cache.
- if (icacheInterface && !icacheInterface->isBlocked()) {
- DPRINTF(Fetch, "Doing cache access.\n");
-
- memReq[tid]->completionEvent = NULL;
-
- memReq[tid]->time = curTick;
-
- MemAccessResult result = icacheInterface->access(memReq[tid]);
-
- fetchedCacheLines++;
-
- // If the cache missed, then schedule an event to wake
- // up this stage once the cache miss completes.
- // @todo: Possibly allow for longer than 1 cycle cache hits.
- if (result != MA_HIT && icacheInterface->doEvents()) {
-
- memReq[tid]->completionEvent =
- new CacheCompletionEvent(memReq[tid], this);
-
- lastIcacheStall[tid] = curTick;
-
- DPRINTF(Activity, "[tid:%i]: Activity: Stalling due to I-cache "
- "miss.\n", tid);
-
- fetchStatus[tid] = IcacheMissStall;
- } else {
- DPRINTF(Fetch, "[tid:%i]: I-Cache hit. Doing Instruction "
- "read.\n", tid);
-
-// memcpy(cacheData[tid], memReq[tid]->data, memReq[tid]->size);
- }
- } else {
+ if (!icachePort->sendTiming(data_pkt)) {
DPRINTF(Fetch, "[tid:%i] Out of MSHRs!\n", tid);
ret_fault = NoFault;
return false;
}
+
+ DPRINTF(Fetch, "Doing cache access.\n");
+
+ lastIcacheStall[tid] = curTick;
+
+ DPRINTF(Activity, "[tid:%i]: Activity: Waiting on I-cache "
+ "response.\n", tid);
+
+ fetchStatus[tid] = IcacheWaitResponse;
}
ret_fault = fault;
@@ -567,10 +578,11 @@ DefaultFetch<Impl>::doSquash(const Addr &new_PC, unsigned tid)
nextPC[tid] = new_PC + instSize;
// Clear the icache miss if it's outstanding.
- if (fetchStatus[tid] == IcacheMissStall && icacheInterface) {
+ if (fetchStatus[tid] == IcacheWaitResponse) {
DPRINTF(Fetch, "[tid:%i]: Squashing outstanding Icache miss.\n",
tid);
- memReq[tid] = NULL;
+ delete memPkt[tid];
+ memPkt[tid] = NULL;
}
fetchStatus[tid] = Squashing;
@@ -632,12 +644,12 @@ DefaultFetch<Impl>::updateFetchStatus()
if (fetchStatus[tid] == Running ||
fetchStatus[tid] == Squashing ||
- fetchStatus[tid] == IcacheMissComplete) {
+ fetchStatus[tid] == IcacheAccessComplete) {
if (_status == Inactive) {
DPRINTF(Activity, "[tid:%i]: Activating stage.\n",tid);
- if (fetchStatus[tid] == IcacheMissComplete) {
+ if (fetchStatus[tid] == IcacheAccessComplete) {
DPRINTF(Activity, "[tid:%i]: Activating fetch due to cache"
"completion\n",tid);
}
@@ -831,7 +843,7 @@ DefaultFetch<Impl>::checkSignalsAndUpdate(unsigned tid)
}
}
- if (checkStall(tid) && fetchStatus[tid] != IcacheMissStall) {
+ if (checkStall(tid) && fetchStatus[tid] != IcacheWaitResponse) {
DPRINTF(Fetch, "[tid:%i]: Setting to blocked\n",tid);
fetchStatus[tid] = Blocked;
@@ -882,7 +894,7 @@ DefaultFetch<Impl>::fetch(bool &status_change)
// If returning from the delay of a cache miss, then update the status
// to running, otherwise do the cache access. Possibly move this up
// to tick() function.
- if (fetchStatus[tid] == IcacheMissComplete) {
+ if (fetchStatus[tid] == IcacheAccessComplete) {
DPRINTF(Fetch, "[tid:%i]: Icache miss is complete.\n",
tid);
@@ -905,11 +917,11 @@ DefaultFetch<Impl>::fetch(bool &status_change)
++fetchBlockedCycles;
} else if (fetchStatus[tid] == Squashing) {
++fetchSquashCycles;
- } else if (fetchStatus[tid] == IcacheMissStall) {
+ } else if (fetchStatus[tid] == IcacheWaitResponse) {
++icacheStallCycles;
}
- // Status is Idle, Squashing, Blocked, or IcacheMissStall, so
+ // Status is Idle, Squashing, Blocked, or IcacheWaitResponse, so
// fetch should do nothing.
return;
}
@@ -917,7 +929,7 @@ DefaultFetch<Impl>::fetch(bool &status_change)
++fetchCycles;
// If we had a stall due to an icache miss, then return.
- if (fetchStatus[tid] == IcacheMissStall) {
+ if (fetchStatus[tid] == IcacheWaitResponse) {
++icacheStallCycles;
status_change = true;
return;
@@ -1026,7 +1038,7 @@ DefaultFetch<Impl>::fetch(bool &status_change)
} else {
// We shouldn't be in an icache miss and also have a fault (an ITB
// miss)
- if (fetchStatus[tid] == IcacheMissStall) {
+ if (fetchStatus[tid] == IcacheWaitResponse) {
panic("Fetch should have exited prior to this!");
}
@@ -1107,7 +1119,7 @@ DefaultFetch<Impl>::getFetchingThread(FetchPriority &fetch_priority)
int tid = *((*activeThreads).begin());
if (fetchStatus[tid] == Running ||
- fetchStatus[tid] == IcacheMissComplete ||
+ fetchStatus[tid] == IcacheAccessComplete ||
fetchStatus[tid] == Idle) {
return tid;
} else {
@@ -1133,7 +1145,7 @@ DefaultFetch<Impl>::roundRobin()
assert(high_pri <= numThreads);
if (fetchStatus[high_pri] == Running ||
- fetchStatus[high_pri] == IcacheMissComplete ||
+ fetchStatus[high_pri] == IcacheAccessComplete ||
fetchStatus[high_pri] == Idle) {
priorityList.erase(pri_iter);
@@ -1167,7 +1179,7 @@ DefaultFetch<Impl>::iqCount()
unsigned high_pri = PQ.top();
if (fetchStatus[high_pri] == Running ||
- fetchStatus[high_pri] == IcacheMissComplete ||
+ fetchStatus[high_pri] == IcacheAccessComplete ||
fetchStatus[high_pri] == Idle)
return high_pri;
else
@@ -1198,7 +1210,7 @@ DefaultFetch<Impl>::lsqCount()
unsigned high_pri = PQ.top();
if (fetchStatus[high_pri] == Running ||
- fetchStatus[high_pri] == IcacheMissComplete ||
+ fetchStatus[high_pri] == IcacheAccessComplete ||
fetchStatus[high_pri] == Idle)
return high_pri;
else
diff --git a/src/cpu/o3/free_list.hh b/src/cpu/o3/free_list.hh
index 29e84cd44..daf1007c1 100644
--- a/src/cpu/o3/free_list.hh
+++ b/src/cpu/o3/free_list.hh
@@ -33,6 +33,7 @@
#include <queue>
#include "arch/isa_traits.hh"
+#include "base/misc.hh"
#include "base/trace.hh"
#include "base/traceflags.hh"
#include "cpu/o3/comm.hh"
diff --git a/src/cpu/o3/fu_pool.cc b/src/cpu/o3/fu_pool.cc
new file mode 100644
index 000000000..fb2b5c00d
--- /dev/null
+++ b/src/cpu/o3/fu_pool.cc
@@ -0,0 +1,295 @@
+/*
+ * Copyright (c) 2002-2005 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sstream>
+
+#include "cpu/o3/fu_pool.hh"
+#include "encumbered/cpu/full/fu_pool.hh"
+#include "sim/builder.hh"
+
+using namespace std;
+
+////////////////////////////////////////////////////////////////////////////
+//
+// A pool of function units
+//
+
+inline void
+FUPool::FUIdxQueue::addFU(int fu_idx)
+{
+ funcUnitsIdx.push_back(fu_idx);
+ ++size;
+}
+
+inline int
+FUPool::FUIdxQueue::getFU()
+{
+ int retval = funcUnitsIdx[idx++];
+
+ if (idx == size)
+ idx = 0;
+
+ return retval;
+}
+
+FUPool::~FUPool()
+{
+ fuListIterator i = funcUnits.begin();
+ fuListIterator end = funcUnits.end();
+ for (; i != end; ++i)
+ delete *i;
+}
+
+
+// Constructor
+FUPool::FUPool(string name, vector<FUDesc *> paramList)
+ : SimObject(name)
+{
+ numFU = 0;
+
+ funcUnits.clear();
+
+ for (int i = 0; i < Num_OpClasses; ++i) {
+ maxOpLatencies[i] = 0;
+ maxIssueLatencies[i] = 0;
+ }
+
+ //
+ // Iterate through the list of FUDescData structures
+ //
+ for (FUDDiterator i = paramList.begin(); i != paramList.end(); ++i) {
+
+ //
+ // Don't bother with this if we're not going to create any FU's
+ //
+ if ((*i)->number) {
+ //
+ // Create the FuncUnit object from this structure
+ // - add the capabilities listed in the FU's operation
+ // description
+ //
+ // We create the first unit, then duplicate it as needed
+ //
+ FuncUnit *fu = new FuncUnit;
+
+ OPDDiterator j = (*i)->opDescList.begin();
+ OPDDiterator end = (*i)->opDescList.end();
+ for (; j != end; ++j) {
+ // indicate that this pool has this capability
+ capabilityList.set((*j)->opClass);
+
+ // Add each of the FU's that will have this capability to the
+ // appropriate queue.
+ for (int k = 0; k < (*i)->number; ++k)
+ fuPerCapList[(*j)->opClass].addFU(numFU + k);
+
+ // indicate that this FU has the capability
+ fu->addCapability((*j)->opClass, (*j)->opLat, (*j)->issueLat);
+
+ if ((*j)->opLat > maxOpLatencies[(*j)->opClass])
+ maxOpLatencies[(*j)->opClass] = (*j)->opLat;
+
+ if ((*j)->issueLat > maxIssueLatencies[(*j)->opClass])
+ maxIssueLatencies[(*j)->opClass] = (*j)->issueLat;
+ }
+
+ numFU++;
+
+ // Add the appropriate number of copies of this FU to the list
+ ostringstream s;
+
+ s << (*i)->name() << "(0)";
+ fu->name = s.str();
+ funcUnits.push_back(fu);
+
+ for (int c = 1; c < (*i)->number; ++c) {
+ ostringstream s;
+ numFU++;
+ FuncUnit *fu2 = new FuncUnit(*fu);
+
+ s << (*i)->name() << "(" << c << ")";
+ fu2->name = s.str();
+ funcUnits.push_back(fu2);
+ }
+ }
+ }
+
+ unitBusy.resize(numFU);
+
+ for (int i = 0; i < numFU; i++) {
+ unitBusy[i] = false;
+ }
+}
+
+void
+FUPool::annotateMemoryUnits(unsigned hit_latency)
+{
+ maxOpLatencies[MemReadOp] = hit_latency;
+
+ fuListIterator i = funcUnits.begin();
+ fuListIterator iend = funcUnits.end();
+ for (; i != iend; ++i) {
+ if ((*i)->provides(MemReadOp))
+ (*i)->opLatency(MemReadOp) = hit_latency;
+
+ if ((*i)->provides(MemWriteOp))
+ (*i)->opLatency(MemWriteOp) = hit_latency;
+ }
+}
+
+int
+FUPool::getUnit(OpClass capability)
+{
+ // If this pool doesn't have the specified capability,
+ // return this information to the caller
+ if (!capabilityList[capability])
+ return -2;
+
+ int fu_idx = fuPerCapList[capability].getFU();
+ int start_idx = fu_idx;
+
+ // Iterate through the circular queue if needed, stopping if we've reached
+ // the first element again.
+ while (unitBusy[fu_idx]) {
+ fu_idx = fuPerCapList[capability].getFU();
+ if (fu_idx == start_idx) {
+ // No FU available
+ return -1;
+ }
+ }
+
+ unitBusy[fu_idx] = true;
+
+ return fu_idx;
+}
+
+void
+FUPool::freeUnitNextCycle(int fu_idx)
+{
+ assert(unitBusy[fu_idx]);
+ unitsToBeFreed.push_back(fu_idx);
+}
+
+void
+FUPool::processFreeUnits()
+{
+ while (!unitsToBeFreed.empty()) {
+ int fu_idx = unitsToBeFreed.back();
+ unitsToBeFreed.pop_back();
+
+ assert(unitBusy[fu_idx]);
+
+ unitBusy[fu_idx] = false;
+ }
+}
+
+void
+FUPool::dump()
+{
+ cout << "Function Unit Pool (" << name() << ")\n";
+ cout << "======================================\n";
+ cout << "Free List:\n";
+
+ for (int i = 0; i < numFU; ++i) {
+ if (unitBusy[i]) {
+ continue;
+ }
+
+ cout << " [" << i << "] : ";
+
+ cout << funcUnits[i]->name << " ";
+
+ cout << "\n";
+ }
+
+ cout << "======================================\n";
+ cout << "Busy List:\n";
+ for (int i = 0; i < numFU; ++i) {
+ if (!unitBusy[i]) {
+ continue;
+ }
+
+ cout << " [" << i << "] : ";
+
+ cout << funcUnits[i]->name << " ";
+
+ cout << "\n";
+ }
+}
+
+void
+FUPool::switchOut()
+{
+}
+
+void
+FUPool::takeOverFrom()
+{
+ for (int i = 0; i < numFU; i++) {
+ unitBusy[i] = false;
+ }
+ unitsToBeFreed.clear();
+}
+
+//
+
+////////////////////////////////////////////////////////////////////////////
+//
+// The SimObjects we use to get the FU information into the simulator
+//
+////////////////////////////////////////////////////////////////////////////
+
+//
+// FUPool - Contails a list of FUDesc objects to make available
+//
+
+//
+// The FuPool object
+//
+
+BEGIN_DECLARE_SIM_OBJECT_PARAMS(FUPool)
+
+ SimObjectVectorParam<FUDesc *> FUList;
+
+END_DECLARE_SIM_OBJECT_PARAMS(FUPool)
+
+
+BEGIN_INIT_SIM_OBJECT_PARAMS(FUPool)
+
+ INIT_PARAM(FUList, "list of FU's for this pool")
+
+END_INIT_SIM_OBJECT_PARAMS(FUPool)
+
+
+CREATE_SIM_OBJECT(FUPool)
+{
+ return new FUPool(getInstanceName(), FUList);
+}
+
+REGISTER_SIM_OBJECT("FUPool", FUPool)
+
diff --git a/src/cpu/o3/fu_pool.hh b/src/cpu/o3/fu_pool.hh
new file mode 100644
index 000000000..f590c4149
--- /dev/null
+++ b/src/cpu/o3/fu_pool.hh
@@ -0,0 +1,162 @@
+/*
+ * Copyright (c) 2002-2005 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __CPU_O3_FU_POOL_HH__
+#define __CPU_O3_FU_POOL_HH__
+
+#include <bitset>
+#include <list>
+#include <string>
+#include <vector>
+
+#include "base/sched_list.hh"
+#include "cpu/op_class.hh"
+#include "sim/sim_object.hh"
+
+class FUDesc;
+class FuncUnit;
+
+/**
+ * Pool of FU's, specific to the new CPU model. The old FU pool had lists of
+ * free units and busy units, and whenever a FU was needed it would iterate
+ * through the free units to find a FU that provided the capability. This pool
+ * has lists of units specific to each of the capabilities, and whenever a FU
+ * is needed, it iterates through that list to find a free unit. The previous
+ * FU pool would have to be ticked each cycle to update which units became
+ * free. This FU pool lets the IEW stage handle freeing units, which frees
+ * them as their scheduled execution events complete. This limits units in this
+ * model to either have identical issue and op latencies, or 1 cycle issue
+ * latencies.
+ */
+class FUPool : public SimObject
+{
+ private:
+ /** Maximum op execution latencies, per op class. */
+ unsigned maxOpLatencies[Num_OpClasses];
+ /** Maximum issue latencies, per op class. */
+ unsigned maxIssueLatencies[Num_OpClasses];
+
+ /** Bitvector listing capabilities of this FU pool. */
+ std::bitset<Num_OpClasses> capabilityList;
+
+ /** Bitvector listing which FUs are busy. */
+ std::vector<bool> unitBusy;
+
+ /** List of units to be freed at the end of this cycle. */
+ std::vector<int> unitsToBeFreed;
+
+ /**
+ * Class that implements a circular queue to hold FU indices. The hope is
+ * that FUs that have been just used will be moved to the end of the queue
+ * by iterating through it, thus leaving free units at the head of the
+ * queue.
+ */
+ class FUIdxQueue {
+ public:
+ /** Constructs a circular queue of FU indices. */
+ FUIdxQueue()
+ : idx(0), size(0)
+ { }
+
+ /** Adds a FU to the queue. */
+ inline void addFU(int fu_idx);
+
+ /** Returns the index of the FU at the head of the queue, and changes
+ * the index to the next element.
+ */
+ inline int getFU();
+
+ private:
+ /** Circular queue index. */
+ int idx;
+
+ /** Size of the queue. */
+ int size;
+
+ /** Queue of FU indices. */
+ std::vector<int> funcUnitsIdx;
+ };
+
+ /** Per op class queues of FUs that provide that capability. */
+ FUIdxQueue fuPerCapList[Num_OpClasses];
+
+ /** Number of FUs. */
+ int numFU;
+
+ /** Functional units. */
+ std::vector<FuncUnit *> funcUnits;
+
+ typedef std::vector<FuncUnit *>::iterator fuListIterator;
+
+ public:
+
+ /** Constructs a FU pool. */
+ FUPool(std::string name, std::vector<FUDesc *> l);
+ ~FUPool();
+
+ /** Annotates units that provide memory operations. Included only because
+ * old FU pool provided this function.
+ */
+ void annotateMemoryUnits(unsigned hit_latency);
+
+ /**
+ * Gets a FU providing the requested capability. Will mark the unit as busy,
+ * but leaves the freeing of the unit up to the IEW stage.
+ * @param capability The capability requested.
+ * @return Returns -2 if the FU pool does not have the capability, -1 if
+ * there is no free FU, and the FU's index otherwise.
+ */
+ int getUnit(OpClass capability);
+
+ /** Frees a FU at the end of this cycle. */
+ void freeUnitNextCycle(int fu_idx);
+
+ /** Frees all FUs on the list. */
+ void processFreeUnits();
+
+ /** Returns the total number of FUs. */
+ int size() { return numFU; }
+
+ /** Debugging function used to dump FU information. */
+ void dump();
+
+ /** Returns the operation execution latency of the given capability. */
+ unsigned getOpLatency(OpClass capability) {
+ return maxOpLatencies[capability];
+ }
+
+ /** Returns the issue latency of the given capability. */
+ unsigned getIssueLatency(OpClass capability) {
+ return maxIssueLatencies[capability];
+ }
+
+ void switchOut();
+ void takeOverFrom();
+};
+
+#endif // __CPU_O3_FU_POOL_HH__
diff --git a/src/cpu/o3/iew.hh b/src/cpu/o3/iew.hh
index 935320628..c931669c6 100644
--- a/src/cpu/o3/iew.hh
+++ b/src/cpu/o3/iew.hh
@@ -111,25 +111,6 @@ class DefaultIEW
StageStatus wbStatus;
public:
- /** LdWriteback event for a load completion. */
- class LdWritebackEvent : public Event {
- private:
- /** Instruction that is writing back data to the register file. */
- DynInstPtr inst;
- /** Pointer to IEW stage. */
- DefaultIEW<Impl> *iewStage;
-
- public:
- /** Constructs a load writeback event. */
- LdWritebackEvent(DynInstPtr &_inst, DefaultIEW<Impl> *_iew);
-
- /** Processes writeback event. */
- virtual void process();
- /** Returns the description of the writeback event. */
- virtual const char *description();
- };
-
- public:
/** Constructs a DefaultIEW with the given parameters. */
DefaultIEW(Params *params);
diff --git a/src/cpu/o3/iew_impl.hh b/src/cpu/o3/iew_impl.hh
index b0137d7fc..955ebfdf3 100644
--- a/src/cpu/o3/iew_impl.hh
+++ b/src/cpu/o3/iew_impl.hh
@@ -39,58 +39,6 @@
using namespace std;
template<class Impl>
-DefaultIEW<Impl>::LdWritebackEvent::LdWritebackEvent(DynInstPtr &_inst,
- DefaultIEW<Impl> *_iew)
- : Event(&mainEventQueue), inst(_inst), iewStage(_iew)
-{
- this->setFlags(Event::AutoDelete);
-}
-
-template<class Impl>
-void
-DefaultIEW<Impl>::LdWritebackEvent::process()
-{
- DPRINTF(IEW, "Load writeback event [sn:%lli]\n", inst->seqNum);
- DPRINTF(Activity, "Activity: Ld Writeback event [sn:%lli]\n", inst->seqNum);
-
- //iewStage->ldstQueue.removeMSHR(inst->threadNumber,inst->seqNum);
-
- if (iewStage->isSwitchedOut()) {
- inst = NULL;
- return;
- } else if (inst->isSquashed()) {
- iewStage->wakeCPU();
- inst = NULL;
- return;
- }
-
- iewStage->wakeCPU();
-
- if (!inst->isExecuted()) {
- inst->setExecuted();
-
- // Complete access to copy data to proper place.
- if (inst->isStore()) {
- inst->completeAcc();
- }
- }
-
- // Need to insert instruction into queue to commit
- iewStage->instToCommit(inst);
-
- iewStage->activityThisCycle();
-
- inst = NULL;
-}
-
-template<class Impl>
-const char *
-DefaultIEW<Impl>::LdWritebackEvent::description()
-{
- return "Load writeback event";
-}
-
-template<class Impl>
DefaultIEW<Impl>::DefaultIEW(Params *params)
: // @todo: Make this into a parameter.
issueToExecQueue(5, 5),
@@ -1280,7 +1228,7 @@ DefaultIEW<Impl>::executeInsts()
ldstQueue.executeStore(inst);
// If the store had a fault then it may not have a mem req
- if (inst->req && !(inst->req->flags & LOCKED)) {
+ if (inst->req && !(inst->req->getFlags() & LOCKED)) {
inst->setExecuted();
instToCommit(inst);
@@ -1556,7 +1504,7 @@ DefaultIEW<Impl>::updateExeInstStats(DynInstPtr &inst)
else
iewExecutedInsts++;
#else
- iewExecutedInsts[thread_number]++;
+ iewExecutedInsts++;
#endif
//
diff --git a/src/cpu/o3/inst_queue.hh b/src/cpu/o3/inst_queue.hh
index 518de73d9..843f6a8fe 100644
--- a/src/cpu/o3/inst_queue.hh
+++ b/src/cpu/o3/inst_queue.hh
@@ -38,7 +38,7 @@
#include "base/timebuf.hh"
#include "cpu/inst_seq.hh"
#include "cpu/o3/dep_graph.hh"
-#include "encumbered/cpu/full/op_class.hh"
+#include "cpu/op_class.hh"
#include "sim/host.hh"
class FUPool;
diff --git a/src/cpu/o3/inst_queue_impl.hh b/src/cpu/o3/inst_queue_impl.hh
index f1dc4e01f..4fa756cb6 100644
--- a/src/cpu/o3/inst_queue_impl.hh
+++ b/src/cpu/o3/inst_queue_impl.hh
@@ -64,8 +64,7 @@ InstructionQueue<Impl>::FUCompletion::description()
template <class Impl>
InstructionQueue<Impl>::InstructionQueue(Params *params)
- : dcacheInterface(params->dcacheInterface),
- fuPool(params->fuPool),
+ : fuPool(params->fuPool),
numEntries(params->numIQEntries),
totalWidth(params->issueWidth),
numPhysIntRegs(params->numPhysIntRegs),
diff --git a/src/cpu/o3/lsq.cc b/src/cpu/o3/lsq.cc
new file mode 100644
index 000000000..8991ab8f8
--- /dev/null
+++ b/src/cpu/o3/lsq.cc
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2004-2005 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "cpu/o3/alpha_dyn_inst.hh"
+#include "cpu/o3/alpha_cpu.hh"
+#include "cpu/o3/alpha_impl.hh"
+#include "cpu/o3/lsq_impl.hh"
+
+// Force the instantiation of LDSTQ for all the implementations we care about.
+template class LSQ<AlphaSimpleImpl>;
+
diff --git a/src/cpu/o3/lsq.hh b/src/cpu/o3/lsq.hh
new file mode 100644
index 000000000..51eb23cd7
--- /dev/null
+++ b/src/cpu/o3/lsq.hh
@@ -0,0 +1,324 @@
+/*
+ * Copyright (c) 2004-2006 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __CPU_O3_LSQ_HH__
+#define __CPU_O3_LSQ_HH__
+
+#include <map>
+#include <queue>
+
+#include "config/full_system.hh"
+#include "cpu/inst_seq.hh"
+//#include "cpu/o3/cpu_policy.hh"
+#include "cpu/o3/lsq_unit.hh"
+#include "mem/port.hh"
+//#include "mem/page_table.hh"
+#include "sim/sim_object.hh"
+
+template <class Impl>
+class LSQ {
+ public:
+ typedef typename Impl::Params Params;
+ typedef typename Impl::FullCPU FullCPU;
+ typedef typename Impl::DynInstPtr DynInstPtr;
+ typedef typename Impl::CPUPol::IEW IEW;
+ typedef typename Impl::CPUPol::LSQUnit LSQUnit;
+
+ enum LSQPolicy {
+ Dynamic,
+ Partitioned,
+ Threshold
+ };
+
+ /** Constructs an LSQ with the given parameters. */
+ LSQ(Params *params);
+
+ /** Returns the name of the LSQ. */
+ std::string name() const;
+
+ /** Sets the pointer to the list of active threads. */
+ void setActiveThreads(std::list<unsigned> *at_ptr);
+ /** Sets the CPU pointer. */
+ void setCPU(FullCPU *cpu_ptr);
+ /** Sets the IEW stage pointer. */
+ void setIEW(IEW *iew_ptr);
+ /** Sets the page table pointer. */
+// void setPageTable(PageTable *pt_ptr);
+
+ void switchOut();
+ void takeOverFrom();
+
+ /** Number of entries needed for the given amount of threads.*/
+ int entryAmount(int num_threads);
+ void removeEntries(unsigned tid);
+ /** Reset the max entries for each thread. */
+ void resetEntries();
+ /** Resize the max entries for a thread. */
+ void resizeEntries(unsigned size, unsigned tid);
+
+ /** Ticks the LSQ. */
+ void tick();
+ /** Ticks a specific LSQ Unit. */
+ void tick(unsigned tid)
+ { thread[tid].tick(); }
+
+ /** Inserts a load into the LSQ. */
+ void insertLoad(DynInstPtr &load_inst);
+ /** Inserts a store into the LSQ. */
+ void insertStore(DynInstPtr &store_inst);
+
+ /** Executes a load. */
+ Fault executeLoad(DynInstPtr &inst);
+
+ Fault executeLoad(int lq_idx, unsigned tid)
+ { return thread[tid].executeLoad(lq_idx); }
+
+ /** Executes a store. */
+ Fault executeStore(DynInstPtr &inst);
+
+ /**
+ * Commits loads up until the given sequence number for a specific thread.
+ */
+ void commitLoads(InstSeqNum &youngest_inst, unsigned tid)
+ { thread[tid].commitLoads(youngest_inst); }
+
+ /**
+ * Commits stores up until the given sequence number for a specific thread.
+ */
+ void commitStores(InstSeqNum &youngest_inst, unsigned tid)
+ { thread[tid].commitStores(youngest_inst); }
+
+ /**
+ * Attempts to write back stores until all cache ports are used or the
+ * interface becomes blocked.
+ */
+ void writebackStores();
+ /** Same as above, but only for one thread. */
+ void writebackStores(unsigned tid);
+
+ /**
+ * Squash instructions from a thread until the specified sequence number.
+ */
+ void squash(const InstSeqNum &squashed_num, unsigned tid)
+ { thread[tid].squash(squashed_num); }
+
+ /** Returns whether or not there was a memory ordering violation. */
+ bool violation();
+ /**
+ * Returns whether or not there was a memory ordering violation for a
+ * specific thread.
+ */
+ bool violation(unsigned tid)
+ { return thread[tid].violation(); }
+
+ /** Returns if a load is blocked due to the memory system for a specific
+ * thread.
+ */
+ bool loadBlocked(unsigned tid)
+ { return thread[tid].loadBlocked(); }
+
+ bool isLoadBlockedHandled(unsigned tid)
+ { return thread[tid].isLoadBlockedHandled(); }
+
+ void setLoadBlockedHandled(unsigned tid)
+ { thread[tid].setLoadBlockedHandled(); }
+
+ /** Gets the instruction that caused the memory ordering violation. */
+ DynInstPtr getMemDepViolator(unsigned tid)
+ { return thread[tid].getMemDepViolator(); }
+
+ /** Returns the head index of the load queue for a specific thread. */
+ int getLoadHead(unsigned tid)
+ { return thread[tid].getLoadHead(); }
+
+ /** Returns the sequence number of the head of the load queue. */
+ InstSeqNum getLoadHeadSeqNum(unsigned tid)
+ {
+ return thread[tid].getLoadHeadSeqNum();
+ }
+
+ /** Returns the head index of the store queue. */
+ int getStoreHead(unsigned tid)
+ { return thread[tid].getStoreHead(); }
+
+ /** Returns the sequence number of the head of the store queue. */
+ InstSeqNum getStoreHeadSeqNum(unsigned tid)
+ {
+ return thread[tid].getStoreHeadSeqNum();
+ }
+
+ /** Returns the number of instructions in all of the queues. */
+ int getCount();
+ /** Returns the number of instructions in the queues of one thread. */
+ int getCount(unsigned tid)
+ { return thread[tid].getCount(); }
+
+ /** Returns the total number of loads in the load queue. */
+ int numLoads();
+ /** Returns the total number of loads for a single thread. */
+ int numLoads(unsigned tid)
+ { return thread[tid].numLoads(); }
+
+ /** Returns the total number of stores in the store queue. */
+ int numStores();
+ /** Returns the total number of stores for a single thread. */
+ int numStores(unsigned tid)
+ { return thread[tid].numStores(); }
+
+ /** Returns the total number of loads that are ready. */
+ int numLoadsReady();
+ /** Returns the number of loads that are ready for a single thread. */
+ int numLoadsReady(unsigned tid)
+ { return thread[tid].numLoadsReady(); }
+
+ /** Returns the number of free entries. */
+ unsigned numFreeEntries();
+ /** Returns the number of free entries for a specific thread. */
+ unsigned numFreeEntries(unsigned tid);
+
+ /** Returns if the LSQ is full (either LQ or SQ is full). */
+ bool isFull();
+ /**
+ * Returns if the LSQ is full for a specific thread (either LQ or SQ is
+ * full).
+ */
+ bool isFull(unsigned tid);
+
+ /** Returns if any of the LQs are full. */
+ bool lqFull();
+ /** Returns if the LQ of a given thread is full. */
+ bool lqFull(unsigned tid);
+
+ /** Returns if any of the SQs are full. */
+ bool sqFull();
+ /** Returns if the SQ of a given thread is full. */
+ bool sqFull(unsigned tid);
+
+ /**
+ * Returns if the LSQ is stalled due to a memory operation that must be
+ * replayed.
+ */
+ bool isStalled();
+ /**
+ * Returns if the LSQ of a specific thread is stalled due to a memory
+ * operation that must be replayed.
+ */
+ bool isStalled(unsigned tid);
+
+ /** Returns whether or not there are any stores to write back to memory. */
+ bool hasStoresToWB();
+
+ /** Returns whether or not a specific thread has any stores to write back
+ * to memory.
+ */
+ bool hasStoresToWB(unsigned tid)
+ { return thread[tid].hasStoresToWB(); }
+
+ /** Returns the number of stores a specific thread has to write back. */
+ int numStoresToWB(unsigned tid)
+ { return thread[tid].numStoresToWB(); }
+
+ /** Returns if the LSQ will write back to memory this cycle. */
+ bool willWB();
+ /** Returns if the LSQ of a specific thread will write back to memory this
+ * cycle.
+ */
+ bool willWB(unsigned tid)
+ { return thread[tid].willWB(); }
+
+ /** Debugging function to print out all instructions. */
+ void dumpInsts();
+ /** Debugging function to print out instructions from a specific thread. */
+ void dumpInsts(unsigned tid)
+ { thread[tid].dumpInsts(); }
+
+ /** Executes a read operation, using the load specified at the load index. */
+ template <class T>
+ Fault read(RequestPtr req, T &data, int load_idx);
+
+ /** Executes a store operation, using the store specified at the store
+ * index.
+ */
+ template <class T>
+ Fault write(RequestPtr req, T &data, int store_idx);
+
+ private:
+ /** The LSQ policy for SMT mode. */
+ LSQPolicy lsqPolicy;
+
+ /** The LSQ units for individual threads. */
+ LSQUnit thread[Impl::MaxThreads];
+
+ /** The CPU pointer. */
+ FullCPU *cpu;
+
+ /** The IEW stage pointer. */
+ IEW *iewStage;
+
+ /** The pointer to the page table. */
+// PageTable *pTable;
+
+ /** List of Active Threads in System. */
+ std::list<unsigned> *activeThreads;
+
+ /** Total Size of LQ Entries. */
+ unsigned LQEntries;
+ /** Total Size of SQ Entries. */
+ unsigned SQEntries;
+
+ /** Max LQ Size - Used to Enforce Sharing Policies. */
+ unsigned maxLQEntries;
+
+ /** Max SQ Size - Used to Enforce Sharing Policies. */
+ unsigned maxSQEntries;
+
+ /** Number of Threads. */
+ unsigned numThreads;
+};
+
+template <class Impl>
+template <class T>
+Fault
+LSQ<Impl>::read(RequestPtr req, T &data, int load_idx)
+{
+ unsigned tid = req->getThreadNum();
+
+ return thread[tid].read(req, data, load_idx);
+}
+
+template <class Impl>
+template <class T>
+Fault
+LSQ<Impl>::write(RequestPtr req, T &data, int store_idx)
+{
+ unsigned tid = req->getThreadNum();
+
+ return thread[tid].write(req, data, store_idx);
+}
+
+#endif // __CPU_O3_LSQ_HH__
diff --git a/src/cpu/o3/lsq_impl.hh b/src/cpu/o3/lsq_impl.hh
new file mode 100644
index 000000000..a6ad27522
--- /dev/null
+++ b/src/cpu/o3/lsq_impl.hh
@@ -0,0 +1,538 @@
+/*
+ * Copyright (c) 2004-2006 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <algorithm>
+#include <string>
+
+#include "cpu/o3/lsq.hh"
+
+using namespace std;
+
+template <class Impl>
+LSQ<Impl>::LSQ(Params *params)
+ : LQEntries(params->LQEntries), SQEntries(params->SQEntries),
+ numThreads(params->numberOfThreads)
+{
+ DPRINTF(LSQ, "Creating LSQ object.\n");
+
+ //**********************************************/
+ //************ Handle SMT Parameters ***********/
+ //**********************************************/
+ string policy = params->smtLSQPolicy;
+
+ //Convert string to lowercase
+ std::transform(policy.begin(), policy.end(), policy.begin(),
+ (int(*)(int)) tolower);
+
+ //Figure out fetch policy
+ if (policy == "dynamic") {
+ lsqPolicy = Dynamic;
+
+ maxLQEntries = LQEntries;
+ maxSQEntries = SQEntries;
+
+ DPRINTF(LSQ, "LSQ sharing policy set to Dynamic\n");
+
+ } else if (policy == "partitioned") {
+ lsqPolicy = Partitioned;
+
+ //@todo:make work if part_amt doesnt divide evenly.
+ maxLQEntries = LQEntries / numThreads;
+ maxSQEntries = SQEntries / numThreads;
+
+ DPRINTF(Fetch, "LSQ sharing policy set to Partitioned: "
+ "%i entries per LQ | %i entries per SQ",
+ maxLQEntries,maxSQEntries);
+
+ } else if (policy == "threshold") {
+ lsqPolicy = Threshold;
+
+ assert(params->smtLSQThreshold > LQEntries);
+ assert(params->smtLSQThreshold > SQEntries);
+
+ //Divide up by threshold amount
+ //@todo: Should threads check the max and the total
+ //amount of the LSQ
+ maxLQEntries = params->smtLSQThreshold;
+ maxSQEntries = params->smtLSQThreshold;
+
+ DPRINTF(LSQ, "LSQ sharing policy set to Threshold: "
+ "%i entries per LQ | %i entries per SQ",
+ maxLQEntries,maxSQEntries);
+
+ } else {
+ assert(0 && "Invalid LSQ Sharing Policy.Options Are:{Dynamic,"
+ "Partitioned, Threshold}");
+ }
+
+ //Initialize LSQs
+ for (int tid=0; tid < numThreads; tid++) {
+ thread[tid].init(params, maxLQEntries, maxSQEntries, tid);
+ }
+}
+
+
+template<class Impl>
+std::string
+LSQ<Impl>::name() const
+{
+ return iewStage->name() + ".lsq";
+}
+
+template<class Impl>
+void
+LSQ<Impl>::setActiveThreads(list<unsigned> *at_ptr)
+{
+ activeThreads = at_ptr;
+ assert(activeThreads != 0);
+}
+
+template<class Impl>
+void
+LSQ<Impl>::setCPU(FullCPU *cpu_ptr)
+{
+ cpu = cpu_ptr;
+
+ for (int tid=0; tid < numThreads; tid++) {
+ thread[tid].setCPU(cpu_ptr);
+ }
+}
+
+template<class Impl>
+void
+LSQ<Impl>::setIEW(IEW *iew_ptr)
+{
+ iewStage = iew_ptr;
+
+ for (int tid=0; tid < numThreads; tid++) {
+ thread[tid].setIEW(iew_ptr);
+ }
+}
+
+#if 0
+template<class Impl>
+void
+LSQ<Impl>::setPageTable(PageTable *pt_ptr)
+{
+ for (int tid=0; tid < numThreads; tid++) {
+ thread[tid].setPageTable(pt_ptr);
+ }
+}
+#endif
+
+template <class Impl>
+void
+LSQ<Impl>::switchOut()
+{
+ for (int tid = 0; tid < numThreads; tid++) {
+ thread[tid].switchOut();
+ }
+}
+
+template <class Impl>
+void
+LSQ<Impl>::takeOverFrom()
+{
+ for (int tid = 0; tid < numThreads; tid++) {
+ thread[tid].takeOverFrom();
+ }
+}
+
+template <class Impl>
+int
+LSQ<Impl>::entryAmount(int num_threads)
+{
+ if (lsqPolicy == Partitioned) {
+ return LQEntries / num_threads;
+ } else {
+ return 0;
+ }
+}
+
+template <class Impl>
+void
+LSQ<Impl>::resetEntries()
+{
+ if (lsqPolicy != Dynamic || numThreads > 1) {
+ int active_threads = (*activeThreads).size();
+
+ list<unsigned>::iterator threads = (*activeThreads).begin();
+ list<unsigned>::iterator list_end = (*activeThreads).end();
+
+ int maxEntries;
+
+ if (lsqPolicy == Partitioned) {
+ maxEntries = LQEntries / active_threads;
+ } else if (lsqPolicy == Threshold && active_threads == 1) {
+ maxEntries = LQEntries;
+ } else {
+ maxEntries = LQEntries;
+ }
+
+ while (threads != list_end) {
+ resizeEntries(maxEntries,*threads++);
+ }
+ }
+}
+
+template<class Impl>
+void
+LSQ<Impl>::removeEntries(unsigned tid)
+{
+ thread[tid].clearLQ();
+ thread[tid].clearSQ();
+}
+
+template<class Impl>
+void
+LSQ<Impl>::resizeEntries(unsigned size,unsigned tid)
+{
+ thread[tid].resizeLQ(size);
+ thread[tid].resizeSQ(size);
+}
+
+template<class Impl>
+void
+LSQ<Impl>::tick()
+{
+ list<unsigned>::iterator active_threads = (*activeThreads).begin();
+
+ while (active_threads != (*activeThreads).end()) {
+ unsigned tid = *active_threads++;
+
+ thread[tid].tick();
+ }
+}
+
+template<class Impl>
+void
+LSQ<Impl>::insertLoad(DynInstPtr &load_inst)
+{
+ unsigned tid = load_inst->threadNumber;
+
+ thread[tid].insertLoad(load_inst);
+}
+
+template<class Impl>
+void
+LSQ<Impl>::insertStore(DynInstPtr &store_inst)
+{
+ unsigned tid = store_inst->threadNumber;
+
+ thread[tid].insertStore(store_inst);
+}
+
+template<class Impl>
+Fault
+LSQ<Impl>::executeLoad(DynInstPtr &inst)
+{
+ unsigned tid = inst->threadNumber;
+
+ return thread[tid].executeLoad(inst);
+}
+
+template<class Impl>
+Fault
+LSQ<Impl>::executeStore(DynInstPtr &inst)
+{
+ unsigned tid = inst->threadNumber;
+
+ return thread[tid].executeStore(inst);
+}
+
+template<class Impl>
+void
+LSQ<Impl>::writebackStores()
+{
+ list<unsigned>::iterator active_threads = (*activeThreads).begin();
+
+ while (active_threads != (*activeThreads).end()) {
+ unsigned tid = *active_threads++;
+
+ if (numStoresToWB(tid) > 0) {
+ DPRINTF(Writeback,"[tid:%i] Writing back stores. %i stores "
+ "available for Writeback.\n", tid, numStoresToWB(tid));
+ }
+
+ thread[tid].writebackStores();
+ }
+}
+
+template<class Impl>
+bool
+LSQ<Impl>::violation()
+{
+ /* Answers: Does Anybody Have a Violation?*/
+ list<unsigned>::iterator active_threads = (*activeThreads).begin();
+
+ while (active_threads != (*activeThreads).end()) {
+ unsigned tid = *active_threads++;
+ if (thread[tid].violation())
+ return true;
+ }
+
+ return false;
+}
+
+template<class Impl>
+int
+LSQ<Impl>::getCount()
+{
+ unsigned total = 0;
+
+ list<unsigned>::iterator active_threads = (*activeThreads).begin();
+
+ while (active_threads != (*activeThreads).end()) {
+ unsigned tid = *active_threads++;
+ total += getCount(tid);
+ }
+
+ return total;
+}
+
+template<class Impl>
+int
+LSQ<Impl>::numLoads()
+{
+ unsigned total = 0;
+
+ list<unsigned>::iterator active_threads = (*activeThreads).begin();
+
+ while (active_threads != (*activeThreads).end()) {
+ unsigned tid = *active_threads++;
+ total += numLoads(tid);
+ }
+
+ return total;
+}
+
+template<class Impl>
+int
+LSQ<Impl>::numStores()
+{
+ unsigned total = 0;
+
+ list<unsigned>::iterator active_threads = (*activeThreads).begin();
+
+ while (active_threads != (*activeThreads).end()) {
+ unsigned tid = *active_threads++;
+ total += thread[tid].numStores();
+ }
+
+ return total;
+}
+
+template<class Impl>
+int
+LSQ<Impl>::numLoadsReady()
+{
+ unsigned total = 0;
+
+ list<unsigned>::iterator active_threads = (*activeThreads).begin();
+
+ while (active_threads != (*activeThreads).end()) {
+ unsigned tid = *active_threads++;
+ total += thread[tid].numLoadsReady();
+ }
+
+ return total;
+}
+
+template<class Impl>
+unsigned
+LSQ<Impl>::numFreeEntries()
+{
+ unsigned total = 0;
+
+ list<unsigned>::iterator active_threads = (*activeThreads).begin();
+
+ while (active_threads != (*activeThreads).end()) {
+ unsigned tid = *active_threads++;
+ total += thread[tid].numFreeEntries();
+ }
+
+ return total;
+}
+
+template<class Impl>
+unsigned
+LSQ<Impl>::numFreeEntries(unsigned tid)
+{
+ //if( lsqPolicy == Dynamic )
+ //return numFreeEntries();
+ //else
+ return thread[tid].numFreeEntries();
+}
+
+template<class Impl>
+bool
+LSQ<Impl>::isFull()
+{
+ list<unsigned>::iterator active_threads = (*activeThreads).begin();
+
+ while (active_threads != (*activeThreads).end()) {
+ unsigned tid = *active_threads++;
+ if (! (thread[tid].lqFull() || thread[tid].sqFull()) )
+ return false;
+ }
+
+ return true;
+}
+
+template<class Impl>
+bool
+LSQ<Impl>::isFull(unsigned tid)
+{
+ //@todo: Change to Calculate All Entries for
+ //Dynamic Policy
+ if( lsqPolicy == Dynamic )
+ return isFull();
+ else
+ return thread[tid].lqFull() || thread[tid].sqFull();
+}
+
+template<class Impl>
+bool
+LSQ<Impl>::lqFull()
+{
+ list<unsigned>::iterator active_threads = (*activeThreads).begin();
+
+ while (active_threads != (*activeThreads).end()) {
+ unsigned tid = *active_threads++;
+ if (!thread[tid].lqFull())
+ return false;
+ }
+
+ return true;
+}
+
+template<class Impl>
+bool
+LSQ<Impl>::lqFull(unsigned tid)
+{
+ //@todo: Change to Calculate All Entries for
+ //Dynamic Policy
+ if( lsqPolicy == Dynamic )
+ return lqFull();
+ else
+ return thread[tid].lqFull();
+}
+
+template<class Impl>
+bool
+LSQ<Impl>::sqFull()
+{
+ list<unsigned>::iterator active_threads = (*activeThreads).begin();
+
+ while (active_threads != (*activeThreads).end()) {
+ unsigned tid = *active_threads++;
+ if (!sqFull(tid))
+ return false;
+ }
+
+ return true;
+}
+
+template<class Impl>
+bool
+LSQ<Impl>::sqFull(unsigned tid)
+{
+ //@todo: Change to Calculate All Entries for
+ //Dynamic Policy
+ if( lsqPolicy == Dynamic )
+ return sqFull();
+ else
+ return thread[tid].sqFull();
+}
+
+template<class Impl>
+bool
+LSQ<Impl>::isStalled()
+{
+ list<unsigned>::iterator active_threads = (*activeThreads).begin();
+
+ while (active_threads != (*activeThreads).end()) {
+ unsigned tid = *active_threads++;
+ if (!thread[tid].isStalled())
+ return false;
+ }
+
+ return true;
+}
+
+template<class Impl>
+bool
+LSQ<Impl>::isStalled(unsigned tid)
+{
+ if( lsqPolicy == Dynamic )
+ return isStalled();
+ else
+ return thread[tid].isStalled();
+}
+
+template<class Impl>
+bool
+LSQ<Impl>::hasStoresToWB()
+{
+ list<unsigned>::iterator active_threads = (*activeThreads).begin();
+
+ while (active_threads != (*activeThreads).end()) {
+ unsigned tid = *active_threads++;
+ if (!hasStoresToWB(tid))
+ return false;
+ }
+
+ return true;
+}
+
+template<class Impl>
+bool
+LSQ<Impl>::willWB()
+{
+ list<unsigned>::iterator active_threads = (*activeThreads).begin();
+
+ while (active_threads != (*activeThreads).end()) {
+ unsigned tid = *active_threads++;
+ if (!willWB(tid))
+ return false;
+ }
+
+ return true;
+}
+
+template<class Impl>
+void
+LSQ<Impl>::dumpInsts()
+{
+ list<unsigned>::iterator active_threads = (*activeThreads).begin();
+
+ while (active_threads != (*activeThreads).end()) {
+ unsigned tid = *active_threads++;
+ thread[tid].dumpInsts();
+ }
+}
diff --git a/src/cpu/o3/lsq_unit.cc b/src/cpu/o3/lsq_unit.cc
new file mode 100644
index 000000000..dd29007bc
--- /dev/null
+++ b/src/cpu/o3/lsq_unit.cc
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2004-2005 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "cpu/o3/alpha_dyn_inst.hh"
+#include "cpu/o3/alpha_cpu.hh"
+#include "cpu/o3/alpha_impl.hh"
+#include "cpu/o3/lsq_unit_impl.hh"
+
+// Force the instantiation of LDSTQ for all the implementations we care about.
+template class LSQUnit<AlphaSimpleImpl>;
+
diff --git a/src/cpu/o3/lsq_unit.hh b/src/cpu/o3/lsq_unit.hh
new file mode 100644
index 000000000..b339cea2c
--- /dev/null
+++ b/src/cpu/o3/lsq_unit.hh
@@ -0,0 +1,629 @@
+/*
+ * Copyright (c) 2004-2006 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __CPU_O3_LSQ_UNIT_HH__
+#define __CPU_O3_LSQ_UNIT_HH__
+
+#include <algorithm>
+#include <map>
+#include <queue>
+
+#include "arch/faults.hh"
+#include "config/full_system.hh"
+#include "base/hashmap.hh"
+#include "cpu/inst_seq.hh"
+#include "mem/packet.hh"
+#include "mem/port.hh"
+//#include "mem/page_table.hh"
+//#include "sim/debug.hh"
+//#include "sim/sim_object.hh"
+
+/**
+ * Class that implements the actual LQ and SQ for each specific
+ * thread. Both are circular queues; load entries are freed upon
+ * committing, while store entries are freed once they writeback. The
+ * LSQUnit tracks if there are memory ordering violations, and also
+ * detects partial load to store forwarding cases (a store only has
+ * part of a load's data) that requires the load to wait until the
+ * store writes back. In the former case it holds onto the instruction
+ * until the dependence unit looks at it, and in the latter it stalls
+ * the LSQ until the store writes back. At that point the load is
+ * replayed.
+ */
+template <class Impl>
+class LSQUnit {
+ protected:
+ typedef TheISA::IntReg IntReg;
+ public:
+ typedef typename Impl::Params Params;
+ typedef typename Impl::FullCPU FullCPU;
+ typedef typename Impl::DynInstPtr DynInstPtr;
+ typedef typename Impl::CPUPol::IEW IEW;
+ typedef typename Impl::CPUPol::IssueStruct IssueStruct;
+
+ public:
+ /** Constructs an LSQ unit. init() must be called prior to use. */
+ LSQUnit();
+
+ /** Initializes the LSQ unit with the specified number of entries. */
+ void init(Params *params, unsigned maxLQEntries,
+ unsigned maxSQEntries, unsigned id);
+
+ /** Returns the name of the LSQ unit. */
+ std::string name() const;
+
+ /** Sets the CPU pointer. */
+ void setCPU(FullCPU *cpu_ptr);
+
+ /** Sets the IEW stage pointer. */
+ void setIEW(IEW *iew_ptr)
+ { iewStage = iew_ptr; }
+
+ /** Sets the page table pointer. */
+// void setPageTable(PageTable *pt_ptr);
+
+ void switchOut();
+
+ void takeOverFrom();
+
+ bool isSwitchedOut() { return switchedOut; }
+
+ /** Ticks the LSQ unit, which in this case only resets the number of
+ * used cache ports.
+ * @todo: Move the number of used ports up to the LSQ level so it can
+ * be shared by all LSQ units.
+ */
+ void tick() { usedPorts = 0; }
+
+ /** Inserts an instruction. */
+ void insert(DynInstPtr &inst);
+ /** Inserts a load instruction. */
+ void insertLoad(DynInstPtr &load_inst);
+ /** Inserts a store instruction. */
+ void insertStore(DynInstPtr &store_inst);
+
+ /** Executes a load instruction. */
+ Fault executeLoad(DynInstPtr &inst);
+
+ Fault executeLoad(int lq_idx) { panic("Not implemented"); return NoFault; }
+ /** Executes a store instruction. */
+ Fault executeStore(DynInstPtr &inst);
+
+ /** Commits the head load. */
+ void commitLoad();
+ /** Commits loads older than a specific sequence number. */
+ void commitLoads(InstSeqNum &youngest_inst);
+
+ /** Commits stores older than a specific sequence number. */
+ void commitStores(InstSeqNum &youngest_inst);
+
+ /** Writes back stores. */
+ void writebackStores();
+
+ void completeDataAccess(PacketPtr pkt);
+
+ void completeStoreDataAccess(DynInstPtr &inst);
+
+ // @todo: Include stats in the LSQ unit.
+ //void regStats();
+
+ /** Clears all the entries in the LQ. */
+ void clearLQ();
+
+ /** Clears all the entries in the SQ. */
+ void clearSQ();
+
+ /** Resizes the LQ to a given size. */
+ void resizeLQ(unsigned size);
+
+ /** Resizes the SQ to a given size. */
+ void resizeSQ(unsigned size);
+
+ /** Squashes all instructions younger than a specific sequence number. */
+ void squash(const InstSeqNum &squashed_num);
+
+ /** Returns if there is a memory ordering violation. Value is reset upon
+ * call to getMemDepViolator().
+ */
+ bool violation() { return memDepViolator; }
+
+ /** Returns the memory ordering violator. */
+ DynInstPtr getMemDepViolator();
+
+ /** Returns if a load became blocked due to the memory system. */
+ bool loadBlocked()
+ { return isLoadBlocked; }
+
+ void clearLoadBlocked()
+ { isLoadBlocked = false; }
+
+ bool isLoadBlockedHandled()
+ { return loadBlockedHandled; }
+
+ void setLoadBlockedHandled()
+ { loadBlockedHandled = true; }
+
+ /** Returns the number of free entries (min of free LQ and SQ entries). */
+ unsigned numFreeEntries();
+
+ /** Returns the number of loads ready to execute. */
+ int numLoadsReady();
+
+ /** Returns the number of loads in the LQ. */
+ int numLoads() { return loads; }
+
+ /** Returns the number of stores in the SQ. */
+ int numStores() { return stores; }
+
+ /** Returns if either the LQ or SQ is full. */
+ bool isFull() { return lqFull() || sqFull(); }
+
+ /** Returns if the LQ is full. */
+ bool lqFull() { return loads >= (LQEntries - 1); }
+
+ /** Returns if the SQ is full. */
+ bool sqFull() { return stores >= (SQEntries - 1); }
+
+ /** Returns the number of instructions in the LSQ. */
+ unsigned getCount() { return loads + stores; }
+
+ /** Returns if there are any stores to writeback. */
+ bool hasStoresToWB() { return storesToWB; }
+
+ /** Returns the number of stores to writeback. */
+ int numStoresToWB() { return storesToWB; }
+
+ /** Returns if the LSQ unit will writeback on this cycle. */
+ bool willWB() { return storeQueue[storeWBIdx].canWB &&
+ !storeQueue[storeWBIdx].completed/* &&
+ !dcacheInterface->isBlocked()*/; }
+
+ private:
+ /** Completes the store at the specified index. */
+ void completeStore(int store_idx);
+
+ /** Increments the given store index (circular queue). */
+ inline void incrStIdx(int &store_idx);
+ /** Decrements the given store index (circular queue). */
+ inline void decrStIdx(int &store_idx);
+ /** Increments the given load index (circular queue). */
+ inline void incrLdIdx(int &load_idx);
+ /** Decrements the given load index (circular queue). */
+ inline void decrLdIdx(int &load_idx);
+
+ public:
+ /** Debugging function to dump instructions in the LSQ. */
+ void dumpInsts();
+
+ private:
+ /** Pointer to the CPU. */
+ FullCPU *cpu;
+
+ /** Pointer to the IEW stage. */
+ IEW *iewStage;
+
+ MemObject *mem;
+
+ class DcachePort : public Port
+ {
+ protected:
+ FullCPU *cpu;
+ LSQUnit *lsq;
+
+ public:
+ DcachePort(FullCPU *_cpu, LSQUnit *_lsq)
+ : Port(_lsq->name() + "-dport"), cpu(_cpu), lsq(_lsq)
+ { }
+
+ protected:
+ virtual Tick recvAtomic(PacketPtr pkt);
+
+ virtual void recvFunctional(PacketPtr pkt);
+
+ virtual void recvStatusChange(Status status);
+
+ virtual void getDeviceAddressRanges(AddrRangeList &resp,
+ AddrRangeList &snoop)
+ { resp.clear(); snoop.clear(); }
+
+ virtual bool recvTiming(PacketPtr pkt);
+
+ virtual void recvRetry();
+ };
+
+ /** Pointer to the D-cache. */
+ DcachePort *dcachePort;
+
+ /** Pointer to the page table. */
+// PageTable *pTable;
+
+ public:
+ struct SQEntry {
+ /** Constructs an empty store queue entry. */
+ SQEntry()
+ : inst(NULL), req(NULL), size(0), data(0),
+ canWB(0), committed(0), completed(0)
+ { }
+
+ /** Constructs a store queue entry for a given instruction. */
+ SQEntry(DynInstPtr &_inst)
+ : inst(_inst), req(NULL), size(0), data(0),
+ canWB(0), committed(0), completed(0)
+ { }
+
+ /** The store instruction. */
+ DynInstPtr inst;
+ /** The request for the store. */
+ RequestPtr req;
+ /** The size of the store. */
+ int size;
+ /** The store data. */
+ IntReg data;
+ /** Whether or not the store can writeback. */
+ bool canWB;
+ /** Whether or not the store is committed. */
+ bool committed;
+ /** Whether or not the store is completed. */
+ bool completed;
+ };
+
+ private:
+ /** The LSQUnit thread id. */
+ unsigned lsqID;
+
+ /** The store queue. */
+ std::vector<SQEntry> storeQueue;
+
+ /** The load queue. */
+ std::vector<DynInstPtr> loadQueue;
+
+ /** The number of LQ entries, plus a sentinel entry (circular queue).
+ * @todo: Consider having var that records the true number of LQ entries.
+ */
+ unsigned LQEntries;
+ /** The number of SQ entries, plus a sentinel entry (circular queue).
+ * @todo: Consider having var that records the true number of SQ entries.
+ */
+ unsigned SQEntries;
+
+ /** The number of load instructions in the LQ. */
+ int loads;
+ /** The number of store instructions in the SQ. */
+ int stores;
+ /** The number of store instructions in the SQ waiting to writeback. */
+ int storesToWB;
+
+ /** The index of the head instruction in the LQ. */
+ int loadHead;
+ /** The index of the tail instruction in the LQ. */
+ int loadTail;
+
+ /** The index of the head instruction in the SQ. */
+ int storeHead;
+ /** The index of the first instruction that may be ready to be
+ * written back, and has not yet been written back.
+ */
+ int storeWBIdx;
+ /** The index of the tail instruction in the SQ. */
+ int storeTail;
+
+ /// @todo Consider moving to a more advanced model with write vs read ports
+ /** The number of cache ports available each cycle. */
+ int cachePorts;
+
+ /** The number of used cache ports in this cycle. */
+ int usedPorts;
+
+ bool switchedOut;
+
+ //list<InstSeqNum> mshrSeqNums;
+
+ /** Wire to read information from the issue stage time queue. */
+ typename TimeBuffer<IssueStruct>::wire fromIssue;
+
+ /** Whether or not the LSQ is stalled. */
+ bool stalled;
+ /** The store that causes the stall due to partial store to load
+ * forwarding.
+ */
+ InstSeqNum stallingStoreIsn;
+ /** The index of the above store. */
+ int stallingLoadIdx;
+
+ /** Whether or not a load is blocked due to the memory system. */
+ bool isLoadBlocked;
+
+ bool loadBlockedHandled;
+
+ InstSeqNum blockedLoadSeqNum;
+
+ /** The oldest load that caused a memory ordering violation. */
+ DynInstPtr memDepViolator;
+
+ // Will also need how many read/write ports the Dcache has. Or keep track
+ // of that in stage that is one level up, and only call executeLoad/Store
+ // the appropriate number of times.
+/*
+ // total number of loads forwaded from LSQ stores
+ Stats::Vector<> lsq_forw_loads;
+
+ // total number of loads ignored due to invalid addresses
+ Stats::Vector<> inv_addr_loads;
+
+ // total number of software prefetches ignored due to invalid addresses
+ Stats::Vector<> inv_addr_swpfs;
+
+ // total non-speculative bogus addresses seen (debug var)
+ Counter sim_invalid_addrs;
+ Stats::Vector<> fu_busy; //cumulative fu busy
+
+ // ready loads blocked due to memory disambiguation
+ Stats::Vector<> lsq_blocked_loads;
+
+ Stats::Scalar<> lsqInversion;
+*/
+ public:
+ /** Executes the load at the given index. */
+ template <class T>
+ Fault read(Request *req, T &data, int load_idx);
+
+ /** Executes the store at the given index. */
+ template <class T>
+ Fault write(Request *req, T &data, int store_idx);
+
+ /** Returns the index of the head load instruction. */
+ int getLoadHead() { return loadHead; }
+ /** Returns the sequence number of the head load instruction. */
+ InstSeqNum getLoadHeadSeqNum()
+ {
+ if (loadQueue[loadHead]) {
+ return loadQueue[loadHead]->seqNum;
+ } else {
+ return 0;
+ }
+
+ }
+
+ /** Returns the index of the head store instruction. */
+ int getStoreHead() { return storeHead; }
+ /** Returns the sequence number of the head store instruction. */
+ InstSeqNum getStoreHeadSeqNum()
+ {
+ if (storeQueue[storeHead].inst) {
+ return storeQueue[storeHead].inst->seqNum;
+ } else {
+ return 0;
+ }
+
+ }
+
+ /** Returns whether or not the LSQ unit is stalled. */
+ bool isStalled() { return stalled; }
+};
+
+template <class Impl>
+template <class T>
+Fault
+LSQUnit<Impl>::read(Request *req, T &data, int load_idx)
+{
+ DynInstPtr load_inst = loadQueue[load_idx];
+
+ assert(load_inst);
+
+ assert(!load_inst->isExecuted());
+
+ // Make sure this isn't an uncacheable access
+ // A bit of a hackish way to get uncached accesses to work only if they're
+ // at the head of the LSQ and are ready to commit (at the head of the ROB
+ // too).
+ if (req->getFlags() & UNCACHEABLE &&
+ (load_idx != loadHead || !load_inst->reachedCommit)) {
+ iewStage->rescheduleMemInst(load_inst);
+ return TheISA::genMachineCheckFault();
+ }
+
+ // Check the SQ for any previous stores that might lead to forwarding
+ int store_idx = load_inst->sqIdx;
+
+ int store_size = 0;
+
+ DPRINTF(LSQUnit, "Read called, load idx: %i, store idx: %i, "
+ "storeHead: %i addr: %#x\n",
+ load_idx, store_idx, storeHead, req->getPaddr());
+
+#if 0
+ if (req->getFlags() & LOCKED) {
+ cpu->lockAddr = req->getPaddr();
+ cpu->lockFlag = true;
+ }
+#endif
+
+ while (store_idx != -1) {
+ // End once we've reached the top of the LSQ
+ if (store_idx == storeWBIdx) {
+ break;
+ }
+
+ // Move the index to one younger
+ if (--store_idx < 0)
+ store_idx += SQEntries;
+
+ assert(storeQueue[store_idx].inst);
+
+ store_size = storeQueue[store_idx].size;
+
+ if (store_size == 0)
+ continue;
+
+ // Check if the store data is within the lower and upper bounds of
+ // addresses that the request needs.
+ bool store_has_lower_limit =
+ req->getVaddr() >= storeQueue[store_idx].inst->effAddr;
+ bool store_has_upper_limit =
+ (req->getVaddr() + req->getSize()) <=
+ (storeQueue[store_idx].inst->effAddr + store_size);
+ bool lower_load_has_store_part =
+ req->getVaddr() < (storeQueue[store_idx].inst->effAddr +
+ store_size);
+ bool upper_load_has_store_part =
+ (req->getVaddr() + req->getSize()) >
+ storeQueue[store_idx].inst->effAddr;
+
+ // If the store's data has all of the data needed, we can forward.
+ if (store_has_lower_limit && store_has_upper_limit) {
+ // Get shift amount for offset into the store's data.
+ int shift_amt = req->getVaddr() & (store_size - 1);
+ // @todo: Magic number, assumes byte addressing
+ shift_amt = shift_amt << 3;
+
+ // Cast this to type T?
+ data = storeQueue[store_idx].data >> shift_amt;
+
+ assert(!load_inst->memData);
+ load_inst->memData = new uint8_t[64];
+
+ memcpy(load_inst->memData, &data, req->getSize());
+
+ DPRINTF(LSQUnit, "Forwarding from store idx %i to load to "
+ "addr %#x, data %#x\n",
+ store_idx, req->getVaddr(), *(load_inst->memData));
+/*
+ typename LdWritebackEvent *wb =
+ new typename LdWritebackEvent(load_inst,
+ iewStage);
+
+ // We'll say this has a 1 cycle load-store forwarding latency
+ // for now.
+ // @todo: Need to make this a parameter.
+ wb->schedule(curTick);
+*/
+ // Should keep track of stat for forwarded data
+ return NoFault;
+ } else if ((store_has_lower_limit && lower_load_has_store_part) ||
+ (store_has_upper_limit && upper_load_has_store_part) ||
+ (lower_load_has_store_part && upper_load_has_store_part)) {
+ // This is the partial store-load forwarding case where a store
+ // has only part of the load's data.
+
+ // If it's already been written back, then don't worry about
+ // stalling on it.
+ if (storeQueue[store_idx].completed) {
+ continue;
+ }
+
+ // Must stall load and force it to retry, so long as it's the oldest
+ // load that needs to do so.
+ if (!stalled ||
+ (stalled &&
+ load_inst->seqNum <
+ loadQueue[stallingLoadIdx]->seqNum)) {
+ stalled = true;
+ stallingStoreIsn = storeQueue[store_idx].inst->seqNum;
+ stallingLoadIdx = load_idx;
+ }
+
+ // Tell IQ/mem dep unit that this instruction will need to be
+ // rescheduled eventually
+ iewStage->rescheduleMemInst(load_inst);
+
+ // Do not generate a writeback event as this instruction is not
+ // complete.
+ DPRINTF(LSQUnit, "Load-store forwarding mis-match. "
+ "Store idx %i to load addr %#x\n",
+ store_idx, req->getVaddr());
+
+ return NoFault;
+ }
+ }
+
+ // If there's no forwarding case, then go access memory
+ DPRINTF(LSQUnit, "Doing functional access for inst [sn:%lli] PC %#x\n",
+ load_inst->seqNum, load_inst->readPC());
+
+ assert(!load_inst->memData);
+ load_inst->memData = new uint8_t[64];
+
+ ++usedPorts;
+
+ DPRINTF(LSQUnit, "Doing timing access for inst PC %#x\n",
+ load_inst->readPC());
+
+ PacketPtr data_pkt = new Packet(req, Packet::ReadReq, Packet::Broadcast);
+ data_pkt->dataStatic(load_inst->memData);
+
+ // if we have a cache, do cache access too
+ if (!dcachePort->sendTiming(data_pkt)) {
+ // There's an older load that's already going to squash.
+ if (isLoadBlocked && blockedLoadSeqNum < load_inst->seqNum)
+ return NoFault;
+
+ // Record that the load was blocked due to memory. This
+ // load will squash all instructions after it, be
+ // refetched, and re-executed.
+ isLoadBlocked = true;
+ loadBlockedHandled = false;
+ blockedLoadSeqNum = load_inst->seqNum;
+ // No fault occurred, even though the interface is blocked.
+ return NoFault;
+ }
+
+ if (data_pkt->result != Packet::Success) {
+ DPRINTF(LSQUnit, "LSQUnit: D-cache miss!\n");
+ DPRINTF(Activity, "Activity: ld accessing mem miss [sn:%lli]\n",
+ load_inst->seqNum);
+ } else {
+ DPRINTF(LSQUnit, "LSQUnit: D-cache hit!\n");
+ DPRINTF(Activity, "Activity: ld accessing mem hit [sn:%lli]\n",
+ load_inst->seqNum);
+ }
+
+ return NoFault;
+}
+
+template <class Impl>
+template <class T>
+Fault
+LSQUnit<Impl>::write(Request *req, T &data, int store_idx)
+{
+ assert(storeQueue[store_idx].inst);
+
+ DPRINTF(LSQUnit, "Doing write to store idx %i, addr %#x data %#x"
+ " | storeHead:%i [sn:%i]\n",
+ store_idx, req->getPaddr(), data, storeHead,
+ storeQueue[store_idx].inst->seqNum);
+
+ storeQueue[store_idx].req = req;
+ storeQueue[store_idx].size = sizeof(T);
+ storeQueue[store_idx].data = data;
+
+ // This function only writes the data to the store queue, so no fault
+ // can happen here.
+ return NoFault;
+}
+
+#endif // __CPU_O3_LSQ_UNIT_HH__
diff --git a/src/cpu/o3/lsq_unit_impl.hh b/src/cpu/o3/lsq_unit_impl.hh
new file mode 100644
index 000000000..3f6af3d2c
--- /dev/null
+++ b/src/cpu/o3/lsq_unit_impl.hh
@@ -0,0 +1,866 @@
+/*
+ * Copyright (c) 2004-2005 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "cpu/checker/cpu.hh"
+#include "cpu/o3/lsq_unit.hh"
+#include "base/str.hh"
+#include "mem/request.hh"
+
+template<class Impl>
+void
+LSQUnit<Impl>::completeDataAccess(PacketPtr pkt)
+{
+/*
+ DPRINTF(IEW, "Load writeback event [sn:%lli]\n", inst->seqNum);
+ DPRINTF(Activity, "Activity: Ld Writeback event [sn:%lli]\n", inst->seqNum);
+
+ //iewStage->ldstQueue.removeMSHR(inst->threadNumber,inst->seqNum);
+
+ if (iewStage->isSwitchedOut()) {
+ inst = NULL;
+ return;
+ } else if (inst->isSquashed()) {
+ iewStage->wakeCPU();
+ inst = NULL;
+ return;
+ }
+
+ iewStage->wakeCPU();
+
+ if (!inst->isExecuted()) {
+ inst->setExecuted();
+
+ // Complete access to copy data to proper place.
+ inst->completeAcc();
+ }
+
+ // Need to insert instruction into queue to commit
+ iewStage->instToCommit(inst);
+
+ iewStage->activityThisCycle();
+
+ inst = NULL;
+*/
+}
+
+template<class Impl>
+void
+LSQUnit<Impl>::completeStoreDataAccess(DynInstPtr &inst)
+{
+/*
+ DPRINTF(LSQ, "Cache miss complete for store idx:%i\n", storeIdx);
+ DPRINTF(Activity, "Activity: st writeback event idx:%i\n", storeIdx);
+
+ //lsqPtr->removeMSHR(lsqPtr->storeQueue[storeIdx].inst->seqNum);
+
+ if (lsqPtr->isSwitchedOut())
+ return;
+
+ lsqPtr->cpu->wakeCPU();
+
+ if (wb)
+ lsqPtr->completeDataAccess(storeIdx);
+ lsqPtr->completeStore(storeIdx);
+*/
+}
+
+template <class Impl>
+Tick
+LSQUnit<Impl>::DcachePort::recvAtomic(PacketPtr pkt)
+{
+ panic("O3CPU model does not work with atomic mode!");
+ return curTick;
+}
+
+template <class Impl>
+void
+LSQUnit<Impl>::DcachePort::recvFunctional(PacketPtr pkt)
+{
+ panic("O3CPU doesn't expect recvFunctional callback!");
+}
+
+template <class Impl>
+void
+LSQUnit<Impl>::DcachePort::recvStatusChange(Status status)
+{
+ if (status == RangeChange)
+ return;
+
+ panic("O3CPU doesn't expect recvStatusChange callback!");
+}
+
+template <class Impl>
+bool
+LSQUnit<Impl>::DcachePort::recvTiming(PacketPtr pkt)
+{
+ lsq->completeDataAccess(pkt);
+ return true;
+}
+
+template <class Impl>
+void
+LSQUnit<Impl>::DcachePort::recvRetry()
+{
+ panic("Retry unsupported for now!");
+ // we shouldn't get a retry unless we have a packet that we're
+ // waiting to transmit
+/*
+ assert(cpu->dcache_pkt != NULL);
+ assert(cpu->_status == DcacheRetry);
+ PacketPtr tmp = cpu->dcache_pkt;
+ if (sendTiming(tmp)) {
+ cpu->_status = DcacheWaitResponse;
+ cpu->dcache_pkt = NULL;
+ }
+*/
+}
+
+template <class Impl>
+LSQUnit<Impl>::LSQUnit()
+ : loads(0), stores(0), storesToWB(0), stalled(false), isLoadBlocked(false),
+ loadBlockedHandled(false)
+{
+}
+
+template<class Impl>
+void
+LSQUnit<Impl>::init(Params *params, unsigned maxLQEntries,
+ unsigned maxSQEntries, unsigned id)
+{
+ DPRINTF(LSQUnit, "Creating LSQUnit%i object.\n",id);
+
+ switchedOut = false;
+
+ lsqID = id;
+
+ // Add 1 for the sentinel entry (they are circular queues).
+ LQEntries = maxLQEntries + 1;
+ SQEntries = maxSQEntries + 1;
+
+ loadQueue.resize(LQEntries);
+ storeQueue.resize(SQEntries);
+
+ loadHead = loadTail = 0;
+
+ storeHead = storeWBIdx = storeTail = 0;
+
+ usedPorts = 0;
+ cachePorts = params->cachePorts;
+
+ Port *mem_dport = params->mem->getPort("");
+ dcachePort->setPeer(mem_dport);
+ mem_dport->setPeer(dcachePort);
+
+ memDepViolator = NULL;
+
+ blockedLoadSeqNum = 0;
+}
+
+template<class Impl>
+void
+LSQUnit<Impl>::setCPU(FullCPU *cpu_ptr)
+{
+ cpu = cpu_ptr;
+ dcachePort = new DcachePort(cpu, this);
+}
+
+template<class Impl>
+std::string
+LSQUnit<Impl>::name() const
+{
+ if (Impl::MaxThreads == 1) {
+ return iewStage->name() + ".lsq";
+ } else {
+ return iewStage->name() + ".lsq.thread." + to_string(lsqID);
+ }
+}
+
+template<class Impl>
+void
+LSQUnit<Impl>::clearLQ()
+{
+ loadQueue.clear();
+}
+
+template<class Impl>
+void
+LSQUnit<Impl>::clearSQ()
+{
+ storeQueue.clear();
+}
+
+#if 0
+template<class Impl>
+void
+LSQUnit<Impl>::setPageTable(PageTable *pt_ptr)
+{
+ DPRINTF(LSQUnit, "Setting the page table pointer.\n");
+ pTable = pt_ptr;
+}
+#endif
+
+template<class Impl>
+void
+LSQUnit<Impl>::switchOut()
+{
+ switchedOut = true;
+ for (int i = 0; i < loadQueue.size(); ++i)
+ loadQueue[i] = NULL;
+
+ assert(storesToWB == 0);
+}
+
+template<class Impl>
+void
+LSQUnit<Impl>::takeOverFrom()
+{
+ switchedOut = false;
+ loads = stores = storesToWB = 0;
+
+ loadHead = loadTail = 0;
+
+ storeHead = storeWBIdx = storeTail = 0;
+
+ usedPorts = 0;
+
+ memDepViolator = NULL;
+
+ blockedLoadSeqNum = 0;
+
+ stalled = false;
+ isLoadBlocked = false;
+ loadBlockedHandled = false;
+}
+
+template<class Impl>
+void
+LSQUnit<Impl>::resizeLQ(unsigned size)
+{
+ unsigned size_plus_sentinel = size + 1;
+ assert(size_plus_sentinel >= LQEntries);
+
+ if (size_plus_sentinel > LQEntries) {
+ while (size_plus_sentinel > loadQueue.size()) {
+ DynInstPtr dummy;
+ loadQueue.push_back(dummy);
+ LQEntries++;
+ }
+ } else {
+ LQEntries = size_plus_sentinel;
+ }
+
+}
+
+template<class Impl>
+void
+LSQUnit<Impl>::resizeSQ(unsigned size)
+{
+ unsigned size_plus_sentinel = size + 1;
+ if (size_plus_sentinel > SQEntries) {
+ while (size_plus_sentinel > storeQueue.size()) {
+ SQEntry dummy;
+ storeQueue.push_back(dummy);
+ SQEntries++;
+ }
+ } else {
+ SQEntries = size_plus_sentinel;
+ }
+}
+
+template <class Impl>
+void
+LSQUnit<Impl>::insert(DynInstPtr &inst)
+{
+ assert(inst->isMemRef());
+
+ assert(inst->isLoad() || inst->isStore());
+
+ if (inst->isLoad()) {
+ insertLoad(inst);
+ } else {
+ insertStore(inst);
+ }
+
+ inst->setInLSQ();
+}
+
+template <class Impl>
+void
+LSQUnit<Impl>::insertLoad(DynInstPtr &load_inst)
+{
+ assert((loadTail + 1) % LQEntries != loadHead);
+ assert(loads < LQEntries);
+
+ DPRINTF(LSQUnit, "Inserting load PC %#x, idx:%i [sn:%lli]\n",
+ load_inst->readPC(), loadTail, load_inst->seqNum);
+
+ load_inst->lqIdx = loadTail;
+
+ if (stores == 0) {
+ load_inst->sqIdx = -1;
+ } else {
+ load_inst->sqIdx = storeTail;
+ }
+
+ loadQueue[loadTail] = load_inst;
+
+ incrLdIdx(loadTail);
+
+ ++loads;
+}
+
+template <class Impl>
+void
+LSQUnit<Impl>::insertStore(DynInstPtr &store_inst)
+{
+ // Make sure it is not full before inserting an instruction.
+ assert((storeTail + 1) % SQEntries != storeHead);
+ assert(stores < SQEntries);
+
+ DPRINTF(LSQUnit, "Inserting store PC %#x, idx:%i [sn:%lli]\n",
+ store_inst->readPC(), storeTail, store_inst->seqNum);
+
+ store_inst->sqIdx = storeTail;
+ store_inst->lqIdx = loadTail;
+
+ storeQueue[storeTail] = SQEntry(store_inst);
+
+ incrStIdx(storeTail);
+
+ ++stores;
+}
+
+template <class Impl>
+typename Impl::DynInstPtr
+LSQUnit<Impl>::getMemDepViolator()
+{
+ DynInstPtr temp = memDepViolator;
+
+ memDepViolator = NULL;
+
+ return temp;
+}
+
+template <class Impl>
+unsigned
+LSQUnit<Impl>::numFreeEntries()
+{
+ unsigned free_lq_entries = LQEntries - loads;
+ unsigned free_sq_entries = SQEntries - stores;
+
+ // Both the LQ and SQ entries have an extra dummy entry to differentiate
+ // empty/full conditions. Subtract 1 from the free entries.
+ if (free_lq_entries < free_sq_entries) {
+ return free_lq_entries - 1;
+ } else {
+ return free_sq_entries - 1;
+ }
+}
+
+template <class Impl>
+int
+LSQUnit<Impl>::numLoadsReady()
+{
+ int load_idx = loadHead;
+ int retval = 0;
+
+ while (load_idx != loadTail) {
+ assert(loadQueue[load_idx]);
+
+ if (loadQueue[load_idx]->readyToIssue()) {
+ ++retval;
+ }
+ }
+
+ return retval;
+}
+
+template <class Impl>
+Fault
+LSQUnit<Impl>::executeLoad(DynInstPtr &inst)
+{
+ // Execute a specific load.
+ Fault load_fault = NoFault;
+
+ DPRINTF(LSQUnit, "Executing load PC %#x, [sn:%lli]\n",
+ inst->readPC(),inst->seqNum);
+
+ load_fault = inst->initiateAcc();
+
+ // If the instruction faulted, then we need to send it along to commit
+ // without the instruction completing.
+ if (load_fault != NoFault) {
+ // Send this instruction to commit, also make sure iew stage
+ // realizes there is activity.
+ iewStage->instToCommit(inst);
+ iewStage->activityThisCycle();
+ }
+
+ return load_fault;
+}
+
+template <class Impl>
+Fault
+LSQUnit<Impl>::executeStore(DynInstPtr &store_inst)
+{
+ using namespace TheISA;
+ // Make sure that a store exists.
+ assert(stores != 0);
+
+ int store_idx = store_inst->sqIdx;
+
+ DPRINTF(LSQUnit, "Executing store PC %#x [sn:%lli]\n",
+ store_inst->readPC(), store_inst->seqNum);
+
+ // Check the recently completed loads to see if any match this store's
+ // address. If so, then we have a memory ordering violation.
+ int load_idx = store_inst->lqIdx;
+
+ Fault store_fault = store_inst->initiateAcc();
+// Fault store_fault = store_inst->execute();
+
+ if (storeQueue[store_idx].size == 0) {
+ DPRINTF(LSQUnit,"Fault on Store PC %#x, [sn:%lli],Size = 0\n",
+ store_inst->readPC(),store_inst->seqNum);
+
+ return store_fault;
+ }
+
+ assert(store_fault == NoFault);
+
+ if (store_inst->isStoreConditional()) {
+ // Store conditionals need to set themselves as able to
+ // writeback if we haven't had a fault by here.
+ storeQueue[store_idx].canWB = true;
+
+ ++storesToWB;
+ }
+
+ if (!memDepViolator) {
+ while (load_idx != loadTail) {
+ // Really only need to check loads that have actually executed
+ // It's safe to check all loads because effAddr is set to
+ // InvalAddr when the dyn inst is created.
+
+ // @todo: For now this is extra conservative, detecting a
+ // violation if the addresses match assuming all accesses
+ // are quad word accesses.
+
+ // @todo: Fix this, magic number being used here
+ if ((loadQueue[load_idx]->effAddr >> 8) ==
+ (store_inst->effAddr >> 8)) {
+ // A load incorrectly passed this store. Squash and refetch.
+ // For now return a fault to show that it was unsuccessful.
+ memDepViolator = loadQueue[load_idx];
+
+ return genMachineCheckFault();
+ }
+
+ incrLdIdx(load_idx);
+ }
+
+ // If we've reached this point, there was no violation.
+ memDepViolator = NULL;
+ }
+
+ return store_fault;
+}
+
+template <class Impl>
+void
+LSQUnit<Impl>::commitLoad()
+{
+ assert(loadQueue[loadHead]);
+
+ DPRINTF(LSQUnit, "Committing head load instruction, PC %#x\n",
+ loadQueue[loadHead]->readPC());
+
+
+ loadQueue[loadHead] = NULL;
+
+ incrLdIdx(loadHead);
+
+ --loads;
+}
+
+template <class Impl>
+void
+LSQUnit<Impl>::commitLoads(InstSeqNum &youngest_inst)
+{
+ assert(loads == 0 || loadQueue[loadHead]);
+
+ while (loads != 0 && loadQueue[loadHead]->seqNum <= youngest_inst) {
+ commitLoad();
+ }
+}
+
+template <class Impl>
+void
+LSQUnit<Impl>::commitStores(InstSeqNum &youngest_inst)
+{
+ assert(stores == 0 || storeQueue[storeHead].inst);
+
+ int store_idx = storeHead;
+
+ while (store_idx != storeTail) {
+ assert(storeQueue[store_idx].inst);
+ // Mark any stores that are now committed and have not yet
+ // been marked as able to write back.
+ if (!storeQueue[store_idx].canWB) {
+ if (storeQueue[store_idx].inst->seqNum > youngest_inst) {
+ break;
+ }
+ DPRINTF(LSQUnit, "Marking store as able to write back, PC "
+ "%#x [sn:%lli]\n",
+ storeQueue[store_idx].inst->readPC(),
+ storeQueue[store_idx].inst->seqNum);
+
+ storeQueue[store_idx].canWB = true;
+
+ ++storesToWB;
+ }
+
+ incrStIdx(store_idx);
+ }
+}
+
+template <class Impl>
+void
+LSQUnit<Impl>::writebackStores()
+{
+ while (storesToWB > 0 &&
+ storeWBIdx != storeTail &&
+ storeQueue[storeWBIdx].inst &&
+ storeQueue[storeWBIdx].canWB &&
+ usedPorts < cachePorts) {
+
+ // Store didn't write any data so no need to write it back to
+ // memory.
+ if (storeQueue[storeWBIdx].size == 0) {
+ completeStore(storeWBIdx);
+
+ incrStIdx(storeWBIdx);
+
+ continue;
+ }
+/*
+ if (dcacheInterface && dcacheInterface->isBlocked()) {
+ DPRINTF(LSQUnit, "Unable to write back any more stores, cache"
+ " is blocked!\n");
+ break;
+ }
+*/
+ ++usedPorts;
+
+ if (storeQueue[storeWBIdx].inst->isDataPrefetch()) {
+ incrStIdx(storeWBIdx);
+
+ continue;
+ }
+
+ assert(storeQueue[storeWBIdx].req);
+ assert(!storeQueue[storeWBIdx].committed);
+
+ DynInstPtr inst = storeQueue[storeWBIdx].inst;
+
+ Request *req = storeQueue[storeWBIdx].req;
+ storeQueue[storeWBIdx].committed = true;
+
+ assert(!inst->memData);
+ inst->memData = new uint8_t[64];
+ memcpy(inst->memData, (uint8_t *)&storeQueue[storeWBIdx].data, req->getSize());
+
+ PacketPtr data_pkt = new Packet(req, Packet::WriteReq, Packet::Broadcast);
+ data_pkt->dataStatic(inst->memData);
+
+ DPRINTF(LSQUnit, "D-Cache: Writing back store idx:%i PC:%#x "
+ "to Addr:%#x, data:%#x [sn:%lli]\n",
+ storeWBIdx, storeQueue[storeWBIdx].inst->readPC(),
+ req->getPaddr(), *(inst->memData),
+ storeQueue[storeWBIdx].inst->seqNum);
+
+ if (!dcachePort->sendTiming(data_pkt)) {
+ // Need to handle becoming blocked on a store.
+ } else {
+ /*
+ StoreCompletionEvent *store_event = new
+ StoreCompletionEvent(storeWBIdx, NULL, this);
+ */
+ if (isStalled() &&
+ storeQueue[storeWBIdx].inst->seqNum == stallingStoreIsn) {
+ DPRINTF(LSQUnit, "Unstalling, stalling store [sn:%lli] "
+ "load idx:%i\n",
+ stallingStoreIsn, stallingLoadIdx);
+ stalled = false;
+ stallingStoreIsn = 0;
+ iewStage->replayMemInst(loadQueue[stallingLoadIdx]);
+ }
+/*
+ typename LdWritebackEvent *wb = NULL;
+ if (req->flags & LOCKED) {
+ // Stx_C should not generate a system port transaction
+ // if it misses in the cache, but that might be hard
+ // to accomplish without explicit cache support.
+ wb = new typename
+ LdWritebackEvent(storeQueue[storeWBIdx].inst,
+ iewStage);
+ store_event->wbEvent = wb;
+ }
+*/
+ if (data_pkt->result != Packet::Success) {
+ DPRINTF(LSQUnit,"D-Cache Write Miss on idx:%i!\n",
+ storeWBIdx);
+
+ DPRINTF(Activity, "Active st accessing mem miss [sn:%lli]\n",
+ storeQueue[storeWBIdx].inst->seqNum);
+
+ //mshrSeqNums.push_back(storeQueue[storeWBIdx].inst->seqNum);
+
+ //DPRINTF(LSQUnit, "Added MSHR. count = %i\n",mshrSeqNums.size());
+
+ // @todo: Increment stat here.
+ } else {
+ DPRINTF(LSQUnit,"D-Cache: Write Hit on idx:%i !\n",
+ storeWBIdx);
+
+ DPRINTF(Activity, "Active st accessing mem hit [sn:%lli]\n",
+ storeQueue[storeWBIdx].inst->seqNum);
+ }
+
+ incrStIdx(storeWBIdx);
+ }
+ }
+
+ // Not sure this should set it to 0.
+ usedPorts = 0;
+
+ assert(stores >= 0 && storesToWB >= 0);
+}
+
+/*template <class Impl>
+void
+LSQUnit<Impl>::removeMSHR(InstSeqNum seqNum)
+{
+ list<InstSeqNum>::iterator mshr_it = find(mshrSeqNums.begin(),
+ mshrSeqNums.end(),
+ seqNum);
+
+ if (mshr_it != mshrSeqNums.end()) {
+ mshrSeqNums.erase(mshr_it);
+ DPRINTF(LSQUnit, "Removing MSHR. count = %i\n",mshrSeqNums.size());
+ }
+}*/
+
+template <class Impl>
+void
+LSQUnit<Impl>::squash(const InstSeqNum &squashed_num)
+{
+ DPRINTF(LSQUnit, "Squashing until [sn:%lli]!"
+ "(Loads:%i Stores:%i)\n", squashed_num, loads, stores);
+
+ int load_idx = loadTail;
+ decrLdIdx(load_idx);
+
+ while (loads != 0 && loadQueue[load_idx]->seqNum > squashed_num) {
+ DPRINTF(LSQUnit,"Load Instruction PC %#x squashed, "
+ "[sn:%lli]\n",
+ loadQueue[load_idx]->readPC(),
+ loadQueue[load_idx]->seqNum);
+
+ if (isStalled() && load_idx == stallingLoadIdx) {
+ stalled = false;
+ stallingStoreIsn = 0;
+ stallingLoadIdx = 0;
+ }
+
+ // Clear the smart pointer to make sure it is decremented.
+ loadQueue[load_idx]->squashed = true;
+ loadQueue[load_idx] = NULL;
+ --loads;
+
+ // Inefficient!
+ loadTail = load_idx;
+
+ decrLdIdx(load_idx);
+ }
+
+ if (isLoadBlocked) {
+ if (squashed_num < blockedLoadSeqNum) {
+ isLoadBlocked = false;
+ loadBlockedHandled = false;
+ blockedLoadSeqNum = 0;
+ }
+ }
+
+ int store_idx = storeTail;
+ decrStIdx(store_idx);
+
+ while (stores != 0 &&
+ storeQueue[store_idx].inst->seqNum > squashed_num) {
+ // Instructions marked as can WB are already committed.
+ if (storeQueue[store_idx].canWB) {
+ break;
+ }
+
+ DPRINTF(LSQUnit,"Store Instruction PC %#x squashed, "
+ "idx:%i [sn:%lli]\n",
+ storeQueue[store_idx].inst->readPC(),
+ store_idx, storeQueue[store_idx].inst->seqNum);
+
+ // I don't think this can happen. It should have been cleared
+ // by the stalling load.
+ if (isStalled() &&
+ storeQueue[store_idx].inst->seqNum == stallingStoreIsn) {
+ panic("Is stalled should have been cleared by stalling load!\n");
+ stalled = false;
+ stallingStoreIsn = 0;
+ }
+
+ // Clear the smart pointer to make sure it is decremented.
+ storeQueue[store_idx].inst->squashed = true;
+ storeQueue[store_idx].inst = NULL;
+ storeQueue[store_idx].canWB = 0;
+
+ storeQueue[store_idx].req = NULL;
+ --stores;
+
+ // Inefficient!
+ storeTail = store_idx;
+
+ decrStIdx(store_idx);
+ }
+}
+
+template <class Impl>
+void
+LSQUnit<Impl>::completeStore(int store_idx)
+{
+ assert(storeQueue[store_idx].inst);
+ storeQueue[store_idx].completed = true;
+ --storesToWB;
+ // A bit conservative because a store completion may not free up entries,
+ // but hopefully avoids two store completions in one cycle from making
+ // the CPU tick twice.
+ cpu->activityThisCycle();
+
+ if (store_idx == storeHead) {
+ do {
+ incrStIdx(storeHead);
+
+ --stores;
+ } while (storeQueue[storeHead].completed &&
+ storeHead != storeTail);
+
+ iewStage->updateLSQNextCycle = true;
+ }
+
+ DPRINTF(LSQUnit, "Completing store [sn:%lli], idx:%i, store head "
+ "idx:%i\n",
+ storeQueue[store_idx].inst->seqNum, store_idx, storeHead);
+
+ if (isStalled() &&
+ storeQueue[store_idx].inst->seqNum == stallingStoreIsn) {
+ DPRINTF(LSQUnit, "Unstalling, stalling store [sn:%lli] "
+ "load idx:%i\n",
+ stallingStoreIsn, stallingLoadIdx);
+ stalled = false;
+ stallingStoreIsn = 0;
+ iewStage->replayMemInst(loadQueue[stallingLoadIdx]);
+ }
+
+ storeQueue[store_idx].inst->setCompleted();
+
+ // Tell the checker we've completed this instruction. Some stores
+ // may get reported twice to the checker, but the checker can
+ // handle that case.
+ if (cpu->checker) {
+ cpu->checker->tick(storeQueue[store_idx].inst);
+ }
+}
+
+template <class Impl>
+inline void
+LSQUnit<Impl>::incrStIdx(int &store_idx)
+{
+ if (++store_idx >= SQEntries)
+ store_idx = 0;
+}
+
+template <class Impl>
+inline void
+LSQUnit<Impl>::decrStIdx(int &store_idx)
+{
+ if (--store_idx < 0)
+ store_idx += SQEntries;
+}
+
+template <class Impl>
+inline void
+LSQUnit<Impl>::incrLdIdx(int &load_idx)
+{
+ if (++load_idx >= LQEntries)
+ load_idx = 0;
+}
+
+template <class Impl>
+inline void
+LSQUnit<Impl>::decrLdIdx(int &load_idx)
+{
+ if (--load_idx < 0)
+ load_idx += LQEntries;
+}
+
+template <class Impl>
+void
+LSQUnit<Impl>::dumpInsts()
+{
+ cprintf("Load store queue: Dumping instructions.\n");
+ cprintf("Load queue size: %i\n", loads);
+ cprintf("Load queue: ");
+
+ int load_idx = loadHead;
+
+ while (load_idx != loadTail && loadQueue[load_idx]) {
+ cprintf("%#x ", loadQueue[load_idx]->readPC());
+
+ incrLdIdx(load_idx);
+ }
+
+ cprintf("Store queue size: %i\n", stores);
+ cprintf("Store queue: ");
+
+ int store_idx = storeHead;
+
+ while (store_idx != storeTail && storeQueue[store_idx].inst) {
+ cprintf("%#x ", storeQueue[store_idx].inst->readPC());
+
+ incrStIdx(store_idx);
+ }
+
+ cprintf("\n");
+}
diff --git a/src/cpu/o3/regfile.hh b/src/cpu/o3/regfile.hh
index 3350903db..45fe490d2 100644
--- a/src/cpu/o3/regfile.hh
+++ b/src/cpu/o3/regfile.hh
@@ -31,6 +31,7 @@
#include "arch/isa_traits.hh"
#include "arch/faults.hh"
+#include "arch/types.hh"
#include "base/trace.hh"
#include "config/full_system.hh"
#include "cpu/o3/comm.hh"
@@ -44,9 +45,8 @@
/**
* Simple physical register file class.
- * This really only depends on the ISA, and not the Impl. Things that are
- * in the ifdef FULL_SYSTEM are pretty dependent on the ISA, and probably
- * should go in the AlphaFullCPU.
+ * Right now this is specific to Alpha until we decide if/how to make things
+ * generic enough to support other ISAs.
*/
template <class Impl>
class PhysRegFile
@@ -54,8 +54,15 @@ class PhysRegFile
protected:
typedef TheISA::IntReg IntReg;
typedef TheISA::FloatReg FloatReg;
+ typedef TheISA::FloatRegBits FloatRegBits;
typedef TheISA::MiscRegFile MiscRegFile;
typedef TheISA::MiscReg MiscReg;
+
+ typedef union {
+ FloatReg d;
+ FloatRegBits q;
+ } PhysFloatReg;
+
// Note that most of the definitions of the IntReg, FloatReg, etc. exist
// within the Impl/ISA class and not within this PhysRegFile class.
@@ -97,7 +104,7 @@ class PhysRegFile
assert(reg_idx < numPhysicalFloatRegs + numPhysicalIntRegs);
- FloatReg floatReg = floatRegFile.readReg(reg_idx, width);
+ FloatReg floatReg = floatRegFile[reg_idx].d;
DPRINTF(IEW, "RegFile: Access to %d byte float register %i, has "
"data %8.8d\n", int(reg_idx), (double)floatReg);
@@ -113,7 +120,7 @@ class PhysRegFile
assert(reg_idx < numPhysicalFloatRegs + numPhysicalIntRegs);
- FloatReg floatReg = floatRegFile.readReg(reg_idx);
+ FloatReg floatReg = floatRegFile[reg_idx].d;
DPRINTF(IEW, "RegFile: Access to float register %i, has "
"data %8.8d\n", int(reg_idx), (double)floatReg);
@@ -129,7 +136,7 @@ class PhysRegFile
assert(reg_idx < numPhysicalFloatRegs + numPhysicalIntRegs);
- FloatRegBits floatRegBits = floatRegFile.readRegBits(reg_idx, width);
+ FloatRegBits floatRegBits = floatRegFile[reg_idx].q;
DPRINTF(IEW, "RegFile: Access to %d byte float register %i as int, "
"has data %lli\n", int(reg_idx), (uint64_t)floatRegBits);
@@ -144,7 +151,7 @@ class PhysRegFile
assert(reg_idx < numPhysicalFloatRegs + numPhysicalIntRegs);
- FloatRegBits floatRegBits = floatRegFile.readRegBits(reg_idx);
+ FloatRegBits floatRegBits = floatRegFile[reg_idx].q;
DPRINTF(IEW, "RegFile: Access to float register %i as int, "
"has data %lli\n", int(reg_idx), (uint64_t)floatRegBits);
@@ -176,7 +183,7 @@ class PhysRegFile
int(reg_idx), (double)val);
if (reg_idx != TheISA::ZeroReg)
- floatRegFile.setReg(reg_idx, val, width);
+ floatRegFile[reg_idx].d = width;
}
/** Sets a double precision floating point register to the given value. */
@@ -191,7 +198,7 @@ class PhysRegFile
int(reg_idx), (double)val);
if (reg_idx != TheISA::ZeroReg)
- floatRegFile.setReg(reg_idx, val);
+ floatRegFile[reg_idx].d = val;
}
/** Sets a floating point register to the given integer value. */
@@ -205,7 +212,7 @@ class PhysRegFile
DPRINTF(IEW, "RegFile: Setting float register %i to %lli\n",
int(reg_idx), (uint64_t)val);
- floatRegFile.setRegBits(reg_idx, val, width);
+ floatRegFile[reg_idx].q = val;
}
void setFloatRegBits(PhysRegIndex reg_idx, FloatRegBits val)
@@ -217,6 +224,13 @@ class PhysRegFile
DPRINTF(IEW, "RegFile: Setting float register %i to %lli\n",
int(reg_idx), (uint64_t)val);
+
+ floatRegFile[reg_idx].q = val;
+ }
+
+ MiscReg readMiscReg(int misc_reg, unsigned thread_id)
+ {
+ return miscRegs[thread_id].readReg(misc_reg);
}
MiscReg readMiscRegWithEffect(int misc_reg, Fault &fault,
@@ -249,7 +263,7 @@ class PhysRegFile
std::vector<IntReg> intRegFile;
/** Floating point register file. */
- std::vector<FloatReg> floatRegFile;
+ std::vector<PhysFloatReg> floatRegFile;
/** Miscellaneous register file. */
MiscRegFile miscRegs[Impl::MaxThreads];
diff --git a/src/cpu/o3/sat_counter.hh b/src/cpu/o3/sat_counter.hh
index d01fd93ce..640445407 100644
--- a/src/cpu/o3/sat_counter.hh
+++ b/src/cpu/o3/sat_counter.hh
@@ -29,6 +29,7 @@
#ifndef __CPU_O3_SAT_COUNTER_HH__
#define __CPU_O3_SAT_COUNTER_HH__
+#include "base/misc.hh"
#include "sim/host.hh"
/**
diff --git a/src/cpu/o3/scoreboard.cc b/src/cpu/o3/scoreboard.cc
new file mode 100644
index 000000000..b0e433620
--- /dev/null
+++ b/src/cpu/o3/scoreboard.cc
@@ -0,0 +1,106 @@
+/*
+ * Copyright (c) 2004-2005 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "cpu/o3/scoreboard.hh"
+
+Scoreboard::Scoreboard(unsigned activeThreads,
+ unsigned _numLogicalIntRegs,
+ unsigned _numPhysicalIntRegs,
+ unsigned _numLogicalFloatRegs,
+ unsigned _numPhysicalFloatRegs,
+ unsigned _numMiscRegs,
+ unsigned _zeroRegIdx)
+ : numLogicalIntRegs(_numLogicalIntRegs),
+ numPhysicalIntRegs(_numPhysicalIntRegs),
+ numLogicalFloatRegs(_numLogicalFloatRegs),
+ numPhysicalFloatRegs(_numPhysicalFloatRegs),
+ numMiscRegs(_numMiscRegs),
+ zeroRegIdx(_zeroRegIdx)
+{
+ //Get Register Sizes
+ numLogicalRegs = numLogicalIntRegs + numLogicalFloatRegs;
+ numPhysicalRegs = numPhysicalIntRegs + numPhysicalFloatRegs;
+
+ //Resize scoreboard appropriately
+ regScoreBoard.resize(numPhysicalRegs + (numMiscRegs * activeThreads));
+
+ //Initialize values
+ for (int i=0; i < numLogicalIntRegs * activeThreads; i++) {
+ regScoreBoard[i] = 1;
+ }
+
+ for (int i= numPhysicalIntRegs;
+ i < numPhysicalIntRegs + (numLogicalFloatRegs * activeThreads);
+ i++) {
+ regScoreBoard[i] = 1;
+ }
+
+ for (int i = numPhysicalRegs;
+ i < numPhysicalRegs + (numMiscRegs * activeThreads);
+ i++) {
+ regScoreBoard[i] = 1;
+ }
+}
+
+std::string
+Scoreboard::name() const
+{
+ return "cpu.scoreboard";
+}
+
+bool
+Scoreboard::getReg(PhysRegIndex phys_reg)
+{
+ // Always ready if int or fp zero reg.
+ if (phys_reg == zeroRegIdx ||
+ phys_reg == (zeroRegIdx + numPhysicalIntRegs)) {
+ return 1;
+ }
+
+ return regScoreBoard[phys_reg];
+}
+
+void
+Scoreboard::setReg(PhysRegIndex phys_reg)
+{
+ DPRINTF(Scoreboard, "Setting reg %i as ready\n", phys_reg);
+
+ regScoreBoard[phys_reg] = 1;
+}
+
+void
+Scoreboard::unsetReg(PhysRegIndex ready_reg)
+{
+ if (ready_reg == zeroRegIdx ||
+ ready_reg == (zeroRegIdx + numPhysicalIntRegs)) {
+ // Don't do anything if int or fp zero reg.
+ return;
+ }
+
+ regScoreBoard[ready_reg] = 0;
+}
diff --git a/src/cpu/o3/scoreboard.hh b/src/cpu/o3/scoreboard.hh
new file mode 100644
index 000000000..77f2cf157
--- /dev/null
+++ b/src/cpu/o3/scoreboard.hh
@@ -0,0 +1,114 @@
+/*
+ * Copyright (c) 2004-2005 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __CPU_O3_SCOREBOARD_HH__
+#define __CPU_O3_SCOREBOARD_HH__
+
+#include <iostream>
+#include <utility>
+#include <vector>
+#include "arch/alpha/isa_traits.hh"
+#include "base/trace.hh"
+#include "base/traceflags.hh"
+#include "cpu/o3/comm.hh"
+
+/**
+ * Implements a simple scoreboard to track which registers are ready.
+ * This class assumes that the fp registers start, index wise, right after
+ * the integer registers. The misc. registers start, index wise, right after
+ * the fp registers.
+ * @todo: Fix up handling of the zero register in case the decoder does not
+ * automatically make insts that write the zero register into nops.
+ */
+class Scoreboard
+{
+ public:
+ /** Constructs a scoreboard.
+ * @param activeThreads The number of active threads.
+ * @param _numLogicalIntRegs Number of logical integer registers.
+ * @param _numPhysicalIntRegs Number of physical integer registers.
+ * @param _numLogicalFloatRegs Number of logical fp registers.
+ * @param _numPhysicalFloatRegs Number of physical fp registers.
+ * @param _numMiscRegs Number of miscellaneous registers.
+ * @param _zeroRegIdx Index of the zero register.
+ */
+ Scoreboard(unsigned activeThreads,
+ unsigned _numLogicalIntRegs,
+ unsigned _numPhysicalIntRegs,
+ unsigned _numLogicalFloatRegs,
+ unsigned _numPhysicalFloatRegs,
+ unsigned _numMiscRegs,
+ unsigned _zeroRegIdx);
+
+ /** Destructor. */
+ ~Scoreboard() {}
+
+ /** Returns the name of the scoreboard. */
+ std::string name() const;
+
+ /** Checks if the register is ready. */
+ bool getReg(PhysRegIndex ready_reg);
+
+ /** Sets the register as ready. */
+ void setReg(PhysRegIndex phys_reg);
+
+ /** Sets the register as not ready. */
+ void unsetReg(PhysRegIndex ready_reg);
+
+ private:
+ /** Scoreboard of physical integer registers, saying whether or not they
+ * are ready.
+ */
+ std::vector<bool> regScoreBoard;
+
+ /** Number of logical integer registers. */
+ int numLogicalIntRegs;
+
+ /** Number of physical integer registers. */
+ int numPhysicalIntRegs;
+
+ /** Number of logical floating point registers. */
+ int numLogicalFloatRegs;
+
+ /** Number of physical floating point registers. */
+ int numPhysicalFloatRegs;
+
+ /** Number of miscellaneous registers. */
+ int numMiscRegs;
+
+ /** Number of logical integer + float registers. */
+ int numLogicalRegs;
+
+ /** Number of physical integer + float registers. */
+ int numPhysicalRegs;
+
+ /** The logical index of the zero register. */
+ int zeroRegIdx;
+};
+
+#endif
diff --git a/src/cpu/o3/thread_state.hh b/src/cpu/o3/thread_state.hh
new file mode 100644
index 000000000..9101eafb9
--- /dev/null
+++ b/src/cpu/o3/thread_state.hh
@@ -0,0 +1,112 @@
+/*
+ * Copyright (c) 2006 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __CPU_O3_THREAD_STATE_HH__
+#define __CPU_O3_THREAD_STATE_HH__
+
+#include "arch/faults.hh"
+#include "arch/isa_traits.hh"
+#include "cpu/exec_context.hh"
+#include "cpu/thread_state.hh"
+
+class Event;
+class Process;
+
+#if FULL_SYSTEM
+class EndQuiesceEvent;
+class FunctionProfile;
+class ProfileNode;
+#else
+class FunctionalMemory;
+class Process;
+#endif
+
+/**
+ * Class that has various thread state, such as the status, the
+ * current instruction being processed, whether or not the thread has
+ * a trap pending or is being externally updated, the ExecContext
+ * proxy pointer, etc. It also handles anything related to a specific
+ * thread's process, such as syscalls and checking valid addresses.
+ */
+template <class Impl>
+struct O3ThreadState : public ThreadState {
+ typedef ExecContext::Status Status;
+ typedef typename Impl::FullCPU FullCPU;
+
+ Status _status;
+
+ // Current instruction
+ TheISA::MachInst inst;
+ private:
+ FullCPU *cpu;
+ public:
+
+ bool inSyscall;
+
+ bool trapPending;
+
+#if FULL_SYSTEM
+ O3ThreadState(FullCPU *_cpu, int _thread_num, FunctionalMemory *_mem)
+ : ThreadState(-1, _thread_num, _mem),
+ inSyscall(0), trapPending(0)
+ { }
+#else
+ O3ThreadState(FullCPU *_cpu, int _thread_num, Process *_process, int _asid)
+ : ThreadState(-1, _thread_num, NULL, _process, _asid),
+ cpu(_cpu), inSyscall(0), trapPending(0)
+ { }
+
+ O3ThreadState(FullCPU *_cpu, int _thread_num, FunctionalMemory *_mem,
+ int _asid)
+ : ThreadState(-1, _thread_num, _mem, NULL, _asid),
+ cpu(_cpu), inSyscall(0), trapPending(0)
+ { }
+#endif
+
+ ExecContext *xcProxy;
+
+ ExecContext *getXCProxy() { return xcProxy; }
+
+ Status status() const { return _status; }
+
+ void setStatus(Status new_status) { _status = new_status; }
+
+ bool misspeculating() { return false; }
+
+ void setInst(TheISA::MachInst _inst) { inst = _inst; }
+
+ Counter readFuncExeInst() { return funcExeInst; }
+
+ void setFuncExeInst(Counter new_val) { funcExeInst = new_val; }
+
+#if !FULL_SYSTEM
+ void syscall(int64_t callnum) { process->syscall(callnum, xcProxy); }
+#endif
+};
+
+#endif // __CPU_O3_THREAD_STATE_HH__