summary | refs | log | tree | commit | diff
path: root/src/cpu
diff options
context:
space:
mode:
Diffstat (limited to 'src/cpu')
-rw-r--r--  src/cpu/SConscript  8
-rw-r--r--  src/cpu/base_dyn_inst.hh  30
-rw-r--r--  src/cpu/base_dyn_inst_impl.hh  12
-rw-r--r--  src/cpu/checker/cpu.hh  2
-rw-r--r--  src/cpu/checker/thread_context.hh  5
-rw-r--r--  src/cpu/exetrace.cc  44
-rw-r--r--  src/cpu/exetrace.hh  1
-rw-r--r--  src/cpu/o3/2bit_local_pred.hh  3
-rwxr-xr-x  src/cpu/o3/SConscript  26
-rw-r--r--  src/cpu/o3/alpha/cpu.hh  3
-rw-r--r--  src/cpu/o3/alpha/cpu_impl.hh  16
-rw-r--r--  src/cpu/o3/alpha/thread_context.hh  6
-rw-r--r--  src/cpu/o3/bpred_unit.hh  4
-rw-r--r--  src/cpu/o3/bpred_unit_impl.hh  25
-rw-r--r--  src/cpu/o3/btb.cc  2
-rw-r--r--  src/cpu/o3/btb.hh  3
-rw-r--r--  src/cpu/o3/comm.hh  8
-rw-r--r--  src/cpu/o3/commit.hh  15
-rw-r--r--  src/cpu/o3/commit_impl.hh  172
-rw-r--r--  src/cpu/o3/cpu.cc  84
-rw-r--r--  src/cpu/o3/cpu.hh  10
-rw-r--r--  src/cpu/o3/decode.hh  13
-rw-r--r--  src/cpu/o3/decode_impl.hh  88
-rw-r--r--  src/cpu/o3/dyn_inst.hh  19
-rw-r--r--  src/cpu/o3/fetch.hh  28
-rw-r--r--  src/cpu/o3/fetch_impl.hh  223
-rw-r--r--  src/cpu/o3/iew.hh  12
-rw-r--r--  src/cpu/o3/iew_impl.hh  91
-rw-r--r--  src/cpu/o3/inst_queue.hh  1
-rw-r--r--  src/cpu/o3/inst_queue_impl.hh  16
-rwxr-xr-x  src/cpu/o3/isa_specific.hh  7
-rw-r--r--  src/cpu/o3/lsq.hh  59
-rw-r--r--  src/cpu/o3/lsq_impl.hh  92
-rw-r--r--  src/cpu/o3/lsq_unit.hh  89
-rw-r--r--  src/cpu/o3/lsq_unit_impl.hh  52
-rwxr-xr-x  src/cpu/o3/mips/cpu.cc  39
-rwxr-xr-x  src/cpu/o3/mips/cpu.hh  149
-rw-r--r--  src/cpu/o3/mips/cpu_builder.cc  394
-rw-r--r--  src/cpu/o3/mips/cpu_impl.hh  250
-rwxr-xr-x  src/cpu/o3/mips/dyn_inst.cc  37
-rwxr-xr-x  src/cpu/o3/mips/dyn_inst.hh  282
-rwxr-xr-x  src/cpu/o3/mips/dyn_inst_impl.hh  116
-rw-r--r--  src/cpu/o3/mips/impl.hh  93
-rw-r--r--  src/cpu/o3/mips/params.hh  63
-rwxr-xr-x  src/cpu/o3/mips/thread_context.cc  36
-rw-r--r--  src/cpu/o3/mips/thread_context.hh  68
-rw-r--r--  src/cpu/o3/ras.hh  3
-rw-r--r--  src/cpu/o3/regfile.hh  2
-rw-r--r--  src/cpu/o3/rename.hh  5
-rw-r--r--  src/cpu/o3/rename_impl.hh  87
-rw-r--r--  src/cpu/o3/rename_map.hh  3
-rw-r--r--  src/cpu/o3/rob_impl.hh  18
-rw-r--r--  src/cpu/o3/scoreboard.hh  1
-rw-r--r--  src/cpu/o3/store_set.hh  2
-rwxr-xr-x  src/cpu/o3/thread_context.hh  2
-rwxr-xr-x  src/cpu/o3/thread_context_impl.hh  12
-rw-r--r--  src/cpu/o3/thread_state.hh  2
-rw-r--r--  src/cpu/o3/tournament_pred.hh  3
-rw-r--r--  src/cpu/ozone/cpu_impl.hh  1
-rw-r--r--  src/cpu/ozone/dyn_inst.hh  1
-rw-r--r--  src/cpu/ozone/dyn_inst_impl.hh  3
-rw-r--r--  src/cpu/ozone/ea_list.hh  2
-rw-r--r--  src/cpu/ozone/front_end_impl.hh  1
-rw-r--r--  src/cpu/ozone/inorder_back_end_impl.hh  4
-rw-r--r--  src/cpu/ozone/lsq_unit.hh  2
-rw-r--r--  src/cpu/ozone/lsq_unit_impl.hh  2
-rw-r--r--  src/cpu/ozone/lw_lsq.hh  2
-rw-r--r--  src/cpu/ozone/lw_lsq_impl.hh  2
-rw-r--r--  src/cpu/ozone/null_predictor.hh  2
-rw-r--r--  src/cpu/ozone/ozone_impl.hh  1
-rw-r--r--  src/cpu/ozone/simple_impl.hh  1
-rw-r--r--  src/cpu/ozone/thread_state.hh  3
-rw-r--r--  src/cpu/simple/atomic.cc  28
-rw-r--r--  src/cpu/simple/atomic.hh  1
-rw-r--r--  src/cpu/simple/base.cc  13
-rw-r--r--  src/cpu/simple/timing.cc  79
-rw-r--r--  src/cpu/simple/timing.hh  51
-rw-r--r--  src/cpu/simple_thread.cc  1
-rw-r--r--  src/cpu/simple_thread.hh  4
-rw-r--r--  src/cpu/static_inst.hh  5
-rw-r--r--  src/cpu/thread_context.hh  11
-rw-r--r--  src/cpu/thread_state.hh  2
82 files changed, 2674 insertions, 484 deletions
diff --git a/src/cpu/SConscript b/src/cpu/SConscript
index bc4ec7923..2bb9a2399 100644
--- a/src/cpu/SConscript
+++ b/src/cpu/SConscript
@@ -71,7 +71,8 @@ virtual Fault completeAcc(uint8_t *data, %s *xc, Trace::InstRecord *traceData) c
# Generate a temporary CPU list, including the CheckerCPU if
# it's enabled. This isn't used for anything else other than StaticInst
# headers.
-temp_cpu_list = env['CPU_MODELS']
+temp_cpu_list = env['CPU_MODELS'][:]
+
if env['USE_CHECKER']:
temp_cpu_list.append('CheckerCPU')
@@ -113,6 +114,9 @@ CheckerSupportedCPUList = ['O3CPU', 'OzoneCPU']
#
#################################################################
+# Keep a list of CPU models that support SMT
+env['SMT_CPU_MODELS'] = []
+
sources = []
need_simple_base = False
@@ -156,6 +160,8 @@ if 'O3CPU' in env['CPU_MODELS']:
''')
if env['USE_CHECKER']:
sources += Split('o3/checker_builder.cc')
+ else:
+ env['SMT_CPU_MODELS'].append('O3CPU') # Checker doesn't support SMT right now
if 'OzoneCPU' in env['CPU_MODELS']:
need_bp_unit = True
diff --git a/src/cpu/base_dyn_inst.hh b/src/cpu/base_dyn_inst.hh
index 9cc61f74c..40611abe6 100644
--- a/src/cpu/base_dyn_inst.hh
+++ b/src/cpu/base_dyn_inst.hh
@@ -215,6 +215,9 @@ class BaseDynInst : public FastAlloc, public RefCounted
*/
Addr nextPC;
+ /** Next non-speculative NPC. Target PC for Mips or Sparc. */
+ Addr nextNPC;
+
/** Predicted next PC. */
Addr predPC;
@@ -275,6 +278,11 @@ class BaseDynInst : public FastAlloc, public RefCounted
*/
Addr readNextPC() { return nextPC; }
+ /** Returns the next NPC. This could be the speculative next NPC if it is
+ * called prior to the actual branch target being calculated.
+ */
+ Addr readNextNPC() { return nextNPC; }
+
/** Set the predicted target of this current instruction. */
void setPredTarg(Addr predicted_PC) { predPC = predicted_PC; }
@@ -282,11 +290,20 @@ class BaseDynInst : public FastAlloc, public RefCounted
Addr readPredTarg() { return predPC; }
/** Returns whether the instruction was predicted taken or not. */
- bool predTaken() { return predPC != (PC + sizeof(MachInst)); }
+ bool predTaken()
+#if THE_ISA == ALPHA_ISA
+ { return predPC != (PC + sizeof(MachInst)); }
+#else
+ { return predPC != (nextPC + sizeof(MachInst)); }
+#endif
/** Returns whether the instruction mispredicted. */
- bool mispredicted() { return predPC != nextPC; }
-
+ bool mispredicted()
+#if THE_ISA == ALPHA_ISA
+ { return predPC != nextPC; }
+#else
+ { return predPC != nextNPC; }
+#endif
//
// Instruction types. Forward checks to StaticInst object.
//
@@ -308,6 +325,7 @@ class BaseDynInst : public FastAlloc, public RefCounted
bool isIndirectCtrl() const { return staticInst->isIndirectCtrl(); }
bool isCondCtrl() const { return staticInst->isCondCtrl(); }
bool isUncondCtrl() const { return staticInst->isUncondCtrl(); }
+ bool isCondDelaySlot() const { return staticInst->isCondDelaySlot(); }
bool isThreadSync() const { return staticInst->isThreadSync(); }
bool isSerializing() const { return staticInst->isSerializing(); }
bool isSerializeBefore() const
@@ -545,6 +563,12 @@ class BaseDynInst : public FastAlloc, public RefCounted
nextPC = val;
}
+ /** Set the next NPC of this instruction (the target in Mips or Sparc).*/
+ void setNextNPC(uint64_t val)
+ {
+ nextNPC = val;
+ }
+
/** Sets the ASID. */
void setASID(short addr_space_id) { asid = addr_space_id; }
diff --git a/src/cpu/base_dyn_inst_impl.hh b/src/cpu/base_dyn_inst_impl.hh
index 91424faad..f2109e88d 100644
--- a/src/cpu/base_dyn_inst_impl.hh
+++ b/src/cpu/base_dyn_inst_impl.hh
@@ -36,15 +36,12 @@
#include "base/cprintf.hh"
#include "base/trace.hh"
-#include "arch/faults.hh"
+#include "sim/faults.hh"
#include "cpu/exetrace.hh"
#include "mem/request.hh"
#include "cpu/base_dyn_inst.hh"
-using namespace std;
-using namespace TheISA;
-
#define NOHASH
#ifndef NOHASH
@@ -65,7 +62,7 @@ my_hash_t thishash;
#endif
template <class Impl>
-BaseDynInst<Impl>::BaseDynInst(ExtMachInst machInst, Addr inst_PC,
+BaseDynInst<Impl>::BaseDynInst(TheISA::ExtMachInst machInst, Addr inst_PC,
Addr pred_PC, InstSeqNum seq_num,
ImplCPU *cpu)
: staticInst(machInst), traceData(NULL), cpu(cpu)
@@ -73,7 +70,8 @@ BaseDynInst<Impl>::BaseDynInst(ExtMachInst machInst, Addr inst_PC,
seqNum = seq_num;
PC = inst_PC;
- nextPC = PC + sizeof(MachInst);
+ nextPC = PC + sizeof(TheISA::MachInst);
+ nextNPC = nextPC + sizeof(TheISA::MachInst);
predPC = pred_PC;
initVars();
@@ -249,7 +247,7 @@ void
BaseDynInst<Impl>::dump()
{
cprintf("T%d : %#08d `", threadNumber, PC);
- cout << staticInst->disassemble(PC);
+ std::cout << staticInst->disassemble(PC);
cprintf("'\n");
}
diff --git a/src/cpu/checker/cpu.hh b/src/cpu/checker/cpu.hh
index a508c56ba..6d6ae1e0a 100644
--- a/src/cpu/checker/cpu.hh
+++ b/src/cpu/checker/cpu.hh
@@ -170,7 +170,7 @@ class CheckerCPU : public BaseCPU
virtual Counter totalInstructions() const
{
- return numInst - startNumInst;
+ return 0;
}
// number of simulated loads
diff --git a/src/cpu/checker/thread_context.hh b/src/cpu/checker/thread_context.hh
index c035e92ac..8c0186dae 100644
--- a/src/cpu/checker/thread_context.hh
+++ b/src/cpu/checker/thread_context.hh
@@ -31,6 +31,7 @@
#ifndef __CPU_CHECKER_THREAD_CONTEXT_HH__
#define __CPU_CHECKER_THREAD_CONTEXT_HH__
+#include "arch/types.hh"
#include "cpu/checker/cpu.hh"
#include "cpu/simple_thread.hh"
#include "cpu/thread_context.hh"
@@ -295,8 +296,8 @@ class CheckerThreadContext : public ThreadContext
Counter readFuncExeInst() { return actualTC->readFuncExeInst(); }
#endif
- void changeRegFileContext(RegFile::ContextParam param,
- RegFile::ContextVal val)
+ void changeRegFileContext(TheISA::RegContextParam param,
+ TheISA::RegContextVal val)
{
actualTC->changeRegFileContext(param, val);
checkerTC->changeRegFileContext(param, val);
diff --git a/src/cpu/exetrace.cc b/src/cpu/exetrace.cc
index 7fdad5113..748f66d37 100644
--- a/src/cpu/exetrace.cc
+++ b/src/cpu/exetrace.cc
@@ -34,6 +34,7 @@
#include <fstream>
#include <iomanip>
+#include "arch/regfile.hh"
#include "base/loader/symtab.hh"
#include "cpu/base.hh"
#include "cpu/exetrace.hh"
@@ -42,7 +43,7 @@
#include "sim/system.hh"
using namespace std;
-
+using namespace TheISA;
////////////////////////////////////////////////////////////////////////
//
@@ -53,7 +54,43 @@ using namespace std;
void
Trace::InstRecord::dump(ostream &outs)
{
- if (flags[INTEL_FORMAT]) {
+ if (flags[PRINT_REG_DELTA])
+ {
+ outs << "PC = 0x" << setbase(16)
+ << setfill('0')
+ << setw(16) << PC << endl;
+ outs << setbase(10)
+ << setfill(' ')
+ << setw(0);
+ /*
+ int numSources = staticInst->numSrcRegs();
+ int numDests = staticInst->numDestRegs();
+ outs << "Sources:";
+ for(int x = 0; x < numSources; x++)
+ {
+ int sourceNum = staticInst->srcRegIdx(x);
+ if(sourceNum < FP_Base_DepTag)
+ outs << " " << getIntRegName(sourceNum);
+ else if(sourceNum < Ctrl_Base_DepTag)
+ outs << " " << getFloatRegName(sourceNum - FP_Base_DepTag);
+ else
+ outs << " " << getMiscRegName(sourceNum - Ctrl_Base_DepTag);
+ }
+ outs << endl;
+ outs << "Destinations:";
+ for(int x = 0; x < numDests; x++)
+ {
+ int destNum = staticInst->destRegIdx(x);
+ if(destNum < FP_Base_DepTag)
+ outs << " " << getIntRegName(destNum);
+ else if(destNum < Ctrl_Base_DepTag)
+ outs << " " << getFloatRegName(destNum - FP_Base_DepTag);
+ else
+ outs << " " << getMiscRegName(destNum - Ctrl_Base_DepTag);
+ }
+ outs << endl;*/
+ }
+ else if (flags[INTEL_FORMAT]) {
#if FULL_SYSTEM
bool is_trace_system = (cpu->system->name() == trace_system);
#else
@@ -196,6 +233,8 @@ Param<bool> exe_trace_print_fetchseq(&exeTraceParams, "print_fetchseq",
"print fetch sequence number", false);
Param<bool> exe_trace_print_cp_seq(&exeTraceParams, "print_cpseq",
"print correct-path sequence number", false);
+Param<bool> exe_trace_print_reg_delta(&exeTraceParams, "print_reg_delta",
+ "print which registers changed to what", false);
Param<bool> exe_trace_pc_symbol(&exeTraceParams, "pc_symbol",
"Use symbols for the PC if available", true);
Param<bool> exe_trace_intel_format(&exeTraceParams, "intel_format",
@@ -222,6 +261,7 @@ Trace::InstRecord::setParams()
flags[PRINT_INT_REGS] = exe_trace_print_iregs;
flags[PRINT_FETCH_SEQ] = exe_trace_print_fetchseq;
flags[PRINT_CP_SEQ] = exe_trace_print_cp_seq;
+ flags[PRINT_REG_DELTA] = exe_trace_print_reg_delta;
flags[PC_SYMBOL] = exe_trace_pc_symbol;
flags[INTEL_FORMAT] = exe_trace_intel_format;
trace_system = exe_trace_system;
diff --git a/src/cpu/exetrace.hh b/src/cpu/exetrace.hh
index 95f8b449c..8cc98b777 100644
--- a/src/cpu/exetrace.hh
+++ b/src/cpu/exetrace.hh
@@ -147,6 +147,7 @@ class InstRecord : public Record
PRINT_INT_REGS,
PRINT_FETCH_SEQ,
PRINT_CP_SEQ,
+ PRINT_REG_DELTA,
PC_SYMBOL,
INTEL_FORMAT,
NUM_BITS
diff --git a/src/cpu/o3/2bit_local_pred.hh b/src/cpu/o3/2bit_local_pred.hh
index 0a2a71d3e..954b86b4c 100644
--- a/src/cpu/o3/2bit_local_pred.hh
+++ b/src/cpu/o3/2bit_local_pred.hh
@@ -31,9 +31,8 @@
#ifndef __CPU_O3_2BIT_LOCAL_PRED_HH__
#define __CPU_O3_2BIT_LOCAL_PRED_HH__
-// For Addr type.
-#include "arch/isa_traits.hh"
#include "cpu/o3/sat_counter.hh"
+#include "sim/host.hh"
#include <vector>
diff --git a/src/cpu/o3/SConscript b/src/cpu/o3/SConscript
index e65d41411..afbd4c533 100755
--- a/src/cpu/o3/SConscript
+++ b/src/cpu/o3/SConscript
@@ -52,21 +52,19 @@ if env['TARGET_ISA'] == 'alpha':
alpha/cpu_builder.cc
''')
elif env['TARGET_ISA'] == 'mips':
- sys.exit('O3 CPU does not support MIPS')
- #sources += Split('''
- # mips/dyn_inst.cc
- # mips/cpu.cc
- # mips/thread_context.cc
- # mips/cpu_builder.cc
- # ''')
+ sources += Split('''
+ mips/dyn_inst.cc
+ mips/cpu.cc
+ mips/thread_context.cc
+ mips/cpu_builder.cc
+ ''')
elif env['TARGET_ISA'] == 'sparc':
- sys.exit('O3 CPU does not support MIPS')
- #sources += Split('''
- # sparc/dyn_inst.cc
- # sparc/cpu.cc
- # sparc/thread_context.cc
- # sparc/cpu_builder.cc
- # ''')
+ sources += Split('''
+ sparc/dyn_inst.cc
+ sparc/cpu.cc
+ sparc/thread_context.cc
+ sparc/cpu_builder.cc
+ ''')
else:
sys.exit('O3 CPU does not support the \'%s\' ISA' % env['TARGET_ISA'])
diff --git a/src/cpu/o3/alpha/cpu.hh b/src/cpu/o3/alpha/cpu.hh
index b961341d5..9d97f9701 100644
--- a/src/cpu/o3/alpha/cpu.hh
+++ b/src/cpu/o3/alpha/cpu.hh
@@ -31,7 +31,8 @@
#ifndef __CPU_O3_ALPHA_CPU_HH__
#define __CPU_O3_ALPHA_CPU_HH__
-#include "arch/isa_traits.hh"
+#include "arch/regfile.hh"
+#include "arch/types.hh"
#include "cpu/thread_context.hh"
#include "cpu/o3/cpu.hh"
#include "sim/byteswap.hh"
diff --git a/src/cpu/o3/alpha/cpu_impl.hh b/src/cpu/o3/alpha/cpu_impl.hh
index 0473e60c2..b7362fad9 100644
--- a/src/cpu/o3/alpha/cpu_impl.hh
+++ b/src/cpu/o3/alpha/cpu_impl.hh
@@ -31,6 +31,7 @@
#include "config/use_checker.hh"
#include "arch/alpha/faults.hh"
+#include "arch/alpha/isa_traits.hh"
#include "base/cprintf.hh"
#include "base/statistics.hh"
#include "base/timebuf.hh"
@@ -53,8 +54,6 @@
#include "sim/system.hh"
#endif
-using namespace TheISA;
-
template <class Impl>
AlphaO3CPU<Impl>::AlphaO3CPU(Params *params)
#if FULL_SYSTEM
@@ -191,14 +190,14 @@ AlphaO3CPU<Impl>::regStats()
template <class Impl>
-MiscReg
+TheISA::MiscReg
AlphaO3CPU<Impl>::readMiscReg(int misc_reg, unsigned tid)
{
return this->regFile.readMiscReg(misc_reg, tid);
}
template <class Impl>
-MiscReg
+TheISA::MiscReg
AlphaO3CPU<Impl>::readMiscRegWithEffect(int misc_reg, Fault &fault,
unsigned tid)
{
@@ -300,6 +299,7 @@ template <class Impl>
void
AlphaO3CPU<Impl>::processInterrupts()
{
+ using namespace TheISA;
// Check for interrupts here. For now can copy the code that
// exists within isa_fullsys_traits.hh. Also assume that thread 0
// is the one that handles the interrupts.
@@ -411,12 +411,12 @@ AlphaO3CPU<Impl>::setSyscallReturn(SyscallReturn return_value, int tid)
// return value itself in the standard return value reg (v0).
if (return_value.successful()) {
// no error
- this->setArchIntReg(SyscallSuccessReg, 0, tid);
- this->setArchIntReg(ReturnValueReg, return_value.value(), tid);
+ this->setArchIntReg(TheISA::SyscallSuccessReg, 0, tid);
+ this->setArchIntReg(TheISA::ReturnValueReg, return_value.value(), tid);
} else {
// got an error, return details
- this->setArchIntReg(SyscallSuccessReg, (IntReg) -1, tid);
- this->setArchIntReg(ReturnValueReg, -return_value.value(), tid);
+ this->setArchIntReg(TheISA::SyscallSuccessReg, (IntReg) -1, tid);
+ this->setArchIntReg(TheISA::ReturnValueReg, -return_value.value(), tid);
}
}
#endif
diff --git a/src/cpu/o3/alpha/thread_context.hh b/src/cpu/o3/alpha/thread_context.hh
index ad52b0d2e..70a09940f 100644
--- a/src/cpu/o3/alpha/thread_context.hh
+++ b/src/cpu/o3/alpha/thread_context.hh
@@ -26,9 +26,9 @@
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* Authors: Kevin Lim
- * Korey Sewell
*/
+#include "arch/alpha/types.hh"
#include "cpu/o3/thread_context.hh"
template <class Impl>
@@ -65,8 +65,8 @@ class AlphaTC : public O3ThreadContext<Impl>
panic("Alpha has no NextNPC!");
}
- virtual void changeRegFileContext(TheISA::RegFile::ContextParam param,
- TheISA::RegFile::ContextVal val)
+ virtual void changeRegFileContext(TheISA::RegContextParam param,
+ TheISA::RegContextVal val)
{ panic("Not supported on Alpha!"); }
diff --git a/src/cpu/o3/bpred_unit.hh b/src/cpu/o3/bpred_unit.hh
index 2c0a39565..3c4c8e478 100644
--- a/src/cpu/o3/bpred_unit.hh
+++ b/src/cpu/o3/bpred_unit.hh
@@ -31,8 +31,6 @@
#ifndef __CPU_O3_BPRED_UNIT_HH__
#define __CPU_O3_BPRED_UNIT_HH__
-// For Addr type.
-#include "arch/isa_traits.hh"
#include "base/statistics.hh"
#include "cpu/inst_seq.hh"
@@ -41,6 +39,8 @@
#include "cpu/o3/ras.hh"
#include "cpu/o3/tournament_pred.hh"
+#include "sim/host.hh"
+
#include <list>
/**
diff --git a/src/cpu/o3/bpred_unit_impl.hh b/src/cpu/o3/bpred_unit_impl.hh
index 0da02145b..e4e656632 100644
--- a/src/cpu/o3/bpred_unit_impl.hh
+++ b/src/cpu/o3/bpred_unit_impl.hh
@@ -28,15 +28,11 @@
* Authors: Kevin Lim
*/
-#include <list>
-#include <vector>
-
+#include "arch/types.hh"
#include "base/trace.hh"
#include "base/traceflags.hh"
#include "cpu/o3/bpred_unit.hh"
-using namespace std;
-
template<class Impl>
BPredUnit<Impl>::BPredUnit(Params *params)
: BTB(params->BTBEntries,
@@ -159,7 +155,7 @@ BPredUnit<Impl>::predict(DynInstPtr &inst, Addr &PC, unsigned tid)
void *bp_history = NULL;
if (inst->isUncondCtrl()) {
- DPRINTF(Fetch, "BranchPred: [tid:%i] Unconditional control.\n", tid);
+ DPRINTF(Fetch, "BranchPred: [tid:%i]: Unconditional control.\n", tid);
pred_taken = true;
// Tell the BP there was an unconditional branch.
BPUncond(bp_history);
@@ -201,15 +197,20 @@ BPredUnit<Impl>::predict(DynInstPtr &inst, Addr &PC, unsigned tid)
++BTBLookups;
if (inst->isCall()) {
- RAS[tid].push(PC + sizeof(MachInst));
+#if THE_ISA == ALPHA_ISA
+ Addr ras_pc = PC + sizeof(MachInst); // Next PC
+#else
+ Addr ras_pc = PC + (2 * sizeof(MachInst)); // Next Next PC
+#endif
+ RAS[tid].push(ras_pc);
// Record that it was a call so that the top RAS entry can
// be popped off if the speculation is incorrect.
predict_record.wasCall = true;
- DPRINTF(Fetch, "BranchPred: [tid:%i] Instruction %#x was a call"
+ DPRINTF(Fetch, "BranchPred: [tid:%i]: Instruction %#x was a call"
", adding %#x to the RAS.\n",
- tid, inst->readPC(), PC + sizeof(MachInst));
+ tid, inst->readPC(), ras_pc);
}
if (BTB.valid(PC, tid)) {
@@ -242,7 +243,7 @@ BPredUnit<Impl>::predict(DynInstPtr &inst, Addr &PC, unsigned tid)
predHist[tid].push_front(predict_record);
- DPRINTF(Fetch, "[tid:%i] predHist.size(): %i\n", tid, predHist[tid].size());
+ DPRINTF(Fetch, "[tid:%i]: predHist.size(): %i\n", tid, predHist[tid].size());
return pred_taken;
}
@@ -251,8 +252,8 @@ template <class Impl>
void
BPredUnit<Impl>::update(const InstSeqNum &done_sn, unsigned tid)
{
- DPRINTF(Fetch, "BranchPred: [tid:%i]: Commiting branches until sequence"
- "number %lli.\n", tid, done_sn);
+ DPRINTF(Fetch, "BranchPred: [tid:%i]: Commiting branches until "
+ "[sn:%lli].\n", tid, done_sn);
while (!predHist[tid].empty() &&
predHist[tid].back().seqNum <= done_sn) {
diff --git a/src/cpu/o3/btb.cc b/src/cpu/o3/btb.cc
index 01640f4d1..93d6ee768 100644
--- a/src/cpu/o3/btb.cc
+++ b/src/cpu/o3/btb.cc
@@ -32,8 +32,6 @@
#include "base/trace.hh"
#include "cpu/o3/btb.hh"
-using namespace TheISA;
-
DefaultBTB::DefaultBTB(unsigned _numEntries,
unsigned _tagBits,
unsigned _instShiftAmt)
diff --git a/src/cpu/o3/btb.hh b/src/cpu/o3/btb.hh
index dfa3b7b06..3c4899e89 100644
--- a/src/cpu/o3/btb.hh
+++ b/src/cpu/o3/btb.hh
@@ -31,9 +31,8 @@
#ifndef __CPU_O3_BTB_HH__
#define __CPU_O3_BTB_HH__
-// For Addr type.
-#include "arch/isa_traits.hh"
#include "base/misc.hh"
+#include "sim/host.hh"
class DefaultBTB
{
diff --git a/src/cpu/o3/comm.hh b/src/cpu/o3/comm.hh
index bf1bd08e8..aa58fc20e 100644
--- a/src/cpu/o3/comm.hh
+++ b/src/cpu/o3/comm.hh
@@ -33,8 +33,7 @@
#include <vector>
-#include "arch/faults.hh"
-#include "arch/isa_traits.hh"
+#include "sim/faults.hh"
#include "cpu/inst_seq.hh"
#include "sim/host.hh"
@@ -88,6 +87,7 @@ struct DefaultIEWDefaultCommit {
bool squash[Impl::MaxThreads];
bool branchMispredict[Impl::MaxThreads];
bool branchTaken[Impl::MaxThreads];
+ bool condDelaySlotBranch[Impl::MaxThreads];
uint64_t mispredPC[Impl::MaxThreads];
uint64_t nextPC[Impl::MaxThreads];
InstSeqNum squashedSeqNum[Impl::MaxThreads];
@@ -113,6 +113,7 @@ struct TimeBufStruct {
uint64_t branchAddr;
InstSeqNum doneSeqNum;
+ InstSeqNum bdelayDoneSeqNum;
// @todo: Might want to package this kind of branch stuff into a single
// struct as it is used pretty frequently.
@@ -165,6 +166,9 @@ struct TimeBufStruct {
// retired or squashed sequence number.
InstSeqNum doneSeqNum;
+ InstSeqNum bdelayDoneSeqNum;
+ bool squashDelaySlot;
+
//Just in case we want to do a commit/squash on a cycle
//(necessary for multiple ROBs?)
bool commitInsts;
diff --git a/src/cpu/o3/commit.hh b/src/cpu/o3/commit.hh
index 956b6ec3e..7575783f7 100644
--- a/src/cpu/o3/commit.hh
+++ b/src/cpu/o3/commit.hh
@@ -32,7 +32,6 @@
#ifndef __CPU_O3_COMMIT_HH__
#define __CPU_O3_COMMIT_HH__
-#include "arch/faults.hh"
#include "base/statistics.hh"
#include "base/timebuf.hh"
#include "cpu/exetrace.hh"
@@ -165,6 +164,9 @@ class DefaultCommit
/** Sets the pointer to the IEW stage. */
void setIEWStage(IEW *iew_stage);
+ /** Skid buffer between rename and commit. */
+ std::queue<DynInstPtr> skidBuffer;
+
/** The pointer to the IEW stage. Used solely to ensure that
* various events (traps, interrupts, syscalls) do not occur until
* all stores have written back.
@@ -256,6 +258,9 @@ class DefaultCommit
/** Gets instructions from rename and inserts them into the ROB. */
void getInsts();
+ /** Insert all instructions from rename into skidBuffer */
+ void skidInsert();
+
/** Marks completed instructions using information sent from IEW. */
void markCompletedInsts();
@@ -286,13 +291,11 @@ class DefaultCommit
/** Sets the next PC of a specific thread. */
void setNextPC(uint64_t val, unsigned tid) { nextPC[tid] = val; }
-#if THE_ISA != ALPHA_ISA
/** Reads the next NPC of a specific thread. */
- uint64_t readNextPC(unsigned tid) { return nextNPC[tid]; }
+ uint64_t readNextNPC(unsigned tid) { return nextNPC[tid]; }
/** Sets the next NPC of a specific thread. */
- void setNextPC(uint64_t val, unsigned tid) { nextNPC[tid] = val; }
-#endif
+ void setNextNPC(uint64_t val, unsigned tid) { nextNPC[tid] = val; }
private:
/** Time buffer interface. */
@@ -397,10 +400,8 @@ class DefaultCommit
/** The next PC of each thread. */
Addr nextPC[Impl::MaxThreads];
-#if THE_ISA != ALPHA_ISA
/** The next NPC of each thread. */
Addr nextNPC[Impl::MaxThreads];
-#endif
/** The sequence number of the youngest valid instruction in the ROB. */
InstSeqNum youngestSeqNum[Impl::MaxThreads];
diff --git a/src/cpu/o3/commit_impl.hh b/src/cpu/o3/commit_impl.hh
index 904af1071..f200f5f18 100644
--- a/src/cpu/o3/commit_impl.hh
+++ b/src/cpu/o3/commit_impl.hh
@@ -26,6 +26,7 @@
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* Authors: Kevin Lim
+ * Korey Sewell
*/
#include "config/full_system.hh"
@@ -44,8 +45,6 @@
#include "cpu/checker/cpu.hh"
#endif
-using namespace std;
-
template <class Impl>
DefaultCommit<Impl>::TrapEvent::TrapEvent(DefaultCommit<Impl> *_commit,
unsigned _tid)
@@ -86,7 +85,7 @@ DefaultCommit<Impl>::DefaultCommit(Params *params)
{
_status = Active;
_nextStatus = Inactive;
- string policy = params->smtCommitPolicy;
+ std::string policy = params->smtCommitPolicy;
//Convert string to lowercase
std::transform(policy.begin(), policy.end(), policy.begin(),
@@ -120,7 +119,7 @@ DefaultCommit<Impl>::DefaultCommit(Params *params)
changedROBNumEntries[i] = false;
trapSquash[i] = false;
tcSquash[i] = false;
- PC[i] = nextPC[i] = 0;
+ PC[i] = nextPC[i] = nextNPC[i] = 0;
}
}
@@ -235,7 +234,7 @@ DefaultCommit<Impl>::setCPU(O3CPU *cpu_ptr)
template <class Impl>
void
-DefaultCommit<Impl>::setThreads(vector<Thread *> &threads)
+DefaultCommit<Impl>::setThreads(std::vector<Thread *> &threads)
{
thread = threads;
}
@@ -296,7 +295,7 @@ DefaultCommit<Impl>::setIEWStage(IEW *iew_stage)
template<class Impl>
void
-DefaultCommit<Impl>::setActiveThreads(list<unsigned> *at_ptr)
+DefaultCommit<Impl>::setActiveThreads(std::list<unsigned> *at_ptr)
{
DPRINTF(Commit, "Commit: Setting active threads list pointer.\n");
activeThreads = at_ptr;
@@ -390,7 +389,7 @@ void
DefaultCommit<Impl>::updateStatus()
{
// reset ROB changed variable
- list<unsigned>::iterator threads = (*activeThreads).begin();
+ std::list<unsigned>::iterator threads = (*activeThreads).begin();
while (threads != (*activeThreads).end()) {
unsigned tid = *threads++;
changedROBNumEntries[tid] = false;
@@ -419,7 +418,7 @@ DefaultCommit<Impl>::setNextStatus()
{
int squashes = 0;
- list<unsigned>::iterator threads = (*activeThreads).begin();
+ std::list<unsigned>::iterator threads = (*activeThreads).begin();
while (threads != (*activeThreads).end()) {
unsigned tid = *threads++;
@@ -442,7 +441,7 @@ template <class Impl>
bool
DefaultCommit<Impl>::changedROBEntries()
{
- list<unsigned>::iterator threads = (*activeThreads).begin();
+ std::list<unsigned>::iterator threads = (*activeThreads).begin();
while (threads != (*activeThreads).end()) {
unsigned tid = *threads++;
@@ -569,7 +568,7 @@ DefaultCommit<Impl>::tick()
if ((*activeThreads).size() <= 0)
return;
- list<unsigned>::iterator threads = (*activeThreads).begin();
+ std::list<unsigned>::iterator threads = (*activeThreads).begin();
// Check if any of the threads are done squashing. Change the
// status if they are done.
@@ -687,7 +686,7 @@ DefaultCommit<Impl>::commit()
// Check for any possible squashes, handle them first
////////////////////////////////////
- list<unsigned>::iterator threads = (*activeThreads).begin();
+ std::list<unsigned>::iterator threads = (*activeThreads).begin();
while (threads != (*activeThreads).end()) {
unsigned tid = *threads++;
@@ -723,14 +722,48 @@ DefaultCommit<Impl>::commit()
// then use one older sequence number.
InstSeqNum squashed_inst = fromIEW->squashedSeqNum[tid];
- if (fromIEW->includeSquashInst[tid] == true)
- squashed_inst--;
+#if THE_ISA != ALPHA_ISA
+ InstSeqNum bdelay_done_seq_num;
+ bool squash_bdelay_slot;
+
+ if (fromIEW->branchMispredict[tid]) {
+ if (fromIEW->branchTaken[tid] &&
+ fromIEW->condDelaySlotBranch[tid]) {
+ DPRINTF(Commit, "[tid:%i]: Cond. delay slot branch"
+ "mispredicted as taken. Squashing after previous "
+ "inst, [sn:%i]\n",
+ tid, squashed_inst);
+ bdelay_done_seq_num = squashed_inst;
+ squash_bdelay_slot = true;
+ } else {
+ DPRINTF(Commit, "[tid:%i]: Branch Mispredict. Squashing "
+ "after delay slot [sn:%i]\n", tid, squashed_inst+1);
+ bdelay_done_seq_num = squashed_inst + 1;
+ squash_bdelay_slot = false;
+ }
+ } else {
+ bdelay_done_seq_num = squashed_inst;
+ }
+#endif
+ if (fromIEW->includeSquashInst[tid] == true) {
+ squashed_inst--;
+#if THE_ISA != ALPHA_ISA
+ bdelay_done_seq_num--;
+#endif
+ }
// All younger instructions will be squashed. Set the sequence
// number as the youngest instruction in the ROB.
youngestSeqNum[tid] = squashed_inst;
+#if THE_ISA == ALPHA_ISA
rob->squash(squashed_inst, tid);
+ toIEW->commitInfo[tid].squashDelaySlot = true;
+#else
+ rob->squash(bdelay_done_seq_num, tid);
+ toIEW->commitInfo[tid].squashDelaySlot = squash_bdelay_slot;
+ toIEW->commitInfo[tid].bdelayDoneSeqNum = bdelay_done_seq_num;
+#endif
changedROBNumEntries[tid] = true;
toIEW->commitInfo[tid].doneSeqNum = squashed_inst;
@@ -766,6 +799,10 @@ DefaultCommit<Impl>::commit()
// Try to commit any instructions.
commitInsts();
+ } else {
+#if THE_ISA != ALPHA_ISA
+ skidInsert();
+#endif
}
//Check for any activity
@@ -840,6 +877,7 @@ DefaultCommit<Impl>::commitInsts()
} else {
PC[tid] = head_inst->readPC();
nextPC[tid] = head_inst->readNextPC();
+ nextNPC[tid] = head_inst->readNextNPC();
// Increment the total number of non-speculative instructions
// executed.
@@ -868,7 +906,13 @@ DefaultCommit<Impl>::commitInsts()
}
PC[tid] = nextPC[tid];
+#if THE_ISA == ALPHA_ISA
nextPC[tid] = nextPC[tid] + sizeof(TheISA::MachInst);
+#else
+ nextPC[tid] = nextNPC[tid];
+ nextNPC[tid] = nextNPC[tid] + sizeof(TheISA::MachInst);
+#endif
+
#if FULL_SYSTEM
int count = 0;
Addr oldpc;
@@ -996,6 +1040,12 @@ DefaultCommit<Impl>::commitHead(DynInstPtr &head_inst, unsigned inst_num)
// Check if the instruction caused a fault. If so, trap.
Fault inst_fault = head_inst->getFault();
+ // DTB will sometimes need the machine instruction for when
+ // faults happen. So we will set it here, prior to the DTB
+ // possibly needing it for its fault.
+ thread[tid]->setInst(
+ static_cast<TheISA::MachInst>(head_inst->staticInst->machInst));
+
if (inst_fault != NoFault) {
head_inst->setCompleted();
DPRINTF(Commit, "Inst [sn:%lli] PC %#x has a fault\n",
@@ -1018,12 +1068,6 @@ DefaultCommit<Impl>::commitHead(DynInstPtr &head_inst, unsigned inst_num)
// execution doesn't generate extra squashes.
thread[tid]->inSyscall = true;
- // DTB will sometimes need the machine instruction for when
- // faults happen. So we will set it here, prior to the DTB
- // possibly needing it for its fault.
- thread[tid]->setInst(
- static_cast<TheISA::MachInst>(head_inst->staticInst->machInst));
-
// Execute the trap. Although it's slightly unrealistic in
// terms of timing (as it doesn't wait for the full timing of
// the trap event to complete before updating state), it's
@@ -1069,12 +1113,39 @@ template <class Impl>
void
DefaultCommit<Impl>::getInsts()
{
+ DPRINTF(Commit, "Getting instructions from Rename stage.\n");
+
+#if THE_ISA == ALPHA_ISA
+ // Read any renamed instructions and place them into the ROB.
+ int insts_to_process = std::min((int)renameWidth, fromRename->size);
+#else
// Read any renamed instructions and place them into the ROB.
- int insts_to_process = min((int)renameWidth, fromRename->size);
+ int insts_to_process = std::min((int)renameWidth,
+ (int)(fromRename->size + skidBuffer.size()));
+ int rename_idx = 0;
- for (int inst_num = 0; inst_num < insts_to_process; ++inst_num)
- {
- DynInstPtr inst = fromRename->insts[inst_num];
+ DPRINTF(Commit, "%i insts available to process. Rename Insts:%i "
+ "SkidBuffer Insts:%i\n", insts_to_process, fromRename->size,
+ skidBuffer.size());
+#endif
+
+
+ for (int inst_num = 0; inst_num < insts_to_process; ++inst_num) {
+ DynInstPtr inst;
+
+#if THE_ISA == ALPHA_ISA
+ inst = fromRename->insts[inst_num];
+#else
+ // Get insts from skidBuffer or from Rename
+ if (skidBuffer.size() > 0) {
+ DPRINTF(Commit, "Grabbing skidbuffer inst.\n");
+ inst = skidBuffer.front();
+ skidBuffer.pop();
+ } else {
+ DPRINTF(Commit, "Grabbing rename inst.\n");
+ inst = fromRename->insts[rename_idx++];
+ }
+#endif
int tid = inst->threadNumber;
if (!inst->isSquashed() &&
@@ -1095,6 +1166,53 @@ DefaultCommit<Impl>::getInsts()
inst->readPC(), inst->seqNum, tid);
}
}
+
+#if THE_ISA != ALPHA_ISA
+ if (rename_idx < fromRename->size) {
+ DPRINTF(Commit,"Placing Rename Insts into skidBuffer.\n");
+
+ for (;
+ rename_idx < fromRename->size;
+ rename_idx++) {
+ DynInstPtr inst = fromRename->insts[rename_idx];
+ int tid = inst->threadNumber;
+
+ if (!inst->isSquashed()) {
+ DPRINTF(Commit, "Inserting PC %#x [sn:%i] [tid:%i] into ",
+ "skidBuffer.\n", inst->readPC(), inst->seqNum, tid);
+ skidBuffer.push(inst);
+ } else {
+ DPRINTF(Commit, "Instruction PC %#x [sn:%i] [tid:%i] was "
+ "squashed, skipping.\n",
+ inst->readPC(), inst->seqNum, tid);
+ }
+ }
+ }
+#endif
+
+}
+
+/** Buffers the non-squashed instructions sent by rename in skidBuffer. */
+template <class Impl>
+void
+DefaultCommit<Impl>::skidInsert()
+{
+    DPRINTF(Commit, "Attempting to insert any instructions from rename "
+            "into skidBuffer.\n");
+
+    for (int inst_num = 0; inst_num < fromRename->size; ++inst_num) {
+        DynInstPtr inst = fromRename->insts[inst_num];
+        int tid = inst->threadNumber;
+        // Only live instructions are buffered; squashed ones are just logged.
+        if (!inst->isSquashed()) {
+            DPRINTF(Commit, "Inserting PC %#x [sn:%i] [tid:%i] into "
+                    "skidBuffer.\n", inst->readPC(), inst->seqNum, tid);
+            skidBuffer.push(inst);
+        } else {
+            DPRINTF(Commit, "Instruction PC %#x [sn:%i] [tid:%i] was "
+                    "squashed, skipping.\n",
+                    inst->readPC(), inst->seqNum, tid);
+        }
+    }
}
template <class Impl>
@@ -1124,7 +1242,7 @@ template <class Impl>
bool
DefaultCommit<Impl>::robDoneSquashing()
{
- list<unsigned>::iterator threads = (*activeThreads).begin();
+ std::list<unsigned>::iterator threads = (*activeThreads).begin();
while (threads != (*activeThreads).end()) {
unsigned tid = *threads++;
@@ -1221,8 +1339,8 @@ template<class Impl>
int
DefaultCommit<Impl>::roundRobin()
{
- list<unsigned>::iterator pri_iter = priority_list.begin();
- list<unsigned>::iterator end = priority_list.end();
+ std::list<unsigned>::iterator pri_iter = priority_list.begin();
+ std::list<unsigned>::iterator end = priority_list.end();
while (pri_iter != end) {
unsigned tid = *pri_iter;
@@ -1252,7 +1370,7 @@ DefaultCommit<Impl>::oldestReady()
unsigned oldest = 0;
bool first = true;
- list<unsigned>::iterator threads = (*activeThreads).begin();
+ std::list<unsigned>::iterator threads = (*activeThreads).begin();
while (threads != (*activeThreads).end()) {
unsigned tid = *threads++;
diff --git a/src/cpu/o3/cpu.cc b/src/cpu/o3/cpu.cc
index 7d2727401..af032132e 100644
--- a/src/cpu/o3/cpu.cc
+++ b/src/cpu/o3/cpu.cc
@@ -441,7 +441,7 @@ FullO3CPU<Impl>::tick()
if (!tickEvent.scheduled()) {
if (_status == SwitchedOut ||
- getState() == SimObject::DrainedTiming) {
+ getState() == SimObject::Drained) {
// increment stat
lastRunningCycle = curTick;
} else if (!activityRec.active()) {
@@ -577,39 +577,19 @@ void
FullO3CPU<Impl>::suspendContext(int tid)
{
DPRINTF(O3CPU,"[tid: %i]: Suspending Thread Context.\n", tid);
- unscheduleTickEvent();
+ deactivateThread(tid);
+ if (activeThreads.size() == 0)
+ unscheduleTickEvent();
_status = Idle;
-/*
- //Remove From Active List, if Active
- list<unsigned>::iterator isActive = find(
- activeThreads.begin(), activeThreads.end(), tid);
-
- if (isActive != activeThreads.end()) {
- DPRINTF(O3CPU,"[tid:%i]: Removing from active threads list\n",
- tid);
- activeThreads.erase(isActive);
- }
-*/
}
+/**
+ * Halts thread context @p tid. At present this only logs the request and
+ * forwards to deallocateContext().
+ */
template <class Impl>
void
FullO3CPU<Impl>::haltContext(int tid)
{
- DPRINTF(O3CPU,"[tid:%i]: Halting Thread Context", tid);
-/*
- //Remove From Active List, if Active
- list<unsigned>::iterator isActive = find(
- activeThreads.begin(), activeThreads.end(), tid);
-
- if (isActive != activeThreads.end()) {
- DPRINTF(O3CPU,"[tid:%i]: Removing from active threads list\n",
- tid);
- activeThreads.erase(isActive);
-
- removeThread(tid);
- }
-*/
+ //For now, this is the same as deallocate
+ // NOTE(review): the second argument (1) is presumably a delay in
+ // cycles -- confirm against deallocateContext().
+ DPRINTF(O3CPU,"[tid:%i]: Halt Context called. Deallocating\n", tid);
+ deallocateContext(tid, 1);
}
template <class Impl>
@@ -687,11 +667,12 @@ FullO3CPU<Impl>::removeThread(unsigned tid)
}
// Squash Throughout Pipeline
- fetch.squash(0,tid);
+ InstSeqNum squash_seq_num = commit.rob->readHeadInst(tid)->seqNum;
+ fetch.squash(0, squash_seq_num, true, tid);
decode.squash(tid);
- rename.squash(tid);
+ rename.squash(squash_seq_num, tid);
iew.squash(tid);
- commit.rob->squash(commit.rob->readHeadInst(tid)->seqNum, tid);
+ commit.rob->squash(squash_seq_num, tid);
assert(iew.ldstQueue.getCount(tid) == 0);
@@ -765,7 +746,8 @@ template <class Impl>
void
FullO3CPU<Impl>::serialize(std::ostream &os)
{
- SERIALIZE_ENUM(_status);
+ SimObject::State so_state = SimObject::getState();
+ SERIALIZE_ENUM(so_state);
BaseCPU::serialize(os);
nameOut(os, csprintf("%s.tickEvent", name()));
tickEvent.serialize(os);
@@ -786,7 +768,8 @@ template <class Impl>
void
FullO3CPU<Impl>::unserialize(Checkpoint *cp, const std::string &section)
{
- UNSERIALIZE_ENUM(_status);
+ SimObject::State so_state;
+ UNSERIALIZE_ENUM(so_state);
BaseCPU::unserialize(cp, section);
tickEvent.unserialize(cp, csprintf("%s.tickEvent", section));
@@ -803,7 +786,7 @@ FullO3CPU<Impl>::unserialize(Checkpoint *cp, const std::string &section)
}
template <class Impl>
-bool
+unsigned int
FullO3CPU<Impl>::drain(Event *drain_event)
{
drainCount = 0;
@@ -815,7 +798,7 @@ FullO3CPU<Impl>::drain(Event *drain_event)
// Wake the CPU and record activity so everything can drain out if
// the CPU was not able to immediately drain.
- if (getState() != SimObject::DrainedTiming) {
+ if (getState() != SimObject::Drained) {
// A bit of a hack...set the drainEvent after all the drain()
// calls have been made, that way if all of the stages drain
// immediately, the signalDrained() function knows not to call
@@ -825,9 +808,9 @@ FullO3CPU<Impl>::drain(Event *drain_event)
wakeCPU();
activityRec.activity();
- return false;
+ return 1;
} else {
- return true;
+ return 0;
}
}
@@ -835,19 +818,21 @@ template <class Impl>
void
FullO3CPU<Impl>::resume()
{
+ assert(system->getMemoryMode() == System::Timing);
fetch.resume();
decode.resume();
rename.resume();
iew.resume();
commit.resume();
+ changeState(SimObject::Running);
+
if (_status == SwitchedOut || _status == Idle)
return;
if (!tickEvent.scheduled())
tickEvent.schedule(curTick);
_status = Running;
- changeState(SimObject::Timing);
}
template <class Impl>
@@ -858,7 +843,7 @@ FullO3CPU<Impl>::signalDrained()
if (tickEvent.scheduled())
tickEvent.squash();
- changeState(SimObject::DrainedTiming);
+ changeState(SimObject::Drained);
if (drainEvent) {
drainEvent->process();
@@ -1063,7 +1048,8 @@ template <class Impl>
void
FullO3CPU<Impl>::setArchFloatRegSingle(int reg_idx, float val, unsigned tid)
{
- PhysRegIndex phys_reg = commitRenameMap[tid].lookup(reg_idx);
+ int idx = reg_idx + TheISA::FP_Base_DepTag;
+ PhysRegIndex phys_reg = commitRenameMap[tid].lookup(idx);
regFile.setFloatReg(phys_reg, val);
}
@@ -1072,7 +1058,8 @@ template <class Impl>
void
FullO3CPU<Impl>::setArchFloatRegDouble(int reg_idx, double val, unsigned tid)
{
- PhysRegIndex phys_reg = commitRenameMap[tid].lookup(reg_idx);
+ int idx = reg_idx + TheISA::FP_Base_DepTag;
+ PhysRegIndex phys_reg = commitRenameMap[tid].lookup(idx);
regFile.setFloatReg(phys_reg, val, 64);
}
@@ -1081,7 +1068,8 @@ template <class Impl>
void
FullO3CPU<Impl>::setArchFloatRegInt(int reg_idx, uint64_t val, unsigned tid)
{
- PhysRegIndex phys_reg = commitRenameMap[tid].lookup(reg_idx);
+ int idx = reg_idx + TheISA::FP_Base_DepTag;
+ PhysRegIndex phys_reg = commitRenameMap[tid].lookup(idx);
regFile.setFloatRegBits(phys_reg, val);
}
@@ -1114,7 +1102,6 @@ FullO3CPU<Impl>::setNextPC(uint64_t val,unsigned tid)
commit.setNextPC(val, tid);
}
-#if THE_ISA != ALPHA_ISA
template <class Impl>
uint64_t
FullO3CPU<Impl>::readNextNPC(unsigned tid)
@@ -1124,11 +1111,10 @@ FullO3CPU<Impl>::readNextNPC(unsigned tid)
template <class Impl>
void
-FullO3CPU<Impl>::setNextNNPC(uint64_t val,unsigned tid)
+FullO3CPU<Impl>::setNextNPC(uint64_t val,unsigned tid)
{
commit.setNextNPC(val, tid);
}
-#endif
template <class Impl>
typename FullO3CPU<Impl>::ListIt
@@ -1178,7 +1164,9 @@ FullO3CPU<Impl>::removeFrontInst(DynInstPtr &inst)
template <class Impl>
void
-FullO3CPU<Impl>::removeInstsNotInROB(unsigned tid)
+FullO3CPU<Impl>::removeInstsNotInROB(unsigned tid,
+ bool squash_delay_slot,
+ const InstSeqNum &delay_slot_seq_num)
{
DPRINTF(O3CPU, "Thread %i: Deleting instructions from instruction"
" list.\n", tid);
@@ -1209,6 +1197,12 @@ FullO3CPU<Impl>::removeInstsNotInROB(unsigned tid)
while (inst_it != end_it) {
assert(!instList.empty());
+#if THE_ISA != ALPHA_ISA
+ if(!squash_delay_slot &&
+ delay_slot_seq_num >= (*inst_it)->seqNum) {
+ break;
+ }
+#endif
squashInstIt(inst_it, tid);
inst_it--;
diff --git a/src/cpu/o3/cpu.hh b/src/cpu/o3/cpu.hh
index 2fbd013ac..7e18571f1 100644
--- a/src/cpu/o3/cpu.hh
+++ b/src/cpu/o3/cpu.hh
@@ -38,7 +38,7 @@
#include <set>
#include <vector>
-#include "arch/isa_traits.hh"
+#include "arch/types.hh"
#include "base/statistics.hh"
#include "base/timebuf.hh"
#include "config/full_system.hh"
@@ -330,7 +330,7 @@ class FullO3CPU : public BaseO3CPU
/** Starts draining the CPU's pipeline of all instructions in
* order to stop all memory accesses. */
- virtual bool drain(Event *drain_event);
+ virtual unsigned int drain(Event *drain_event);
/** Resumes execution after a drain. */
virtual void resume();
@@ -449,8 +449,10 @@ class FullO3CPU : public BaseO3CPU
*/
void removeFrontInst(DynInstPtr &inst);
- /** Remove all instructions that are not currently in the ROB. */
- void removeInstsNotInROB(unsigned tid);
+ /** Remove all instructions that are not currently in the ROB.
+ * There's also an option to not squash delay slot instructions.*/
+ void removeInstsNotInROB(unsigned tid, bool squash_delay_slot,
+ const InstSeqNum &delay_slot_seq_num);
/** Remove all instructions younger than the given sequence number. */
void removeInstsUntil(const InstSeqNum &seq_num,unsigned tid);
diff --git a/src/cpu/o3/decode.hh b/src/cpu/o3/decode.hh
index 7f5ecbc26..4a845e670 100644
--- a/src/cpu/o3/decode.hh
+++ b/src/cpu/o3/decode.hh
@@ -276,6 +276,19 @@ class DefaultDecode
/** Maximum size of the skid buffer. */
unsigned skidBufferMax;
+ /** SeqNum of Squashing Branch Delay Instruction (used for MIPS)*/
+ Addr bdelayDoneSeqNum[Impl::MaxThreads];
+
+ /** Instruction used for squashing branch (used for MIPS)*/
+ DynInstPtr squashInst[Impl::MaxThreads];
+
+ /** Tells when there is a pending delay slot inst. to send
+ * to rename. If there is, then wait to squash until after the next
+ * instruction (used for MIPS).
+ */
+ bool squashAfterDelaySlot[Impl::MaxThreads];
+
+
/** Stat for total number of idle cycles. */
Stats::Scalar<> decodeIdleCycles;
/** Stat for total number of blocked cycles. */
diff --git a/src/cpu/o3/decode_impl.hh b/src/cpu/o3/decode_impl.hh
index 8b851c032..160845378 100644
--- a/src/cpu/o3/decode_impl.hh
+++ b/src/cpu/o3/decode_impl.hh
@@ -30,8 +30,6 @@
#include "cpu/o3/decode.hh"
-using namespace std;
-
template<class Impl>
DefaultDecode<Impl>::DefaultDecode(Params *params)
: renameToDecodeDelay(params->renameToDecodeDelay),
@@ -50,6 +48,8 @@ DefaultDecode<Impl>::DefaultDecode(Params *params)
stalls[i].rename = false;
stalls[i].iew = false;
stalls[i].commit = false;
+
+ squashAfterDelaySlot[i] = false;
}
// @todo: Make into a parameter
@@ -158,7 +158,7 @@ DefaultDecode<Impl>::setFetchQueue(TimeBuffer<FetchStruct> *fq_ptr)
template<class Impl>
void
-DefaultDecode<Impl>::setActiveThreads(list<unsigned> *at_ptr)
+DefaultDecode<Impl>::setActiveThreads(std::list<unsigned> *at_ptr)
{
DPRINTF(Decode, "Setting active threads list pointer.\n");
activeThreads = at_ptr;
@@ -278,13 +278,25 @@ DefaultDecode<Impl>::squash(DynInstPtr &inst, unsigned tid)
// Send back mispredict information.
toFetch->decodeInfo[tid].branchMispredict = true;
- toFetch->decodeInfo[tid].doneSeqNum = inst->seqNum;
toFetch->decodeInfo[tid].predIncorrect = true;
+ toFetch->decodeInfo[tid].doneSeqNum = inst->seqNum;
toFetch->decodeInfo[tid].squash = true;
toFetch->decodeInfo[tid].nextPC = inst->branchTarget();
+#if THE_ISA == ALPHA_ISA
toFetch->decodeInfo[tid].branchTaken =
inst->readNextPC() != (inst->readPC() + sizeof(TheISA::MachInst));
+ InstSeqNum squash_seq_num = inst->seqNum;
+#else
+ toFetch->decodeInfo[tid].branchTaken = inst->readNextNPC() !=
+ (inst->readNextPC() + sizeof(TheISA::MachInst));
+
+ toFetch->decodeInfo[tid].bdelayDoneSeqNum = bdelayDoneSeqNum[tid];
+ squashAfterDelaySlot[tid] = false;
+
+ InstSeqNum squash_seq_num = bdelayDoneSeqNum[tid];
+#endif
+
// Might have to tell fetch to unblock.
if (decodeStatus[tid] == Blocked ||
decodeStatus[tid] == Unblocking) {
@@ -296,7 +308,7 @@ DefaultDecode<Impl>::squash(DynInstPtr &inst, unsigned tid)
for (int i=0; i<fromFetch->size; i++) {
if (fromFetch->insts[i]->threadNumber == tid &&
- fromFetch->insts[i]->seqNum > inst->seqNum) {
+ fromFetch->insts[i]->seqNum > squash_seq_num) {
fromFetch->insts[i]->setSquashed();
}
}
@@ -304,15 +316,35 @@ DefaultDecode<Impl>::squash(DynInstPtr &inst, unsigned tid)
// Clear the instruction list and skid buffer in case they have any
// insts in them.
while (!insts[tid].empty()) {
+
+#if THE_ISA != ALPHA_ISA
+ // Stop draining once the front instruction's seqNum is at or below
+ // squash_seq_num; that instruction and everything behind it stay queued.
+ if (insts[tid].front()->seqNum <= squash_seq_num) {
+ DPRINTF(Decode, "[tid:%i]: Cannot remove incoming decode "
+ "instructions before delay slot [sn:%i]. %i insts "
+ "left in decode.\n", tid, squash_seq_num,
+ insts[tid].size());
+ break;
+ }
+#endif
insts[tid].pop();
}
while (!skidBuffer[tid].empty()) {
+
+#if THE_ISA != ALPHA_ISA
+ // Stop draining once the front instruction's seqNum is at or below
+ // squash_seq_num; that instruction and everything behind it stay in the
+ // skid buffer. The count reported is the skid buffer's own size, since
+ // that is the queue this loop is emptying.
+ if (skidBuffer[tid].front()->seqNum <= squash_seq_num) {
+ DPRINTF(Decode, "[tid:%i]: Cannot remove skidBuffer "
+ "instructions before delay slot [sn:%i]. %i insts "
+ "left in decode.\n", tid, squash_seq_num,
+ skidBuffer[tid].size());
+ break;
+ }
+#endif
skidBuffer[tid].pop();
}
// Squash instructions up until this one
- cpu->removeInstsUntil(inst->seqNum, tid);
+ cpu->removeInstsUntil(squash_seq_num, tid);
}
template<class Impl>
@@ -392,7 +424,7 @@ template<class Impl>
bool
DefaultDecode<Impl>::skidsEmpty()
{
- list<unsigned>::iterator threads = (*activeThreads).begin();
+ std::list<unsigned>::iterator threads = (*activeThreads).begin();
while (threads != (*activeThreads).end()) {
if (!skidBuffer[*threads++].empty())
@@ -408,7 +440,7 @@ DefaultDecode<Impl>::updateStatus()
{
bool any_unblocking = false;
- list<unsigned>::iterator threads = (*activeThreads).begin();
+ std::list<unsigned>::iterator threads = (*activeThreads).begin();
threads = (*activeThreads).begin();
@@ -565,7 +597,7 @@ DefaultDecode<Impl>::tick()
toRenameIndex = 0;
- list<unsigned>::iterator threads = (*activeThreads).begin();
+ std::list<unsigned>::iterator threads = (*activeThreads).begin();
sortInsts();
@@ -611,7 +643,7 @@ DefaultDecode<Impl>::decode(bool &status_change, unsigned tid)
// will allow, as long as it is not currently blocked.
if (decodeStatus[tid] == Running ||
decodeStatus[tid] == Idle) {
- DPRINTF(Decode, "[tid:%u] Not blocked, so attempting to run "
+ DPRINTF(Decode, "[tid:%u]: Not blocked, so attempting to run "
"stage.\n",tid);
decodeInsts(tid);
@@ -710,6 +742,9 @@ DefaultDecode<Impl>::decodeInsts(unsigned tid)
// Ensure that if it was predicted as a branch, it really is a
// branch.
if (inst->predTaken() && !inst->isControl()) {
+ DPRINTF(Decode, "PredPC : %#x != NextPC: %#x\n",inst->predPC,
+ inst->nextPC + 4);
+
panic("Instruction predicted as a branch!");
++decodeControlMispred;
@@ -730,12 +765,43 @@ DefaultDecode<Impl>::decodeInsts(unsigned tid)
// Might want to set some sort of boolean and just do
// a check at the end
+#if THE_ISA == ALPHA_ISA
squash(inst, inst->threadNumber);
inst->setPredTarg(inst->branchTarget());
-
break;
+#else
+ // If mispredicted as taken, then ignore delay slot
+ // instruction... else keep delay slot and squash
+ // after it is sent to rename
+ if (inst->predTaken() && inst->isCondDelaySlot()) {
+ DPRINTF(Decode, "[tid:%i]: Conditional delay slot inst."
+ "[sn:%i] PC %#x mispredicted as taken.\n", tid,
+ inst->seqNum, inst->PC);
+ bdelayDoneSeqNum[tid] = inst->seqNum;
+ squash(inst, inst->threadNumber);
+ inst->setPredTarg(inst->branchTarget());
+ break;
+ } else {
+ DPRINTF(Decode, "[tid:%i]: Misprediction detected at "
+ "[sn:%i] PC %#x, will squash after delay slot "
+ "inst. is sent to Rename\n",
+ tid, inst->seqNum, inst->PC);
+ bdelayDoneSeqNum[tid] = inst->seqNum + 1;
+ squashAfterDelaySlot[tid] = true;
+ squashInst[tid] = inst;
+ continue;
+ }
+#endif
}
}
+
+ if (squashAfterDelaySlot[tid]) {
+ assert(!inst->isSquashed());
+ squash(squashInst[tid], squashInst[tid]->threadNumber);
+ squashInst[tid]->setPredTarg(squashInst[tid]->branchTarget());
+ assert(!inst->isSquashed());
+ break;
+ }
}
// If we didn't process all instructions, then we will need to block
diff --git a/src/cpu/o3/dyn_inst.hh b/src/cpu/o3/dyn_inst.hh
index a2cdf2dba..279513493 100644
--- a/src/cpu/o3/dyn_inst.hh
+++ b/src/cpu/o3/dyn_inst.hh
@@ -34,12 +34,19 @@
#include "arch/isa_specific.hh"
#if THE_ISA == ALPHA_ISA
-template <class Impl>
-class AlphaDynInst;
-
-struct AlphaSimpleImpl;
-
-typedef AlphaDynInst<AlphaSimpleImpl> O3DynInst;
+ template <class Impl> class AlphaDynInst;
+ struct AlphaSimpleImpl;
+ typedef AlphaDynInst<AlphaSimpleImpl> O3DynInst;
+#elif THE_ISA == MIPS_ISA
+ template <class Impl> class MipsDynInst;
+ struct MipsSimpleImpl;
+ typedef MipsDynInst<MipsSimpleImpl> O3DynInst;
+#elif THE_ISA == SPARC_ISA
+ template <class Impl> class SparcDynInst;
+ struct SparcSimpleImpl;
+ typedef SparcDynInst<SparcSimpleImpl> O3DynInst;
+#else
+ #error "O3DynInst not defined for this ISA"
#endif
#endif // __CPU_O3_DYN_INST_HH__
diff --git a/src/cpu/o3/fetch.hh b/src/cpu/o3/fetch.hh
index 85654cebc..1a2ca32a4 100644
--- a/src/cpu/o3/fetch.hh
+++ b/src/cpu/o3/fetch.hh
@@ -106,6 +106,7 @@ class DefaultFetch
virtual void recvRetry();
};
+
public:
/** Overall fetch status. Used to determine if the CPU can
* deschedule itsef due to a lack of activity.
@@ -218,9 +219,10 @@ class DefaultFetch
* @param next_PC Next PC variable passed in by reference. It is
* expected to be set to the current PC; it will be updated with what
* the next PC will be.
+ * @param next_NPC Used for ISAs which use delay slots.
* @return Whether or not a branch was predicted as taken.
*/
- bool lookupAndUpdateNextPC(DynInstPtr &inst, Addr &next_PC);
+ bool lookupAndUpdateNextPC(DynInstPtr &inst, Addr &next_PC, Addr &next_NPC);
/**
* Fetches the cache line that contains fetch_PC. Returns any
@@ -255,7 +257,8 @@ class DefaultFetch
* remove any instructions that are not in the ROB. The source of this
* squash should be the commit stage.
*/
- void squash(const Addr &new_PC, unsigned tid);
+ void squash(const Addr &new_PC, const InstSeqNum &seq_num,
+ bool squash_delay_slot, unsigned tid);
/** Ticks the fetch stage, processing all inputs signals and fetching
* as many instructions as possible.
@@ -340,14 +343,12 @@ class DefaultFetch
/** Per-thread next PC. */
Addr nextPC[Impl::MaxThreads];
-#if THE_ISA != ALPHA_ISA
/** Per-thread next Next PC.
* This is not a real register but is used for
* architectures that use a branch-delay slot.
* (such as MIPS or Sparc)
*/
Addr nextNPC[Impl::MaxThreads];
-#endif
/** Memory request used to access cache. */
RequestPtr memReq[Impl::MaxThreads];
@@ -360,6 +361,19 @@ class DefaultFetch
/** Tracks how many instructions has been fetched this cycle. */
int numInst;
+ /** Tracks delay slot information for threads in ISAs which use
+ * delay slots. The fetch stage keeps one instance per thread.
+ */
+ struct DelaySlotInfo {
+ InstSeqNum delaySlotSeqNum; /**< NOTE(review): no reader or writer of
+ * this field is visible alongside this declaration -- confirm it
+ * is needed. */
+ InstSeqNum branchSeqNum; /**< Sequence number recorded by fetch() for
+ * the latest branch predicted as taken; a squash whose sequence
+ * number is <= this value resets numInsts, targetAddr and
+ * targetReady. */
+ int numInsts; /**< Delay slot instructions still outstanding:
+ * incremented when a branch is predicted taken, decremented as
+ * fetch() processes each following instruction. */
+ Addr targetAddr; /**< Predicted target of the taken branch; fetch()
+ * loads it into the PC once targetReady is set. */
+ bool targetReady; /**< Set when numInsts reaches zero, meaning the PC
+ * may be redirected to targetAddr; cleared again after that
+ * redirect. */
+ };
+
+ DelaySlotInfo delaySlotInfo[Impl::MaxThreads];
+
/** Source of possible stalls. */
struct Stalls {
bool decode;
@@ -404,6 +418,12 @@ class DefaultFetch
/** The cache line being fetched. */
uint8_t *cacheData[Impl::MaxThreads];
+ /** The PC of the cacheline that has been loaded. */
+ Addr cacheDataPC[Impl::MaxThreads];
+
+ /** Whether or not the cache data is valid. */
+ bool cacheDataValid[Impl::MaxThreads];
+
/** Size of instructions. */
int instSize;
diff --git a/src/cpu/o3/fetch_impl.hh b/src/cpu/o3/fetch_impl.hh
index 39a13f9f8..990db88ac 100644
--- a/src/cpu/o3/fetch_impl.hh
+++ b/src/cpu/o3/fetch_impl.hh
@@ -51,9 +51,6 @@
#include <algorithm>
-using namespace std;
-using namespace TheISA;
-
template<class Impl>
Tick
DefaultFetch<Impl>::IcachePort::recvAtomic(PacketPtr pkt)
@@ -118,7 +115,7 @@ DefaultFetch<Impl>::DefaultFetch(Params *params)
// Set fetch stage's status to inactive.
_status = Inactive;
- string policy = params->smtFetchPolicy;
+ std::string policy = params->smtFetchPolicy;
// Convert string to lowercase
std::transform(policy.begin(), policy.end(), policy.begin(),
@@ -162,15 +159,22 @@ DefaultFetch<Impl>::DefaultFetch(Params *params)
// Create space to store a cache line.
cacheData[tid] = new uint8_t[cacheBlkSize];
+ cacheDataPC[tid] = 0;
+ cacheDataValid[tid] = false;
+
+ delaySlotInfo[tid].branchSeqNum = -1;
+ delaySlotInfo[tid].numInsts = 0;
+ delaySlotInfo[tid].targetAddr = 0;
+ delaySlotInfo[tid].targetReady = false;
- stalls[tid].decode = 0;
- stalls[tid].rename = 0;
- stalls[tid].iew = 0;
- stalls[tid].commit = 0;
+ stalls[tid].decode = false;
+ stalls[tid].rename = false;
+ stalls[tid].iew = false;
+ stalls[tid].commit = false;
}
// Get the size of an instruction.
- instSize = sizeof(MachInst);
+ instSize = sizeof(TheISA::MachInst);
}
template <class Impl>
@@ -286,6 +290,9 @@ DefaultFetch<Impl>::setCPU(O3CPU *cpu_ptr)
}
#endif
+ // Schedule fetch to get the correct PC from the CPU
+ // scheduleFetchStartupEvent(1);
+
// Fetch needs to start fetching instructions at the very beginning,
// so it must start up in active state.
switchToActive();
@@ -307,7 +314,7 @@ DefaultFetch<Impl>::setTimeBuffer(TimeBuffer<TimeStruct> *time_buffer)
template<class Impl>
void
-DefaultFetch<Impl>::setActiveThreads(list<unsigned> *at_ptr)
+DefaultFetch<Impl>::setActiveThreads(std::list<unsigned> *at_ptr)
{
DPRINTF(Fetch, "Setting active threads list pointer.\n");
activeThreads = at_ptr;
@@ -358,6 +365,7 @@ DefaultFetch<Impl>::processCacheCompletion(PacketPtr pkt)
}
memcpy(cacheData[tid], pkt->getPtr<uint8_t *>(), cacheBlkSize);
+ cacheDataValid[tid] = true;
if (!drainPending) {
// Wake up the CPU (if it went to sleep and was waiting on
@@ -423,6 +431,10 @@ DefaultFetch<Impl>::takeOverFrom()
nextPC[i] = cpu->readNextPC(i);
#if THE_ISA != ALPHA_ISA
nextNPC[i] = cpu->readNextNPC(i);
+ delaySlotInfo[i].branchSeqNum = -1;
+ delaySlotInfo[i].numInsts = 0;
+ delaySlotInfo[i].targetAddr = 0;
+ delaySlotInfo[i].targetReady = false;
#endif
fetchStatus[i] = Running;
}
@@ -471,7 +483,8 @@ DefaultFetch<Impl>::switchToInactive()
template <class Impl>
bool
-DefaultFetch<Impl>::lookupAndUpdateNextPC(DynInstPtr &inst, Addr &next_PC)
+DefaultFetch<Impl>::lookupAndUpdateNextPC(DynInstPtr &inst, Addr &next_PC,
+ Addr &next_NPC)
{
// Do branch prediction check here.
// A bit of a misnomer...next_PC is actually the current PC until
@@ -479,12 +492,54 @@ DefaultFetch<Impl>::lookupAndUpdateNextPC(DynInstPtr &inst, Addr &next_PC)
bool predict_taken;
if (!inst->isControl()) {
+#if THE_ISA == ALPHA_ISA
next_PC = next_PC + instSize;
inst->setPredTarg(next_PC);
+#else
+ Addr cur_PC = next_PC;
+ next_PC = cur_PC + instSize; //next_NPC;
+ next_NPC = cur_PC + (2 * instSize);//next_NPC + instSize;
+ inst->setPredTarg(next_NPC);
+#endif
return false;
}
- predict_taken = branchPred.predict(inst, next_PC, inst->threadNumber);
+ int tid = inst->threadNumber;
+#if THE_ISA == ALPHA_ISA
+ predict_taken = branchPred.predict(inst, next_PC, tid);
+#else
+ Addr pred_PC = next_PC;
+ predict_taken = branchPred.predict(inst, pred_PC, tid);
+
+ if (predict_taken) {
+ DPRINTF(Fetch, "[tid:%i]: Branch predicted to be true.\n", tid);
+ } else {
+ DPRINTF(Fetch, "[tid:%i]: Branch predicted to be false.\n", tid);
+ }
+
+ if (predict_taken) {
+ next_PC = next_NPC;
+ next_NPC = pred_PC;
+
+ // Update delay slot info
+ ++delaySlotInfo[tid].numInsts;
+ delaySlotInfo[tid].targetAddr = pred_PC;
+ DPRINTF(Fetch, "[tid:%i]: %i delay slot inst(s) to process.\n", tid,
+ delaySlotInfo[tid].numInsts);
+ } else { // !predict_taken
+ if (inst->isCondDelaySlot()) {
+ next_PC = pred_PC;
+ // The delay slot is skipped here if there is on
+ // prediction
+ } else {
+ next_PC = next_NPC;
+ // No need to declare a delay slot here since
+ // there is no for the pred. target to jump
+ }
+
+ next_NPC = next_NPC + instSize;
+ }
+#endif
++fetchedBranches;
@@ -519,6 +574,11 @@ DefaultFetch<Impl>::fetchCacheLine(Addr fetch_PC, Fault &ret_fault, unsigned tid
// Align the fetch PC so it's at the start of a cache block.
fetch_PC = icacheBlockAlignPC(fetch_PC);
+ // If we've already got the block, no need to try to fetch it again.
+ if (cacheDataValid[tid] && fetch_PC == cacheDataPC[tid]) {
+ return true;
+ }
+
// Setup the memReq to do a read of the first instruction's address.
// Set the appropriate read size and flags as well.
// Build request here.
@@ -550,7 +610,10 @@ DefaultFetch<Impl>::fetchCacheLine(Addr fetch_PC, Fault &ret_fault, unsigned tid
// Build packet here.
PacketPtr data_pkt = new Packet(mem_req,
Packet::ReadReq, Packet::Broadcast);
- data_pkt->dataDynamic(new uint8_t[cacheBlkSize]);
+ data_pkt->dataDynamicArray(new uint8_t[cacheBlkSize]);
+
+ cacheDataPC[tid] = fetch_PC;
+ cacheDataValid[tid] = false;
DPRINTF(Fetch, "Fetch: Doing instruction read.\n");
@@ -595,6 +658,7 @@ DefaultFetch<Impl>::doSquash(const Addr &new_PC, unsigned tid)
PC[tid] = new_PC;
nextPC[tid] = new_PC + instSize;
+ nextNPC[tid] = new_PC + (2 * instSize);
// Clear the icache miss if it's outstanding.
if (fetchStatus[tid] == IcacheWaitResponse) {
@@ -628,6 +692,14 @@ DefaultFetch<Impl>::squashFromDecode(const Addr &new_PC,
doSquash(new_PC, tid);
+#if THE_ISA != ALPHA_ISA
+ if (seq_num <= delaySlotInfo[tid].branchSeqNum) {
+ delaySlotInfo[tid].numInsts = 0;
+ delaySlotInfo[tid].targetAddr = 0;
+ delaySlotInfo[tid].targetReady = false;
+ }
+#endif
+
// Tell the CPU to remove any instructions that are in flight between
// fetch and decode.
cpu->removeInstsUntil(seq_num, tid);
@@ -664,7 +736,7 @@ typename DefaultFetch<Impl>::FetchStatus
DefaultFetch<Impl>::updateFetchStatus()
{
//Check Running
- list<unsigned>::iterator threads = (*activeThreads).begin();
+ std::list<unsigned>::iterator threads = (*activeThreads).begin();
while (threads != (*activeThreads).end()) {
@@ -701,21 +773,33 @@ DefaultFetch<Impl>::updateFetchStatus()
template <class Impl>
void
-DefaultFetch<Impl>::squash(const Addr &new_PC, unsigned tid)
+DefaultFetch<Impl>::squash(const Addr &new_PC, const InstSeqNum &seq_num,
+ bool squash_delay_slot, unsigned tid)
{
DPRINTF(Fetch, "[tid:%u]: Squash from commit.\n",tid);
doSquash(new_PC, tid);
+#if THE_ISA == ALPHA_ISA
+ // Tell the CPU to remove any instructions that are not in the ROB.
+ cpu->removeInstsNotInROB(tid, true, 0);
+#else
+ if (seq_num <= delaySlotInfo[tid].branchSeqNum) {
+ delaySlotInfo[tid].numInsts = 0;
+ delaySlotInfo[tid].targetAddr = 0;
+ delaySlotInfo[tid].targetReady = false;
+ }
+
// Tell the CPU to remove any instructions that are not in the ROB.
- cpu->removeInstsNotInROB(tid);
+ cpu->removeInstsNotInROB(tid, squash_delay_slot, seq_num);
+#endif
}
template <class Impl>
void
DefaultFetch<Impl>::tick()
{
- list<unsigned>::iterator threads = (*activeThreads).begin();
+ std::list<unsigned>::iterator threads = (*activeThreads).begin();
bool status_change = false;
wroteToTimeBuffer = false;
@@ -817,8 +901,16 @@ DefaultFetch<Impl>::checkSignalsAndUpdate(unsigned tid)
DPRINTF(Fetch, "[tid:%u]: Squashing instructions due to squash "
"from commit.\n",tid);
+#if THE_ISA == ALPHA_ISA
+ InstSeqNum doneSeqNum = fromCommit->commitInfo[tid].doneSeqNum;
+#else
+ InstSeqNum doneSeqNum = fromCommit->commitInfo[tid].bdelayDoneSeqNum;
+#endif
// In any case, squash.
- squash(fromCommit->commitInfo[tid].nextPC,tid);
+ squash(fromCommit->commitInfo[tid].nextPC,
+ doneSeqNum,
+ fromCommit->commitInfo[tid].squashDelaySlot,
+ tid);
// Also check if there's a mispredict that happened.
if (fromCommit->commitInfo[tid].branchMispredict) {
@@ -865,9 +957,15 @@ DefaultFetch<Impl>::checkSignalsAndUpdate(unsigned tid)
}
if (fetchStatus[tid] != Squashing) {
+
+#if THE_ISA == ALPHA_ISA
+ InstSeqNum doneSeqNum = fromDecode->decodeInfo[tid].doneSeqNum;
+#else
+ InstSeqNum doneSeqNum = fromDecode->decodeInfo[tid].bdelayDoneSeqNum;
+#endif
// Squash unless we're already squashing
squashFromDecode(fromDecode->decodeInfo[tid].nextPC,
- fromDecode->decodeInfo[tid].doneSeqNum,
+ doneSeqNum,
tid);
return true;
@@ -973,6 +1071,7 @@ DefaultFetch<Impl>::fetch(bool &status_change)
}
Addr next_PC = fetch_PC;
+ Addr next_NPC = next_PC + instSize;
InstSeqNum inst_seq;
MachInst inst;
ExtMachInst ext_inst;
@@ -991,10 +1090,13 @@ DefaultFetch<Impl>::fetch(bool &status_change)
// ended this fetch block.
bool predicted_branch = false;
+ // Need to keep track of whether or not a delay slot
+ // instruction has been fetched
+
for (;
offset < cacheBlkSize &&
numInst < fetchWidth &&
- !predicted_branch;
+ (!predicted_branch || delaySlotInfo[tid].numInsts > 0);
++numInst) {
// Get a sequence number.
@@ -1004,7 +1106,7 @@ DefaultFetch<Impl>::fetch(bool &status_change)
assert(offset <= cacheBlkSize - instSize);
// Get the instruction from the array of the cache line.
- inst = gtoh(*reinterpret_cast<MachInst *>
+ inst = TheISA::gtoh(*reinterpret_cast<TheISA::MachInst *>
(&cacheData[tid][offset]));
ext_inst = TheISA::makeExtMI(inst, fetch_PC);
@@ -1031,7 +1133,8 @@ DefaultFetch<Impl>::fetch(bool &status_change)
instruction->staticInst,
instruction->readPC(),tid);
- predicted_branch = lookupAndUpdateNextPC(instruction, next_PC);
+ predicted_branch = lookupAndUpdateNextPC(instruction, next_PC,
+ next_NPC);
// Add instruction to the CPU's list of instructions.
instruction->setInstListIt(cpu->addInst(instruction));
@@ -1057,7 +1160,41 @@ DefaultFetch<Impl>::fetch(bool &status_change)
break;
}
- offset+= instSize;
+ offset += instSize;
+
+#if THE_ISA != ALPHA_ISA
+ if (predicted_branch) {
+ delaySlotInfo[tid].branchSeqNum = inst_seq;
+
+ DPRINTF(Fetch, "[tid:%i]: Delay slot branch set to [sn:%i]\n",
+ tid, inst_seq);
+ continue;
+ } else if (delaySlotInfo[tid].numInsts > 0) {
+ --delaySlotInfo[tid].numInsts;
+
+ // It's OK to set PC to target of branch
+ if (delaySlotInfo[tid].numInsts == 0) {
+ delaySlotInfo[tid].targetReady = true;
+
+ // Break the looping condition
+ predicted_branch = true;
+ }
+
+ DPRINTF(Fetch, "[tid:%i]: %i delay slot inst(s) left to"
+ " process.\n", tid, delaySlotInfo[tid].numInsts);
+ }
+#endif
+ }
+
+ if (offset >= cacheBlkSize) {
+ DPRINTF(Fetch, "[tid:%i]: Done fetching, reached the end of cache "
+ "block.\n", tid);
+ } else if (numInst >= fetchWidth) {
+ DPRINTF(Fetch, "[tid:%i]: Done fetching, reached fetch bandwidth "
+ "for this cycle.\n", tid);
+ } else if (predicted_branch && delaySlotInfo[tid].numInsts <= 0) {
+ DPRINTF(Fetch, "[tid:%i]: Done fetching, predicted branch "
+ "instruction encountered.\n", tid);
}
}
@@ -1068,18 +1205,26 @@ DefaultFetch<Impl>::fetch(bool &status_change)
// Now that fetching is completed, update the PC to signify what the next
// cycle will be.
if (fault == NoFault) {
- DPRINTF(Fetch, "[tid:%i]: Setting PC to %08p.\n",tid, next_PC);
-
#if THE_ISA == ALPHA_ISA
+ DPRINTF(Fetch, "[tid:%i]: Setting PC to %08p.\n",tid, next_PC);
PC[tid] = next_PC;
nextPC[tid] = next_PC + instSize;
#else
- PC[tid] = next_PC;
- nextPC[tid] = next_PC + instSize;
- nextPC[tid] = next_PC + instSize;
+ if (delaySlotInfo[tid].targetReady &&
+ delaySlotInfo[tid].numInsts == 0) {
+ // Set PC to target
+ PC[tid] = delaySlotInfo[tid].targetAddr; //next_PC
+ nextPC[tid] = next_PC + instSize; //next_NPC
+ nextNPC[tid] = next_PC + (2 * instSize);
+
+ delaySlotInfo[tid].targetReady = false;
+ } else {
+ PC[tid] = next_PC;
+ nextPC[tid] = next_NPC;
+ nextNPC[tid] = next_NPC + instSize;
+ }
- thread->setNextPC(thread->readNextNPC());
- thread->setNextNPC(thread->readNextNPC() + sizeof(MachInst));
+ DPRINTF(Fetch, "[tid:%i]: Setting PC to %08p.\n", tid, PC[tid]);
#endif
} else {
// We shouldn't be in an icache miss and also have a fault (an ITB
@@ -1123,9 +1268,9 @@ DefaultFetch<Impl>::fetch(bool &status_change)
fetchStatus[tid] = TrapPending;
status_change = true;
- warn("cycle %lli: fault (%d) detected @ PC %08p", curTick, fault, PC[tid]);
+ warn("cycle %lli: fault (%s) detected @ PC %08p", curTick, fault->name(), PC[tid]);
#else // !FULL_SYSTEM
- warn("cycle %lli: fault (%d) detected @ PC %08p", curTick, fault, PC[tid]);
+ warn("cycle %lli: fault (%s) detected @ PC %08p", curTick, fault->name(), PC[tid]);
#endif // FULL_SYSTEM
}
}
@@ -1202,8 +1347,8 @@ template<class Impl>
int
DefaultFetch<Impl>::roundRobin()
{
- list<unsigned>::iterator pri_iter = priorityList.begin();
- list<unsigned>::iterator end = priorityList.end();
+ std::list<unsigned>::iterator pri_iter = priorityList.begin();
+ std::list<unsigned>::iterator end = priorityList.end();
int high_pri;
@@ -1232,9 +1377,9 @@ template<class Impl>
int
DefaultFetch<Impl>::iqCount()
{
- priority_queue<unsigned> PQ;
+ std::priority_queue<unsigned> PQ;
- list<unsigned>::iterator threads = (*activeThreads).begin();
+ std::list<unsigned>::iterator threads = (*activeThreads).begin();
while (threads != (*activeThreads).end()) {
unsigned tid = *threads++;
@@ -1262,10 +1407,10 @@ template<class Impl>
int
DefaultFetch<Impl>::lsqCount()
{
- priority_queue<unsigned> PQ;
+ std::priority_queue<unsigned> PQ;
- list<unsigned>::iterator threads = (*activeThreads).begin();
+ std::list<unsigned>::iterator threads = (*activeThreads).begin();
while (threads != (*activeThreads).end()) {
unsigned tid = *threads++;
@@ -1293,7 +1438,7 @@ template<class Impl>
int
DefaultFetch<Impl>::branchCount()
{
- list<unsigned>::iterator threads = (*activeThreads).begin();
+ std::list<unsigned>::iterator threads = (*activeThreads).begin();
panic("Branch Count Fetch policy unimplemented\n");
return *threads;
}
diff --git a/src/cpu/o3/iew.hh b/src/cpu/o3/iew.hh
index fb9afde54..76fa008ee 100644
--- a/src/cpu/o3/iew.hh
+++ b/src/cpu/o3/iew.hh
@@ -31,11 +31,12 @@
#ifndef __CPU_O3_IEW_HH__
#define __CPU_O3_IEW_HH__
+#include "config/full_system.hh"
+
#include <queue>
#include "base/statistics.hh"
#include "base/timebuf.hh"
-#include "config/full_system.hh"
#include "cpu/o3/comm.hh"
#include "cpu/o3/scoreboard.hh"
#include "cpu/o3/lsq.hh"
@@ -215,7 +216,7 @@ class DefaultIEW
if (++wbOutstanding == wbMax)
ableToIssue = false;
DPRINTF(IEW, "wbOutstanding: %i\n", wbOutstanding);
-#if DEBUG
+#ifdef DEBUG
wbList.insert(sn);
#endif
}
@@ -225,13 +226,13 @@ class DefaultIEW
if (wbOutstanding-- == wbMax)
ableToIssue = true;
DPRINTF(IEW, "wbOutstanding: %i\n", wbOutstanding);
-#if DEBUG
+#ifdef DEBUG
assert(wbList.find(sn) != wbList.end());
wbList.erase(sn);
#endif
}
-#if DEBUG
+#ifdef DEBUG
std::set<InstSeqNum> wbList;
void dumpWb()
@@ -404,6 +405,9 @@ class DefaultIEW
/** Records if there is a fetch redirect on this cycle for each thread. */
bool fetchRedirect[Impl::MaxThreads];
+ /** Keeps track of the last valid branch delay slot insts for threads */
+ InstSeqNum bdelayDoneSeqNum[Impl::MaxThreads];
+
/** Used to track if all instructions have been dispatched this cycle.
* If they have not, then blocking must have occurred, and the instructions
* would already be added to the skid buffer.
diff --git a/src/cpu/o3/iew_impl.hh b/src/cpu/o3/iew_impl.hh
index 684ae2295..cdc36c6c3 100644
--- a/src/cpu/o3/iew_impl.hh
+++ b/src/cpu/o3/iew_impl.hh
@@ -38,8 +38,6 @@
#include "cpu/o3/fu_pool.hh"
#include "cpu/o3/iew.hh"
-using namespace std;
-
template<class Impl>
DefaultIEW<Impl>::DefaultIEW(Params *params)
: issueToExecQueue(params->backComSize, params->forwardComSize),
@@ -73,6 +71,7 @@ DefaultIEW<Impl>::DefaultIEW(Params *params)
dispatchStatus[i] = Running;
stalls[i].commit = false;
fetchRedirect[i] = false;
+ bdelayDoneSeqNum[i] = 0;
}
wbMax = wbWidth * params->wbDepth;
@@ -335,7 +334,7 @@ DefaultIEW<Impl>::setIEWQueue(TimeBuffer<IEWStruct> *iq_ptr)
template<class Impl>
void
-DefaultIEW<Impl>::setActiveThreads(list<unsigned> *at_ptr)
+DefaultIEW<Impl>::setActiveThreads(std::list<unsigned> *at_ptr)
{
DPRINTF(IEW, "Setting active threads list pointer.\n");
activeThreads = at_ptr;
@@ -428,13 +427,31 @@ DefaultIEW<Impl>::squash(unsigned tid)
instQueue.squash(tid);
// Tell the LDSTQ to start squashing.
+#if THE_ISA == ALPHA_ISA
ldstQueue.squash(fromCommit->commitInfo[tid].doneSeqNum, tid);
-
+#else
+ ldstQueue.squash(fromCommit->commitInfo[tid].bdelayDoneSeqNum, tid);
+#endif
updatedQueues = true;
// Clear the skid buffer in case it has any data in it.
- while (!skidBuffer[tid].empty()) {
+ DPRINTF(IEW, "[tid:%i]: Removing skidbuffer instructions until [sn:%i].\n",
+ tid, fromCommit->commitInfo[tid].bdelayDoneSeqNum);
+ while (!skidBuffer[tid].empty()) {
+#if THE_ISA != ALPHA_ISA
+        // On ISAs other than Alpha, instructions at or before the
+        // delay-slot sequence number reported by commit are kept: stop
+        // draining the skid buffer as soon as one is at the front.
+        if (skidBuffer[tid].front()->seqNum <=
+            fromCommit->commitInfo[tid].bdelayDoneSeqNum) {
+            // Argument order must follow the format string: thread id
+            // first, then the delay-slot sequence number.
+            DPRINTF(IEW, "[tid:%i]: Cannot remove skidbuffer instructions "
+                    "that occur before delay slot [sn:%i].\n",
+                    tid,
+                    fromCommit->commitInfo[tid].bdelayDoneSeqNum);
+            break;
+        } else {
+            DPRINTF(IEW, "[tid:%i]: Removing instruction [sn:%i] from "
+                    "skidBuffer.\n", tid, skidBuffer[tid].front()->seqNum);
+        }
+#endif
if (skidBuffer[tid].front()->isLoad() ||
skidBuffer[tid].front()->isStore() ) {
toRename->iewInfo[tid].dispatchedToLSQ++;
@@ -445,6 +462,8 @@ DefaultIEW<Impl>::squash(unsigned tid)
skidBuffer[tid].pop();
}
+ bdelayDoneSeqNum[tid] = fromCommit->commitInfo[tid].bdelayDoneSeqNum;
+
emptyRenameInsts(tid);
}
@@ -458,10 +477,26 @@ DefaultIEW<Impl>::squashDueToBranch(DynInstPtr &inst, unsigned tid)
toCommit->squash[tid] = true;
toCommit->squashedSeqNum[tid] = inst->seqNum;
toCommit->mispredPC[tid] = inst->readPC();
- toCommit->nextPC[tid] = inst->readNextPC();
toCommit->branchMispredict[tid] = true;
+
+#if THE_ISA == ALPHA_ISA
toCommit->branchTaken[tid] = inst->readNextPC() !=
(inst->readPC() + sizeof(TheISA::MachInst));
+ toCommit->nextPC[tid] = inst->readNextPC();
+#else
+ bool branch_taken = inst->readNextNPC() !=
+ (inst->readNextPC() + sizeof(TheISA::MachInst));
+
+ toCommit->branchTaken[tid] = branch_taken;
+
+ toCommit->condDelaySlotBranch[tid] = inst->isCondDelaySlot();
+
+ if (inst->isCondDelaySlot() && branch_taken) {
+ toCommit->nextPC[tid] = inst->readNextPC();
+ } else {
+ toCommit->nextPC[tid] = inst->readNextNPC();
+ }
+#endif
toCommit->includeSquashInst[tid] = false;
@@ -626,7 +661,7 @@ DefaultIEW<Impl>::skidCount()
{
int max=0;
- list<unsigned>::iterator threads = (*activeThreads).begin();
+ std::list<unsigned>::iterator threads = (*activeThreads).begin();
while (threads != (*activeThreads).end()) {
unsigned thread_count = skidBuffer[*threads++].size();
@@ -641,7 +676,7 @@ template<class Impl>
bool
DefaultIEW<Impl>::skidsEmpty()
{
- list<unsigned>::iterator threads = (*activeThreads).begin();
+ std::list<unsigned>::iterator threads = (*activeThreads).begin();
while (threads != (*activeThreads).end()) {
if (!skidBuffer[*threads++].empty())
@@ -657,7 +692,7 @@ DefaultIEW<Impl>::updateStatus()
{
bool any_unblocking = false;
- list<unsigned>::iterator threads = (*activeThreads).begin();
+ std::list<unsigned>::iterator threads = (*activeThreads).begin();
threads = (*activeThreads).begin();
@@ -825,9 +860,11 @@ DefaultIEW<Impl>::sortInsts()
{
int insts_from_rename = fromRename->size;
#ifdef DEBUG
+#if THE_ISA == ALPHA_ISA
for (int i = 0; i < numThreads; i++)
assert(insts[i].empty());
#endif
+#endif
for (int i = 0; i < insts_from_rename; ++i) {
insts[fromRename->insts[i]->threadNumber].push(fromRename->insts[i]);
}
@@ -837,7 +874,23 @@ template <class Impl>
void
DefaultIEW<Impl>::emptyRenameInsts(unsigned tid)
{
+ DPRINTF(IEW, "[tid:%i]: Removing incoming rename instructions until "
+ "[sn:%i].\n", tid, bdelayDoneSeqNum[tid]);
+
while (!insts[tid].empty()) {
+
+#if THE_ISA != ALPHA_ISA
+ if (insts[tid].front()->seqNum <= bdelayDoneSeqNum[tid]) {
+ DPRINTF(IEW, "[tid:%i]: Done removing, cannot remove instruction"
+ " that occurs at or before delay slot [sn:%i].\n",
+ tid, bdelayDoneSeqNum[tid]);
+ break;
+ } else {
+ DPRINTF(IEW, "[tid:%i]: Removing incoming rename instruction "
+ "[sn:%i].\n", tid, insts[tid].front()->seqNum);
+ }
+#endif
+
if (insts[tid].front()->isLoad() ||
insts[tid].front()->isStore() ) {
toRename->iewInfo[tid].dispatchedToLSQ++;
@@ -1120,7 +1173,7 @@ DefaultIEW<Impl>::dispatchInsts(unsigned tid)
}
if (!insts_to_dispatch.empty()) {
- DPRINTF(IEW,"[tid:%i]: Issue: Bandwidth Full. Blocking.\n");
+ DPRINTF(IEW,"[tid:%i]: Issue: Bandwidth Full. Blocking.\n", tid);
block(tid);
toRename->iewUnblock[tid] = false;
}
@@ -1140,13 +1193,13 @@ DefaultIEW<Impl>::printAvailableInsts()
{
int inst = 0;
- cout << "Available Instructions: ";
+ std::cout << "Available Instructions: ";
while (fromIssue->insts[inst]) {
- if (inst%3==0) cout << "\n\t";
+ if (inst%3==0) std::cout << "\n\t";
- cout << "PC: " << fromIssue->insts[inst]->readPC()
+ std::cout << "PC: " << fromIssue->insts[inst]->readPC()
<< " TN: " << fromIssue->insts[inst]->threadNumber
<< " SN: " << fromIssue->insts[inst]->seqNum << " | ";
@@ -1154,7 +1207,7 @@ DefaultIEW<Impl>::printAvailableInsts()
}
- cout << "\n";
+ std::cout << "\n";
}
template <class Impl>
@@ -1164,7 +1217,7 @@ DefaultIEW<Impl>::executeInsts()
wbNumInst = 0;
wbCycle = 0;
- list<unsigned>::iterator threads = (*activeThreads).begin();
+ std::list<unsigned>::iterator threads = (*activeThreads).begin();
while (threads != (*activeThreads).end()) {
unsigned tid = *threads++;
@@ -1263,9 +1316,13 @@ DefaultIEW<Impl>::executeInsts()
fetchRedirect[tid] = true;
DPRINTF(IEW, "Execute: Branch mispredict detected.\n");
+#if THE_ISA == ALPHA_ISA
DPRINTF(IEW, "Execute: Redirecting fetch to PC: %#x.\n",
inst->nextPC);
-
+#else
+ DPRINTF(IEW, "Execute: Redirecting fetch to PC: %#x.\n",
+ inst->nextNPC);
+#endif
// If incorrect, then signal the ROB that it must be squashed.
squashDueToBranch(inst, tid);
@@ -1384,7 +1441,7 @@ DefaultIEW<Impl>::tick()
// Free function units marked as being freed this cycle.
fuPool->processFreeUnits();
- list<unsigned>::iterator threads = (*activeThreads).begin();
+ std::list<unsigned>::iterator threads = (*activeThreads).begin();
// Check stall and squash signals, dispatch any instructions.
while (threads != (*activeThreads).end()) {
diff --git a/src/cpu/o3/inst_queue.hh b/src/cpu/o3/inst_queue.hh
index 4c69ca384..d745faf7b 100644
--- a/src/cpu/o3/inst_queue.hh
+++ b/src/cpu/o3/inst_queue.hh
@@ -26,7 +26,6 @@
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* Authors: Kevin Lim
- * Korey Sewell
*/
#ifndef __CPU_O3_INST_QUEUE_HH__
diff --git a/src/cpu/o3/inst_queue_impl.hh b/src/cpu/o3/inst_queue_impl.hh
index 36e0842be..e7991662b 100644
--- a/src/cpu/o3/inst_queue_impl.hh
+++ b/src/cpu/o3/inst_queue_impl.hh
@@ -37,8 +37,6 @@
#include "cpu/o3/fu_pool.hh"
#include "cpu/o3/inst_queue.hh"
-using namespace std;
-
template <class Impl>
InstructionQueue<Impl>::FUCompletion::FUCompletion(DynInstPtr &_inst,
int fu_idx,
@@ -100,7 +98,7 @@ InstructionQueue<Impl>::InstructionQueue(Params *params)
resetState();
- string policy = params->smtIQPolicy;
+ std::string policy = params->smtIQPolicy;
//Convert string to lowercase
std::transform(policy.begin(), policy.end(), policy.begin(),
@@ -279,7 +277,7 @@ InstructionQueue<Impl>::regStats()
;
for (int i=0; i<Num_OpClasses; ++i) {
- stringstream subname;
+ std::stringstream subname;
subname << opClassStrings[i] << "_delay";
issueDelayDist.subname(i, subname.str());
}
@@ -359,7 +357,7 @@ InstructionQueue<Impl>::resetState()
template <class Impl>
void
-InstructionQueue<Impl>::setActiveThreads(list<unsigned> *at_ptr)
+InstructionQueue<Impl>::setActiveThreads(std::list<unsigned> *at_ptr)
{
DPRINTF(IQ, "Setting active threads list pointer.\n");
activeThreads = at_ptr;
@@ -421,8 +419,8 @@ InstructionQueue<Impl>::resetEntries()
if (iqPolicy != Dynamic || numThreads > 1) {
int active_threads = (*activeThreads).size();
- list<unsigned>::iterator threads = (*activeThreads).begin();
- list<unsigned>::iterator list_end = (*activeThreads).end();
+ std::list<unsigned>::iterator threads = (*activeThreads).begin();
+ std::list<unsigned>::iterator list_end = (*activeThreads).end();
while (threads != list_end) {
if (iqPolicy == Partitioned) {
@@ -993,7 +991,11 @@ InstructionQueue<Impl>::squash(unsigned tid)
// Read instruction sequence number of last instruction out of the
// time buffer.
+#if THE_ISA == ALPHA_ISA
squashedSeqNum[tid] = fromCommit->commitInfo[tid].doneSeqNum;
+#else
+ squashedSeqNum[tid] = fromCommit->commitInfo[tid].bdelayDoneSeqNum;
+#endif
// Call doSquash if there are insts in the IQ
if (count[tid] > 0) {
diff --git a/src/cpu/o3/isa_specific.hh b/src/cpu/o3/isa_specific.hh
index f8a9dd8cc..4937589e3 100755
--- a/src/cpu/o3/isa_specific.hh
+++ b/src/cpu/o3/isa_specific.hh
@@ -35,6 +35,11 @@
#include "cpu/o3/alpha/impl.hh"
#include "cpu/o3/alpha/params.hh"
#include "cpu/o3/alpha/dyn_inst.hh"
+#elif THE_ISA == MIPS_ISA
+ #include "cpu/o3/mips/cpu.hh"
+ #include "cpu/o3/mips/impl.hh"
+ #include "cpu/o3/mips/params.hh"
+ #include "cpu/o3/mips/dyn_inst.hh"
#else
- #error "O3CPU doesnt support this ISA"
+ #error "ISA-specific header files O3CPU not defined ISA"
#endif
diff --git a/src/cpu/o3/lsq.hh b/src/cpu/o3/lsq.hh
index d5890950f..190734dc2 100644
--- a/src/cpu/o3/lsq.hh
+++ b/src/cpu/o3/lsq.hh
@@ -70,7 +70,7 @@ class LSQ {
* to work. For now it just returns the port from one of the
* threads.
*/
- Port *getDcachePort() { return thread[0].getDcachePort(); }
+ Port *getDcachePort() { return &dcachePort; }
/** Sets the pointer to the list of active threads. */
void setActiveThreads(std::list<unsigned> *at_ptr);
@@ -258,6 +258,15 @@ class LSQ {
bool willWB(unsigned tid)
{ return thread[tid].willWB(); }
+ /** Returns whether the cache is currently blocked, which is the case
+ * whenever a retry thread id is recorded (retryTid is not -1). */
+ bool cacheBlocked()
+ { return retryTid != -1; }
+
+ /** Sets the retry thread id, indicating that one of the LSQUnits
+ * tried to access the cache but the cache was blocked.
+ * @param tid Thread id of the LSQUnit waiting for a retry; storing -1
+ * makes cacheBlocked() return false again. */
+ void setRetryTid(int tid)
+ { retryTid = tid; }
+
/** Debugging function to print out all instructions. */
void dumpInsts();
/** Debugging function to print out instructions from a specific thread. */
@@ -274,7 +283,49 @@ class LSQ {
template <class T>
Fault write(RequestPtr req, T &data, int store_idx);
- private:
+ /** DcachePort class for this LSQ. Handles doing the
+ * communication with the cache/memory. Timing responses and
+ * retries that arrive here are forwarded to one of the LSQ's
+ * per-thread LSQUnits.
+ */
+ class DcachePort : public Port
+ {
+ protected:
+ /** Pointer to the LSQ this port forwards its callbacks to. */
+ LSQ *lsq;
+
+ public:
+ /** Constructs the port for the given LSQ.
+ * @param _lsq LSQ whose LSQUnits receive responses and retries. */
+ DcachePort(LSQ *_lsq)
+ : lsq(_lsq)
+ { }
+
+ protected:
+ /** Atomic version of receive. Panics. */
+ virtual Tick recvAtomic(PacketPtr pkt);
+
+ /** Functional version of receive. Panics. */
+ virtual void recvFunctional(PacketPtr pkt);
+
+ /** Receives status change. Other than range changing, panics. */
+ virtual void recvStatusChange(Status status);
+
+ /** Reports the address ranges of this device; both lists are
+ * cleared, so no ranges are reported. */
+ virtual void getDeviceAddressRanges(AddrRangeList &resp,
+ AddrRangeList &snoop)
+ { resp.clear(); snoop.clear(); }
+
+ /** Timing version of receive. Handles writing back and
+ * completing the load or store that has returned from
+ * memory, by passing the packet to the LSQUnit of the thread
+ * named in the packet's request. */
+ virtual bool recvTiming(PacketPtr pkt);
+
+ /** Handles doing a retry of the previous send, via the LSQUnit
+ * recorded in the LSQ's retryTid. */
+ virtual void recvRetry();
+ };
+
+ /** D-cache port. */
+ DcachePort dcachePort;
+
+ protected:
/** The LSQ policy for SMT mode. */
LSQPolicy lsqPolicy;
@@ -303,6 +354,10 @@ class LSQ {
/** Number of Threads. */
unsigned numThreads;
+
+ /** The thread id of the LSQ Unit that is currently waiting for a
+ * retry. */
+ int retryTid;
};
template <class Impl>
diff --git a/src/cpu/o3/lsq_impl.hh b/src/cpu/o3/lsq_impl.hh
index 89fd1a71d..db2c253e1 100644
--- a/src/cpu/o3/lsq_impl.hh
+++ b/src/cpu/o3/lsq_impl.hh
@@ -29,23 +29,66 @@
*/
#include <algorithm>
+#include <list>
#include <string>
#include "cpu/o3/lsq.hh"
-using namespace std;
+template <class Impl> // DcachePort hook for atomic-mode accesses.
+Tick
+LSQ<Impl>::DcachePort::recvAtomic(PacketPtr pkt)
+{
+ panic("O3CPU model does not work with atomic mode!"); // atomic mode is rejected outright
+ return curTick; // NOTE(review): presumably unreachable after panic(); satisfies the Tick return type
+}
+
+template <class Impl> // DcachePort hook for functional accesses.
+void
+LSQ<Impl>::DcachePort::recvFunctional(PacketPtr pkt)
+{
+ panic("O3CPU doesn't expect recvFunctional callback!"); // any functional callback is treated as an error
+}
+
+template <class Impl> // DcachePort hook for port status changes.
+void
+LSQ<Impl>::DcachePort::recvStatusChange(Status status)
+{
+ if (status == RangeChange) // range changes are accepted and ignored
+ return;
+
+ panic("O3CPU doesn't expect recvStatusChange callback!"); // every other status is treated as an error
+}
+
+template <class Impl> // DcachePort hook for timing-mode packets.
+bool
+LSQ<Impl>::DcachePort::recvTiming(PacketPtr pkt)
+{
+ lsq->thread[pkt->req->getThreadNum()].completeDataAccess(pkt); // hand the packet to the LSQUnit of the request's thread
+ return true; // always returns true
+}
+
+/**
+ * Handles a retry notification on the LSQ's dcache port by forwarding
+ * it to the LSQUnit that recorded itself in retryTid.
+ */
+template <class Impl>
+void
+LSQ<Impl>::DcachePort::recvRetry()
+{
+    // No LSQUnit is recorded as waiting, so there is nothing to forward
+    // to; thread[-1] must never be indexed.
+    if (lsq->retryTid == -1)
+        return;
+
+    // Speculatively clear the retry Tid *before* forwarding.  The
+    // LSQUnit sets it again through setRetryTid() if it was unable to
+    // complete its access; clearing it after the call would erase that
+    // request and leave the still-blocked unit without a retry.
+    const int retry_tid = lsq->retryTid;
+    lsq->retryTid = -1;
+    lsq->thread[retry_tid].recvRetry();
+}
template <class Impl>
LSQ<Impl>::LSQ(Params *params)
- : LQEntries(params->LQEntries), SQEntries(params->SQEntries),
- numThreads(params->numberOfThreads)
+ : dcachePort(this), LQEntries(params->LQEntries),
+ SQEntries(params->SQEntries), numThreads(params->numberOfThreads),
+ retryTid(-1)
{
DPRINTF(LSQ, "Creating LSQ object.\n");
//**********************************************/
//************ Handle SMT Parameters ***********/
//**********************************************/
- string policy = params->smtLSQPolicy;
+ std::string policy = params->smtLSQPolicy;
//Convert string to lowercase
std::transform(policy.begin(), policy.end(), policy.begin(),
@@ -94,7 +137,8 @@ LSQ<Impl>::LSQ(Params *params)
//Initialize LSQs
for (int tid=0; tid < numThreads; tid++) {
- thread[tid].init(params, maxLQEntries, maxSQEntries, tid);
+ thread[tid].init(params, this, maxLQEntries, maxSQEntries, tid);
+ thread[tid].setDcachePort(&dcachePort);
}
}
@@ -118,7 +162,7 @@ LSQ<Impl>::regStats()
template<class Impl>
void
-LSQ<Impl>::setActiveThreads(list<unsigned> *at_ptr)
+LSQ<Impl>::setActiveThreads(std::list<unsigned> *at_ptr)
{
activeThreads = at_ptr;
assert(activeThreads != 0);
@@ -130,6 +174,8 @@ LSQ<Impl>::setCPU(O3CPU *cpu_ptr)
{
cpu = cpu_ptr;
+ dcachePort.setName(name());
+
for (int tid=0; tid < numThreads; tid++) {
thread[tid].setCPU(cpu_ptr);
}
@@ -182,8 +228,8 @@ LSQ<Impl>::resetEntries()
if (lsqPolicy != Dynamic || numThreads > 1) {
int active_threads = (*activeThreads).size();
- list<unsigned>::iterator threads = (*activeThreads).begin();
- list<unsigned>::iterator list_end = (*activeThreads).end();
+ std::list<unsigned>::iterator threads = (*activeThreads).begin();
+ std::list<unsigned>::iterator list_end = (*activeThreads).end();
int maxEntries;
@@ -221,7 +267,7 @@ template<class Impl>
void
LSQ<Impl>::tick()
{
- list<unsigned>::iterator active_threads = (*activeThreads).begin();
+ std::list<unsigned>::iterator active_threads = (*activeThreads).begin();
while (active_threads != (*activeThreads).end()) {
unsigned tid = *active_threads++;
@@ -270,7 +316,7 @@ template<class Impl>
void
LSQ<Impl>::writebackStores()
{
- list<unsigned>::iterator active_threads = (*activeThreads).begin();
+ std::list<unsigned>::iterator active_threads = (*activeThreads).begin();
while (active_threads != (*activeThreads).end()) {
unsigned tid = *active_threads++;
@@ -289,7 +335,7 @@ bool
LSQ<Impl>::violation()
{
/* Answers: Does Anybody Have a Violation?*/
- list<unsigned>::iterator active_threads = (*activeThreads).begin();
+ std::list<unsigned>::iterator active_threads = (*activeThreads).begin();
while (active_threads != (*activeThreads).end()) {
unsigned tid = *active_threads++;
@@ -306,7 +352,7 @@ LSQ<Impl>::getCount()
{
unsigned total = 0;
- list<unsigned>::iterator active_threads = (*activeThreads).begin();
+ std::list<unsigned>::iterator active_threads = (*activeThreads).begin();
while (active_threads != (*activeThreads).end()) {
unsigned tid = *active_threads++;
@@ -322,7 +368,7 @@ LSQ<Impl>::numLoads()
{
unsigned total = 0;
- list<unsigned>::iterator active_threads = (*activeThreads).begin();
+ std::list<unsigned>::iterator active_threads = (*activeThreads).begin();
while (active_threads != (*activeThreads).end()) {
unsigned tid = *active_threads++;
@@ -338,7 +384,7 @@ LSQ<Impl>::numStores()
{
unsigned total = 0;
- list<unsigned>::iterator active_threads = (*activeThreads).begin();
+ std::list<unsigned>::iterator active_threads = (*activeThreads).begin();
while (active_threads != (*activeThreads).end()) {
unsigned tid = *active_threads++;
@@ -354,7 +400,7 @@ LSQ<Impl>::numLoadsReady()
{
unsigned total = 0;
- list<unsigned>::iterator active_threads = (*activeThreads).begin();
+ std::list<unsigned>::iterator active_threads = (*activeThreads).begin();
while (active_threads != (*activeThreads).end()) {
unsigned tid = *active_threads++;
@@ -370,7 +416,7 @@ LSQ<Impl>::numFreeEntries()
{
unsigned total = 0;
- list<unsigned>::iterator active_threads = (*activeThreads).begin();
+ std::list<unsigned>::iterator active_threads = (*activeThreads).begin();
while (active_threads != (*activeThreads).end()) {
unsigned tid = *active_threads++;
@@ -394,7 +440,7 @@ template<class Impl>
bool
LSQ<Impl>::isFull()
{
- list<unsigned>::iterator active_threads = (*activeThreads).begin();
+ std::list<unsigned>::iterator active_threads = (*activeThreads).begin();
while (active_threads != (*activeThreads).end()) {
unsigned tid = *active_threads++;
@@ -421,7 +467,7 @@ template<class Impl>
bool
LSQ<Impl>::lqFull()
{
- list<unsigned>::iterator active_threads = (*activeThreads).begin();
+ std::list<unsigned>::iterator active_threads = (*activeThreads).begin();
while (active_threads != (*activeThreads).end()) {
unsigned tid = *active_threads++;
@@ -448,7 +494,7 @@ template<class Impl>
bool
LSQ<Impl>::sqFull()
{
- list<unsigned>::iterator active_threads = (*activeThreads).begin();
+ std::list<unsigned>::iterator active_threads = (*activeThreads).begin();
while (active_threads != (*activeThreads).end()) {
unsigned tid = *active_threads++;
@@ -475,7 +521,7 @@ template<class Impl>
bool
LSQ<Impl>::isStalled()
{
- list<unsigned>::iterator active_threads = (*activeThreads).begin();
+ std::list<unsigned>::iterator active_threads = (*activeThreads).begin();
while (active_threads != (*activeThreads).end()) {
unsigned tid = *active_threads++;
@@ -500,7 +546,7 @@ template<class Impl>
bool
LSQ<Impl>::hasStoresToWB()
{
- list<unsigned>::iterator active_threads = (*activeThreads).begin();
+ std::list<unsigned>::iterator active_threads = (*activeThreads).begin();
if ((*activeThreads).empty())
return false;
@@ -518,7 +564,7 @@ template<class Impl>
bool
LSQ<Impl>::willWB()
{
- list<unsigned>::iterator active_threads = (*activeThreads).begin();
+ std::list<unsigned>::iterator active_threads = (*activeThreads).begin();
while (active_threads != (*activeThreads).end()) {
unsigned tid = *active_threads++;
@@ -533,7 +579,7 @@ template<class Impl>
void
LSQ<Impl>::dumpInsts()
{
- list<unsigned>::iterator active_threads = (*activeThreads).begin();
+ std::list<unsigned>::iterator active_threads = (*activeThreads).begin();
while (active_threads != (*activeThreads).end()) {
unsigned tid = *active_threads++;
diff --git a/src/cpu/o3/lsq_unit.hh b/src/cpu/o3/lsq_unit.hh
index 4d7a8350b..512b5a63c 100644
--- a/src/cpu/o3/lsq_unit.hh
+++ b/src/cpu/o3/lsq_unit.hh
@@ -64,6 +64,7 @@ class LSQUnit {
typedef typename Impl::O3CPU O3CPU;
typedef typename Impl::DynInstPtr DynInstPtr;
typedef typename Impl::CPUPol::IEW IEW;
+ typedef typename Impl::CPUPol::LSQ LSQ;
typedef typename Impl::CPUPol::IssueStruct IssueStruct;
public:
@@ -71,17 +72,12 @@ class LSQUnit {
LSQUnit();
/** Initializes the LSQ unit with the specified number of entries. */
- void init(Params *params, unsigned maxLQEntries,
+ void init(Params *params, LSQ *lsq_ptr, unsigned maxLQEntries,
unsigned maxSQEntries, unsigned id);
/** Returns the name of the LSQ unit. */
std::string name() const;
- /** Returns the dcache port.
- * @todo: Remove this once the port moves up to the LSQ level.
- */
- Port *getDcachePort() { return dcachePort; }
-
/** Registers statistics. */
void regStats();
@@ -92,6 +88,10 @@ class LSQUnit {
void setIEW(IEW *iew_ptr)
{ iewStage = iew_ptr; }
+ /** Sets the pointer to the dcache port.
+ * @param dcache_port Port stored in dcachePort for later use. */
+ void setDcachePort(Port *dcache_port)
+ { dcachePort = dcache_port; }
+
/** Switches out LSQ unit. */
void switchOut();
@@ -211,6 +211,9 @@ class LSQUnit {
!storeQueue[storeWBIdx].completed &&
!isStoreBlocked; }
+ /** Handles doing the retry. */
+ void recvRetry();
+
private:
/** Writes back the instruction, sending it to IEW. */
void writeback(DynInstPtr &inst, PacketPtr pkt);
@@ -221,9 +224,6 @@ class LSQUnit {
/** Completes the store at the specified index. */
void completeStore(int store_idx);
- /** Handles doing the retry. */
- void recvRetry();
-
/** Increments the given store index (circular queue). */
inline void incrStIdx(int &store_idx);
/** Decrements the given store index (circular queue). */
@@ -244,54 +244,11 @@ class LSQUnit {
/** Pointer to the IEW stage. */
IEW *iewStage;
- /** Pointer to memory object. */
- MemObject *mem;
+ /** Pointer to the LSQ. */
+ LSQ *lsq;
- /** DcachePort class for this LSQ Unit. Handles doing the
- * communication with the cache/memory.
- * @todo: Needs to be moved to the LSQ level and have some sort
- * of arbitration.
- */
- class DcachePort : public Port
- {
- protected:
- /** Pointer to CPU. */
- O3CPU *cpu;
- /** Pointer to LSQ. */
- LSQUnit *lsq;
-
- public:
- /** Default constructor. */
- DcachePort(O3CPU *_cpu, LSQUnit *_lsq)
- : Port(_lsq->name() + "-dport"), cpu(_cpu), lsq(_lsq)
- { }
-
- protected:
- /** Atomic version of receive. Panics. */
- virtual Tick recvAtomic(PacketPtr pkt);
-
- /** Functional version of receive. Panics. */
- virtual void recvFunctional(PacketPtr pkt);
-
- /** Receives status change. Other than range changing, panics. */
- virtual void recvStatusChange(Status status);
-
- /** Returns the address ranges of this device. */
- virtual void getDeviceAddressRanges(AddrRangeList &resp,
- AddrRangeList &snoop)
- { resp.clear(); snoop.clear(); }
-
- /** Timing version of receive. Handles writing back and
- * completing the load or store that has returned from
- * memory. */
- virtual bool recvTiming(PacketPtr pkt);
-
- /** Handles doing a retry of the previous send. */
- virtual void recvRetry();
- };
-
- /** Pointer to the D-cache. */
- DcachePort *dcachePort;
+ /** Pointer to the dcache port. Used only for sending. */
+ Port *dcachePort;
/** Derived class to hold any sender state the LSQ needs. */
class LSQSenderState : public Packet::SenderState
@@ -644,6 +601,7 @@ LSQUnit<Impl>::read(Request *req, T &data, int load_idx)
// Tell IQ/mem dep unit that this instruction will need to be
// rescheduled eventually
iewStage->rescheduleMemInst(load_inst);
+ iewStage->decrWb(load_inst->seqNum);
++lsqRescheduledLoads;
// Do not generate a writeback event as this instruction is not
@@ -658,7 +616,7 @@ LSQUnit<Impl>::read(Request *req, T &data, int load_idx)
}
// If there's no forwarding case, then go access memory
- DPRINTF(LSQUnit, "Doing functional access for inst [sn:%lli] PC %#x\n",
+ DPRINTF(LSQUnit, "Doing memory access for inst [sn:%lli] PC %#x\n",
load_inst->seqNum, load_inst->readPC());
assert(!load_inst->memData);
@@ -666,9 +624,6 @@ LSQUnit<Impl>::read(Request *req, T &data, int load_idx)
++usedPorts;
- DPRINTF(LSQUnit, "Doing timing access for inst PC %#x\n",
- load_inst->readPC());
-
PacketPtr data_pkt = new Packet(req, Packet::ReadReq, Packet::Broadcast);
data_pkt->dataStatic(load_inst->memData);
@@ -678,8 +633,18 @@ LSQUnit<Impl>::read(Request *req, T &data, int load_idx)
state->inst = load_inst;
data_pkt->senderState = state;
- // if we have a cache, do cache access too
- if (!dcachePort->sendTiming(data_pkt)) {
+ // If the cache is not blocked, do the cache access.
+ if (!lsq->cacheBlocked()) {
+ if (!dcachePort->sendTiming(data_pkt)) {
+ // If the access didn't succeed, tell the LSQ by setting
+ // the retry thread id.
+ lsq->setRetryTid(lsqID);
+ }
+ }
+
+ // If the cache was blocked, or has become blocked due to the access,
+ // handle it.
+ if (lsq->cacheBlocked()) {
++lsqCacheBlocked;
// There's an older load that's already going to squash.
if (isLoadBlocked && blockedLoadSeqNum < load_inst->seqNum)
diff --git a/src/cpu/o3/lsq_unit_impl.hh b/src/cpu/o3/lsq_unit_impl.hh
index 8e951534f..4f5dbbf1c 100644
--- a/src/cpu/o3/lsq_unit_impl.hh
+++ b/src/cpu/o3/lsq_unit_impl.hh
@@ -31,6 +31,7 @@
#include "config/use_checker.hh"
+#include "cpu/o3/lsq.hh"
#include "cpu/o3/lsq_unit.hh"
#include "base/str.hh"
#include "mem/packet.hh"
@@ -96,46 +97,6 @@ LSQUnit<Impl>::completeDataAccess(PacketPtr pkt)
}
template <class Impl>
-Tick
-LSQUnit<Impl>::DcachePort::recvAtomic(PacketPtr pkt)
-{
- panic("O3CPU model does not work with atomic mode!");
- return curTick;
-}
-
-template <class Impl>
-void
-LSQUnit<Impl>::DcachePort::recvFunctional(PacketPtr pkt)
-{
- panic("O3CPU doesn't expect recvFunctional callback!");
-}
-
-template <class Impl>
-void
-LSQUnit<Impl>::DcachePort::recvStatusChange(Status status)
-{
- if (status == RangeChange)
- return;
-
- panic("O3CPU doesn't expect recvStatusChange callback!");
-}
-
-template <class Impl>
-bool
-LSQUnit<Impl>::DcachePort::recvTiming(PacketPtr pkt)
-{
- lsq->completeDataAccess(pkt);
- return true;
-}
-
-template <class Impl>
-void
-LSQUnit<Impl>::DcachePort::recvRetry()
-{
- lsq->recvRetry();
-}
-
-template <class Impl>
LSQUnit<Impl>::LSQUnit()
: loads(0), stores(0), storesToWB(0), stalled(false),
isStoreBlocked(false), isLoadBlocked(false),
@@ -145,13 +106,15 @@ LSQUnit<Impl>::LSQUnit()
template<class Impl>
void
-LSQUnit<Impl>::init(Params *params, unsigned maxLQEntries,
+LSQUnit<Impl>::init(Params *params, LSQ *lsq_ptr, unsigned maxLQEntries,
unsigned maxSQEntries, unsigned id)
{
DPRINTF(LSQUnit, "Creating LSQUnit%i object.\n",id);
switchedOut = false;
+ lsq = lsq_ptr;
+
lsqID = id;
// Add 1 for the sentinel entry (they are circular queues).
@@ -168,8 +131,6 @@ LSQUnit<Impl>::init(Params *params, unsigned maxLQEntries,
usedPorts = 0;
cachePorts = params->cachePorts;
- mem = params->mem;
-
memDepViolator = NULL;
blockedLoadSeqNum = 0;
@@ -180,7 +141,6 @@ void
LSQUnit<Impl>::setCPU(O3CPU *cpu_ptr)
{
cpu = cpu_ptr;
- dcachePort = new DcachePort(cpu, this);
#if USE_CHECKER
if (cpu->checker) {
@@ -588,7 +548,7 @@ LSQUnit<Impl>::writebackStores()
storeQueue[storeWBIdx].canWB &&
usedPorts < cachePorts) {
- if (isStoreBlocked) {
+ if (isStoreBlocked || lsq->cacheBlocked()) {
DPRINTF(LSQUnit, "Unable to write back any more stores, cache"
" is blocked!\n");
break;
@@ -830,6 +790,7 @@ LSQUnit<Impl>::writeback(DynInstPtr &inst, PacketPtr pkt)
// Squashed instructions do not need to complete their access.
if (inst->isSquashed()) {
+ iewStage->decrWb(inst->seqNum);
assert(!inst->isStore());
++lsqIgnoredResponses;
return;
@@ -911,6 +872,7 @@ LSQUnit<Impl>::recvRetry()
} else {
// Still blocked!
++lsqCacheBlocked;
+ lsq->setRetryTid(lsqID);
}
} else if (isLoadBlocked) {
DPRINTF(LSQUnit, "Loads squash themselves and all younger insts, "
diff --git a/src/cpu/o3/mips/cpu.cc b/src/cpu/o3/mips/cpu.cc
new file mode 100755
index 000000000..420f460b2
--- /dev/null
+++ b/src/cpu/o3/mips/cpu.cc
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2006 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Kevin Lim
+ * Korey Sewell
+ */
+
+#include "cpu/o3/mips/impl.hh"
+#include "cpu/o3/mips/cpu_impl.hh"
+#include "cpu/o3/mips/dyn_inst.hh"
+
+// Force instantiation of MipsO3CPU for all the implementations that are
+// needed. Consider merging this and mips_dyn_inst.cc, and maybe all
+// classes that depend on a certain impl, into one file (mips_impl.cc?).
+template class MipsO3CPU<MipsSimpleImpl>;
diff --git a/src/cpu/o3/mips/cpu.hh b/src/cpu/o3/mips/cpu.hh
new file mode 100755
index 000000000..bf04b9f69
--- /dev/null
+++ b/src/cpu/o3/mips/cpu.hh
@@ -0,0 +1,149 @@
+/*
+ * Copyright (c) 2006 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Kevin Lim
+ * Korey Sewell
+ */
+
+#ifndef __CPU_O3_MIPS_CPU_HH__
+#define __CPU_O3_MIPS_CPU_HH__
+
+#include "arch/mips/regfile.hh"
+#include "arch/mips/syscallreturn.hh"
+#include "cpu/thread_context.hh"
+#include "cpu/o3/cpu.hh"
+#include "sim/byteswap.hh"
+#include "sim/faults.hh"
+
+class EndQuiesceEvent;
+namespace Kernel {
+ class Statistics;
+};
+
+class TranslatingPort;
+
+/**
+ * MipsO3CPU class. Derives from the FullO3CPU class, and
+ * implements all ISA and implementation specific functions of the
+ * CPU. This is the CPU class that is used for the SimObjects, and is
+ * what is given to the DynInsts. Most of its state exists in the
+ * FullO3CPU; the state it has is mainly for ISA specific
+ * functionality.
+ */
+template <class Impl>
+class MipsO3CPU : public FullO3CPU<Impl>
+{
+ public:
+ /** Per-thread state type; both aliases name the same O3ThreadState. */
+ typedef O3ThreadState<Impl> ImplState;
+ typedef O3ThreadState<Impl> Thread;
+ /** Parameter struct type supplied by the implementation policy. */
+ typedef typename Impl::Params Params;
+
+ /** Constructs a MipsO3CPU with the given parameters. */
+ MipsO3CPU(Params *params);
+
+ /** Registers statistics. */
+ void regStats();
+
+ /** Translates an instruction request in syscall emulation mode by
+ * handing it to the page table of the thread's process.
+ */
+ Fault translateInstReq(RequestPtr &req, Thread *thread)
+ {
+ return thread->getProcessPtr()->pTable->translate(req);
+ }
+
+ /** Translates a data read request in syscall emulation mode by
+ * handing it to the page table of the thread's process.
+ */
+ Fault translateDataReadReq(RequestPtr &req, Thread *thread)
+ {
+ return thread->getProcessPtr()->pTable->translate(req);
+ }
+
+ /** Translates a data write request in syscall emulation mode by
+ * handing it to the page table of the thread's process.
+ */
+ Fault translateDataWriteReq(RequestPtr &req, Thread *thread)
+ {
+ return thread->getProcessPtr()->pTable->translate(req);
+ }
+
+ /** Reads a miscellaneous register. */
+ TheISA::MiscReg readMiscReg(int misc_reg, unsigned tid);
+
+ /** Reads a misc. register, including any side effects the read
+ * might have as defined by the architecture.
+ */
+ TheISA::MiscReg readMiscRegWithEffect(int misc_reg,
+ Fault &fault, unsigned tid);
+
+ /** Sets a miscellaneous register. */
+ Fault setMiscReg(int misc_reg, const TheISA::MiscReg &val, unsigned tid);
+
+ /** Sets a misc. register, including any side effects the write
+ * might have as defined by the architecture.
+ */
+ Fault setMiscRegWithEffect(int misc_reg,
+ const TheISA::MiscReg &val, unsigned tid);
+
+ /** Initiates a squash of all in-flight instructions for a given
+ * thread. The source of the squash is an external update of
+ * state through the TC.
+ */
+ void squashFromTC(unsigned tid);
+
+ /** Traps to handle given fault. */
+ void trap(Fault fault, unsigned tid);
+
+ /** Executes a syscall.
+ * @todo: Determine if this needs to be virtual.
+ */
+ void syscall(int64_t callnum, int tid);
+ /** Gets a syscall argument. */
+ TheISA::IntReg getSyscallArg(int i, int tid);
+
+ /** Used to shift args for indirect syscall. */
+ void setSyscallArg(int i, TheISA::IntReg val, int tid);
+
+ /** Sets the return value of a syscall. */
+ void setSyscallReturn(SyscallReturn return_value, int tid);
+
+ /** CPU read function, forwards read to LSQ. */
+ template <class T>
+ Fault read(RequestPtr &req, T &data, int load_idx)
+ {
+ return this->iew.ldstQueue.read(req, data, load_idx);
+ }
+
+ /** CPU write function, forwards write to LSQ. */
+ template <class T>
+ Fault write(RequestPtr &req, T &data, int store_idx)
+ {
+ return this->iew.ldstQueue.write(req, data, store_idx);
+ }
+
+ /** Address paired with lockFlag; presumably the address the lock
+ * applies to -- TODO confirm against the users of this field.
+ */
+ Addr lockAddr;
+
+ /** Temporary fix for the lock flag, works in the UP case. */
+ bool lockFlag;
+};
+
+#endif // __CPU_O3_MIPS_CPU_HH__
diff --git a/src/cpu/o3/mips/cpu_builder.cc b/src/cpu/o3/mips/cpu_builder.cc
new file mode 100644
index 000000000..f1c3b33a5
--- /dev/null
+++ b/src/cpu/o3/mips/cpu_builder.cc
@@ -0,0 +1,394 @@
+/*
+ * Copyright (c) 2006 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Kevin Lim
+ * Korey Sewell
+ */
+
+#include <string>
+
+#include "cpu/base.hh"
+#include "cpu/o3/mips/cpu.hh"
+#include "cpu/o3/mips/impl.hh"
+#include "cpu/o3/mips/params.hh"
+#include "cpu/o3/fu_pool.hh"
+#include "sim/builder.hh"
+
+/**
+ * Concrete, non-template CPU class handed to the SimObject builder
+ * macros in this file.  It only binds MipsO3CPU to MipsSimpleImpl and
+ * adds no state or behavior of its own.
+ */
+class DerivO3CPU : public MipsO3CPU<MipsSimpleImpl>
+{
+  public:
+    /** Passes the parameter struct straight through to MipsO3CPU. */
+    DerivO3CPU(MipsSimpleParams *p) : MipsO3CPU<MipsSimpleImpl>(p) {}
+};
+
+// Declares every configuration parameter the DerivO3CPU builder accepts.
+// Each entry here has a matching INIT_PARAM/INIT_PARAM_DFLT entry (with its
+// description and optional default) in the BEGIN_INIT_SIM_OBJECT_PARAMS
+// list for DerivO3CPU; keep the two lists in step when adding parameters.
+BEGIN_DECLARE_SIM_OBJECT_PARAMS(DerivO3CPU)
+
+// Basic CPU configuration.
+Param<int> clock;
+Param<int> numThreads;
+Param<int> activity;
+
+// Processes to run, one per hardware thread context.
+SimObjectVectorParam<Process *> workload;
+
+SimObjectParam<MemObject *> mem;
+
+// Optional checker CPU.
+SimObjectParam<BaseCPU *> checker;
+
+// Instruction/load count limits at which simulation terminates.
+Param<Counter> max_insts_any_thread;
+Param<Counter> max_insts_all_threads;
+Param<Counter> max_loads_any_thread;
+Param<Counter> max_loads_all_threads;
+
+Param<unsigned> cachePorts;
+
+// Delays from later stages back to fetch, and the fetch width.
+Param<unsigned> decodeToFetchDelay;
+Param<unsigned> renameToFetchDelay;
+Param<unsigned> iewToFetchDelay;
+Param<unsigned> commitToFetchDelay;
+Param<unsigned> fetchWidth;
+
+// Delays into decode, and the decode width.
+Param<unsigned> renameToDecodeDelay;
+Param<unsigned> iewToDecodeDelay;
+Param<unsigned> commitToDecodeDelay;
+Param<unsigned> fetchToDecodeDelay;
+Param<unsigned> decodeWidth;
+
+// Delays into rename, and the rename width.
+Param<unsigned> iewToRenameDelay;
+Param<unsigned> commitToRenameDelay;
+Param<unsigned> decodeToRenameDelay;
+Param<unsigned> renameWidth;
+
+// Issue/Execute/Writeback (IEW) stage delays, widths and FU pool.
+Param<unsigned> commitToIEWDelay;
+Param<unsigned> renameToIEWDelay;
+Param<unsigned> issueToExecuteDelay;
+Param<unsigned> dispatchWidth;
+Param<unsigned> issueWidth;
+Param<unsigned> wbWidth;
+Param<unsigned> wbDepth;
+SimObjectParam<FUPool *> fuPool;
+
+// Commit stage delays, widths and trap latency.
+Param<unsigned> iewToCommitDelay;
+Param<unsigned> renameToROBDelay;
+Param<unsigned> commitWidth;
+Param<unsigned> squashWidth;
+Param<Tick> trapLatency;
+
+// Time buffer sizes for backwards/forward communication.
+Param<unsigned> backComSize;
+Param<unsigned> forwardComSize;
+
+// Branch predictor configuration.
+Param<std::string> predType;
+Param<unsigned> localPredictorSize;
+Param<unsigned> localCtrBits;
+Param<unsigned> localHistoryTableSize;
+Param<unsigned> localHistoryBits;
+Param<unsigned> globalPredictorSize;
+Param<unsigned> globalCtrBits;
+Param<unsigned> globalHistoryBits;
+Param<unsigned> choicePredictorSize;
+Param<unsigned> choiceCtrBits;
+
+// Branch target buffer.
+Param<unsigned> BTBEntries;
+Param<unsigned> BTBTagSize;
+
+// Return address stack.
+Param<unsigned> RASSize;
+
+// Load/store queue sizes and store-set table sizes.
+Param<unsigned> LQEntries;
+Param<unsigned> SQEntries;
+Param<unsigned> LFSTSize;
+Param<unsigned> SSITSize;
+
+// Physical register file, instruction queue and reorder buffer sizes.
+Param<unsigned> numPhysIntRegs;
+Param<unsigned> numPhysFloatRegs;
+Param<unsigned> numIQEntries;
+Param<unsigned> numROBEntries;
+
+// SMT policies and thresholds.
+Param<unsigned> smtNumFetchingThreads;
+Param<std::string> smtFetchPolicy;
+Param<std::string> smtLSQPolicy;
+Param<unsigned> smtLSQThreshold;
+Param<std::string> smtIQPolicy;
+Param<unsigned> smtIQThreshold;
+Param<std::string> smtROBPolicy;
+Param<unsigned> smtROBThreshold;
+Param<std::string> smtCommitPolicy;
+
+Param<unsigned> instShiftAmt;
+
+Param<bool> defer_registration;
+
+// Function tracing.
+Param<bool> function_trace;
+Param<Tick> function_trace_start;
+
+END_DECLARE_SIM_OBJECT_PARAMS(DerivO3CPU)
+
+// Supplies the description string (and, for the _DFLT form, the default
+// value) of every parameter declared for DerivO3CPU.
+//
+// Several descriptions are split across adjacent string literals.  Adjacent
+// literals are concatenated with nothing in between, so each split must
+// carry its own separating space; otherwise the text runs together
+// (e.g. "reachedthis inst count").
+BEGIN_INIT_SIM_OBJECT_PARAMS(DerivO3CPU)
+
+    INIT_PARAM(clock, "clock speed"),
+    INIT_PARAM(numThreads, "number of HW thread contexts"),
+    INIT_PARAM_DFLT(activity, "Initial activity count", 0),
+
+    INIT_PARAM(workload, "Processes to run"),
+
+    INIT_PARAM(mem, "Memory"),
+
+    INIT_PARAM_DFLT(checker, "Checker CPU", NULL),
+
+    INIT_PARAM_DFLT(max_insts_any_thread,
+                    "Terminate when any thread reaches this inst count",
+                    0),
+    INIT_PARAM_DFLT(max_insts_all_threads,
+                    "Terminate when all threads have reached "
+                    "this inst count",
+                    0),
+    INIT_PARAM_DFLT(max_loads_any_thread,
+                    "Terminate when any thread reaches this load count",
+                    0),
+    INIT_PARAM_DFLT(max_loads_all_threads,
+                    "Terminate when all threads have reached this load "
+                    "count",
+                    0),
+
+    INIT_PARAM_DFLT(cachePorts, "Cache Ports", 200),
+
+    INIT_PARAM(decodeToFetchDelay, "Decode to fetch delay"),
+    INIT_PARAM(renameToFetchDelay, "Rename to fetch delay"),
+    INIT_PARAM(iewToFetchDelay, "Issue/Execute/Writeback to fetch "
+               "delay"),
+    INIT_PARAM(commitToFetchDelay, "Commit to fetch delay"),
+    INIT_PARAM(fetchWidth, "Fetch width"),
+    INIT_PARAM(renameToDecodeDelay, "Rename to decode delay"),
+    INIT_PARAM(iewToDecodeDelay, "Issue/Execute/Writeback to decode "
+               "delay"),
+    INIT_PARAM(commitToDecodeDelay, "Commit to decode delay"),
+    INIT_PARAM(fetchToDecodeDelay, "Fetch to decode delay"),
+    INIT_PARAM(decodeWidth, "Decode width"),
+
+    INIT_PARAM(iewToRenameDelay, "Issue/Execute/Writeback to rename "
+               "delay"),
+    INIT_PARAM(commitToRenameDelay, "Commit to rename delay"),
+    INIT_PARAM(decodeToRenameDelay, "Decode to rename delay"),
+    INIT_PARAM(renameWidth, "Rename width"),
+
+    INIT_PARAM(commitToIEWDelay, "Commit to "
+               "Issue/Execute/Writeback delay"),
+    INIT_PARAM(renameToIEWDelay, "Rename to "
+               "Issue/Execute/Writeback delay"),
+    INIT_PARAM(issueToExecuteDelay, "Issue to execute delay (internal "
+               "to the IEW stage)"),
+    INIT_PARAM(dispatchWidth, "Dispatch width"),
+    INIT_PARAM(issueWidth, "Issue width"),
+    INIT_PARAM(wbWidth, "Writeback width"),
+    INIT_PARAM(wbDepth, "Writeback depth (number of cycles it can buffer)"),
+    INIT_PARAM_DFLT(fuPool, "Functional unit pool", NULL),
+
+    INIT_PARAM(iewToCommitDelay, "Issue/Execute/Writeback to commit "
+               "delay"),
+    INIT_PARAM(renameToROBDelay, "Rename to reorder buffer delay"),
+    INIT_PARAM(commitWidth, "Commit width"),
+    INIT_PARAM(squashWidth, "Squash width"),
+    INIT_PARAM_DFLT(trapLatency, "Number of cycles before the trap is handled", 6),
+
+    INIT_PARAM(backComSize, "Time buffer size for backwards communication"),
+    INIT_PARAM(forwardComSize, "Time buffer size for forward communication"),
+
+    INIT_PARAM(predType, "Type of branch predictor ('local', 'tournament')"),
+    INIT_PARAM(localPredictorSize, "Size of local predictor"),
+    INIT_PARAM(localCtrBits, "Bits per counter"),
+    INIT_PARAM(localHistoryTableSize, "Size of local history table"),
+    INIT_PARAM(localHistoryBits, "Bits for the local history"),
+    INIT_PARAM(globalPredictorSize, "Size of global predictor"),
+    INIT_PARAM(globalCtrBits, "Bits per counter"),
+    INIT_PARAM(globalHistoryBits, "Bits of history"),
+    INIT_PARAM(choicePredictorSize, "Size of choice predictor"),
+    INIT_PARAM(choiceCtrBits, "Bits of choice counters"),
+
+    INIT_PARAM(BTBEntries, "Number of BTB entries"),
+    INIT_PARAM(BTBTagSize, "Size of the BTB tags, in bits"),
+
+    INIT_PARAM(RASSize, "RAS size"),
+
+    INIT_PARAM(LQEntries, "Number of load queue entries"),
+    INIT_PARAM(SQEntries, "Number of store queue entries"),
+    INIT_PARAM(LFSTSize, "Last fetched store table size"),
+    INIT_PARAM(SSITSize, "Store set ID table size"),
+
+    INIT_PARAM(numPhysIntRegs, "Number of physical integer registers"),
+    INIT_PARAM(numPhysFloatRegs, "Number of physical floating point "
+               "registers"),
+    INIT_PARAM(numIQEntries, "Number of instruction queue entries"),
+    INIT_PARAM(numROBEntries, "Number of reorder buffer entries"),
+
+    INIT_PARAM_DFLT(smtNumFetchingThreads, "SMT Number of Fetching Threads", 1),
+    INIT_PARAM_DFLT(smtFetchPolicy, "SMT Fetch Policy", "SingleThread"),
+    INIT_PARAM_DFLT(smtLSQPolicy, "SMT LSQ Sharing Policy", "Partitioned"),
+    INIT_PARAM_DFLT(smtLSQThreshold,"SMT LSQ Threshold", 100),
+    INIT_PARAM_DFLT(smtIQPolicy, "SMT IQ Policy", "Partitioned"),
+    INIT_PARAM_DFLT(smtIQThreshold, "SMT IQ Threshold", 100),
+    INIT_PARAM_DFLT(smtROBPolicy, "SMT ROB Sharing Policy", "Partitioned"),
+    INIT_PARAM_DFLT(smtROBThreshold,"SMT ROB Threshold", 100),
+    INIT_PARAM_DFLT(smtCommitPolicy,"SMT Commit Fetch Policy", "RoundRobin"),
+
+    INIT_PARAM(instShiftAmt, "Number of bits to shift instructions by"),
+    INIT_PARAM(defer_registration, "defer system registration (for sampling)"),
+
+    INIT_PARAM(function_trace, "Enable function trace"),
+    INIT_PARAM(function_trace_start, "Cycle to start function trace")
+
+END_INIT_SIM_OBJECT_PARAMS(DerivO3CPU)
+
+// Builds a DerivO3CPU from the parsed configuration parameters: copies every
+// declared parameter into a freshly allocated MipsSimpleParams and hands
+// that struct to the CPU constructor.  Aborts via fatal() when no workload
+// is given.
+CREATE_SIM_OBJECT(DerivO3CPU)
+{
+    DerivO3CPU *cpu;
+
+    // Validate the workload before anything is derived from it.
+    if (workload.size() == 0) {
+        fatal("Must specify at least one workload!");
+    }
+
+    // In non-full-system mode, we infer the number of threads from
+    // the workload if it's not explicitly specified.
+    int actual_num_threads =
+        (numThreads.isValid() && numThreads >= workload.size()) ?
+         numThreads : workload.size();
+
+    MipsSimpleParams *params = new MipsSimpleParams;
+
+    params->clock = clock;
+
+    params->name = getInstanceName();
+    params->numberOfThreads = actual_num_threads;
+    params->activity = activity;
+
+    params->workload = workload;
+
+    params->mem = mem;
+
+    params->checker = checker;
+
+    params->max_insts_any_thread = max_insts_any_thread;
+    params->max_insts_all_threads = max_insts_all_threads;
+    params->max_loads_any_thread = max_loads_any_thread;
+    params->max_loads_all_threads = max_loads_all_threads;
+
+    //
+    // Caches
+    //
+    params->cachePorts = cachePorts;
+
+    params->decodeToFetchDelay = decodeToFetchDelay;
+    params->renameToFetchDelay = renameToFetchDelay;
+    params->iewToFetchDelay = iewToFetchDelay;
+    params->commitToFetchDelay = commitToFetchDelay;
+    params->fetchWidth = fetchWidth;
+
+    params->renameToDecodeDelay = renameToDecodeDelay;
+    params->iewToDecodeDelay = iewToDecodeDelay;
+    params->commitToDecodeDelay = commitToDecodeDelay;
+    params->fetchToDecodeDelay = fetchToDecodeDelay;
+    params->decodeWidth = decodeWidth;
+
+    params->iewToRenameDelay = iewToRenameDelay;
+    params->commitToRenameDelay = commitToRenameDelay;
+    params->decodeToRenameDelay = decodeToRenameDelay;
+    params->renameWidth = renameWidth;
+
+    params->commitToIEWDelay = commitToIEWDelay;
+    params->renameToIEWDelay = renameToIEWDelay;
+    params->issueToExecuteDelay = issueToExecuteDelay;
+    params->dispatchWidth = dispatchWidth;
+    params->issueWidth = issueWidth;
+    params->wbWidth = wbWidth;
+    params->wbDepth = wbDepth;
+    params->fuPool = fuPool;
+
+    params->iewToCommitDelay = iewToCommitDelay;
+    params->renameToROBDelay = renameToROBDelay;
+    params->commitWidth = commitWidth;
+    params->squashWidth = squashWidth;
+    params->trapLatency = trapLatency;
+
+    params->backComSize = backComSize;
+    params->forwardComSize = forwardComSize;
+
+    params->predType = predType;
+    params->localPredictorSize = localPredictorSize;
+    params->localCtrBits = localCtrBits;
+    params->localHistoryTableSize = localHistoryTableSize;
+    params->localHistoryBits = localHistoryBits;
+    params->globalPredictorSize = globalPredictorSize;
+    params->globalCtrBits = globalCtrBits;
+    params->globalHistoryBits = globalHistoryBits;
+    params->choicePredictorSize = choicePredictorSize;
+    params->choiceCtrBits = choiceCtrBits;
+
+    params->BTBEntries = BTBEntries;
+    params->BTBTagSize = BTBTagSize;
+
+    params->RASSize = RASSize;
+
+    params->LQEntries = LQEntries;
+    params->SQEntries = SQEntries;
+
+    params->SSITSize = SSITSize;
+    params->LFSTSize = LFSTSize;
+
+    params->numPhysIntRegs = numPhysIntRegs;
+    params->numPhysFloatRegs = numPhysFloatRegs;
+    params->numIQEntries = numIQEntries;
+    params->numROBEntries = numROBEntries;
+
+    params->smtNumFetchingThreads = smtNumFetchingThreads;
+
+    // Default smtFetchPolicy to "RoundRobin", if necessary.
+    std::string round_robin_policy = "RoundRobin";
+    std::string single_thread = "SingleThread";
+
+    if (actual_num_threads > 1 && single_thread.compare(smtFetchPolicy) == 0)
+        params->smtFetchPolicy = round_robin_policy;
+    else
+        params->smtFetchPolicy = smtFetchPolicy;
+
+    params->smtIQPolicy = smtIQPolicy;
+    // The IQ threshold is declared and defaulted like the LSQ/ROB
+    // thresholds, so forward it as well instead of leaving the params
+    // field unset.
+    params->smtIQThreshold = smtIQThreshold;
+    params->smtLSQPolicy = smtLSQPolicy;
+    params->smtLSQThreshold = smtLSQThreshold;
+    params->smtROBPolicy = smtROBPolicy;
+    params->smtROBThreshold = smtROBThreshold;
+    params->smtCommitPolicy = smtCommitPolicy;
+
+    // NOTE(review): the instShiftAmt parameter is declared above but is not
+    // consulted here; the shift is fixed at 2 -- confirm this is intended.
+    params->instShiftAmt = 2;
+
+    params->deferRegistration = defer_registration;
+
+    params->functionTrace = function_trace;
+    params->functionTraceStart = function_trace_start;
+
+    cpu = new DerivO3CPU(params);
+
+    return cpu;
+}
+
+REGISTER_SIM_OBJECT("DerivO3CPU", DerivO3CPU)
+
diff --git a/src/cpu/o3/mips/cpu_impl.hh b/src/cpu/o3/mips/cpu_impl.hh
new file mode 100644
index 000000000..e08741626
--- /dev/null
+++ b/src/cpu/o3/mips/cpu_impl.hh
@@ -0,0 +1,250 @@
+/*
+ * Copyright (c) 2006 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Kevin Lim
+ * Korey Sewell
+ */
+
+#include "config/use_checker.hh"
+
+#include "arch/mips/faults.hh"
+#include "base/cprintf.hh"
+#include "base/statistics.hh"
+#include "base/timebuf.hh"
+#include "cpu/checker/thread_context.hh"
+#include "sim/sim_events.hh"
+#include "sim/stats.hh"
+
+#include "cpu/o3/mips/cpu.hh"
+#include "cpu/o3/mips/params.hh"
+#include "cpu/o3/mips/thread_context.hh"
+#include "cpu/o3/comm.hh"
+#include "cpu/o3/thread_state.hh"
+
+/**
+ * Constructs the MIPS O3 CPU.
+ *
+ * For each of the numThreads hardware threads this allocates the thread
+ * state (bound to a workload process when one exists for that index, or to
+ * a NULL process otherwise), wires a translating functional port to memory
+ * for threads that have a process, and creates the thread context that is
+ * registered with the CPU.  Finally it gives every pipeline stage, the ROB
+ * and the register file a pointer back to this CPU.
+ *
+ * @param params Parameter struct; also forwarded to FullO3CPU.
+ */
+template <class Impl>
+MipsO3CPU<Impl>::MipsO3CPU(Params *params)
+    : FullO3CPU<Impl>(params)
+{
+    DPRINTF(O3CPU, "Creating MipsO3CPU object.\n");
+
+    // Setup any thread state.
+    this->thread.resize(this->numThreads);
+
+    for (int i = 0; i < this->numThreads; ++i) {
+        if (i < params->workload.size()) {
+            // thread[i] has not been allocated yet at this point, so report
+            // the workload process that is about to be bound to it.
+            DPRINTF(O3CPU, "Workload[%i] process is %#x\n",
+                    i, params->workload[i]);
+            this->thread[i] = new Thread(this, i, params->workload[i],
+                                         i, params->mem);
+
+            this->thread[i]->setStatus(ThreadContext::Suspended);
+
+
+            /* Use this port for syscall emulation writes to memory. */
+            Port *mem_port;
+            TranslatingPort *trans_port;
+            trans_port = new TranslatingPort(csprintf("%s-%d-funcport",
+                                                      name(), i),
+                                             params->workload[i]->pTable,
+                                             false);
+            mem_port = params->mem->getPort("functional");
+            mem_port->setPeer(trans_port);
+            trans_port->setPeer(mem_port);
+            this->thread[i]->setMemPort(trans_port);
+
+            //usedTids[i] = true;
+            //threadMap[i] = i;
+        } else {
+            //Allocate Empty thread so M5 can use later
+            //when scheduling threads to CPU
+            Process* dummy_proc = NULL;
+
+            this->thread[i] = new Thread(this, i, dummy_proc, i, params->mem);
+            //usedTids[i] = false;
+        }
+
+        ThreadContext *tc;
+
+        // Setup the TC that will serve as the interface to the threads/CPU.
+        MipsTC<Impl> *mips_tc =
+            new MipsTC<Impl>;
+
+        tc = mips_tc;
+
+        // If we're using a checker, then the TC should be the
+        // CheckerThreadContext.
+#if USE_CHECKER
+        if (params->checker) {
+            tc = new CheckerThreadContext<MipsTC<Impl> >(
+                mips_tc, this->checker);
+        }
+#endif
+
+        mips_tc->cpu = this;
+        mips_tc->thread = this->thread[i];
+
+        // Give the thread the TC.
+        this->thread[i]->tc = tc;
+
+        // Add the TC to the CPU's list of TC's.
+        this->threadContexts.push_back(tc);
+    }
+
+    for (int i=0; i < this->numThreads; i++) {
+        this->thread[i]->setFuncExeInst(0);
+    }
+
+    // Sets CPU pointers. These must be set at this level because the CPU
+    // pointers are defined to be the highest level of CPU class.
+    this->fetch.setCPU(this);
+    this->decode.setCPU(this);
+    this->rename.setCPU(this);
+    this->iew.setCPU(this);
+    this->commit.setCPU(this);
+
+    this->rob.setCPU(this);
+    this->regFile.setCPU(this);
+
+    lockAddr = 0;
+    lockFlag = false;
+}
+
+/**
+ * Registers statistics: first the FullO3CPU-level stats, then those of
+ * the fetch, decode, rename, IEW and commit stages, in that order.
+ */
+template <class Impl>
+void
+MipsO3CPU<Impl>::regStats()
+{
+ // Register stats for everything that has stats.
+ this->fullCPURegStats();
+ this->fetch.regStats();
+ this->decode.regStats();
+ this->rename.regStats();
+ this->iew.regStats();
+ this->commit.regStats();
+}
+
+
+/** Returns misc. register @p misc_reg of thread @p tid from the register file. */
+template <class Impl>
+TheISA::MiscReg
+MipsO3CPU<Impl>::readMiscReg(int misc_reg, unsigned tid)
+{
+    const TheISA::MiscReg reg_value =
+        this->regFile.readMiscReg(misc_reg, tid);
+    return reg_value;
+}
+
+/**
+ * Returns misc. register @p misc_reg of thread @p tid via the register
+ * file's with-effect read; @p fault is passed through to that call.
+ */
+template <class Impl>
+TheISA::MiscReg
+MipsO3CPU<Impl>::readMiscRegWithEffect(int misc_reg, Fault &fault,
+                                       unsigned tid)
+{
+    const TheISA::MiscReg reg_value =
+        this->regFile.readMiscRegWithEffect(misc_reg, fault, tid);
+    return reg_value;
+}
+
+/**
+ * Writes @p val to misc. register @p misc_reg of thread @p tid in the
+ * register file and returns the Fault that write reports.
+ */
+template <class Impl>
+Fault
+MipsO3CPU<Impl>::setMiscReg(int misc_reg, const TheISA::MiscReg &val,
+                            unsigned tid)
+{
+    Fault write_fault = this->regFile.setMiscReg(misc_reg, val, tid);
+    return write_fault;
+}
+
+/**
+ * Writes @p val to misc. register @p misc_reg of thread @p tid through the
+ * register file's with-effect write and returns the Fault it reports.
+ */
+template <class Impl>
+Fault
+MipsO3CPU<Impl>::setMiscRegWithEffect(int misc_reg,
+                                      const TheISA::MiscReg &val,
+                                      unsigned tid)
+{
+    Fault write_fault =
+        this->regFile.setMiscRegWithEffect(misc_reg, val, tid);
+    return write_fault;
+}
+
+/**
+ * Starts a squash for thread @p tid that originates from a thread-context
+ * update: sets the thread's inSyscall flag, then asks the commit stage to
+ * generate a TC event for that thread.
+ */
+template <class Impl>
+void
+MipsO3CPU<Impl>::squashFromTC(unsigned tid)
+{
+ // The flag is set before the commit stage is notified.
+ this->thread[tid]->inSyscall = true;
+ this->commit.generateTCEvent(tid);
+}
+
+/** Handles @p fault by invoking it on the thread context of thread @p tid. */
+template <class Impl>
+void
+MipsO3CPU<Impl>::trap(Fault fault, unsigned tid)
+{
+    // The fault's invoke method operates on the faulting thread's TC.
+    ThreadContext *faulting_tc = this->threadContexts[tid];
+    fault->invoke(faulting_tc);
+}
+
+#if !FULL_SYSTEM
+
+/**
+ * Executes system call @p callnum on behalf of thread @p tid.
+ *
+ * The thread's funcExeInst counter is bumped for the duration of the call
+ * and restored afterwards, because the normal commit path increments it
+ * for the syscall instruction itself.
+ *
+ * @param callnum Number of the system call to execute.
+ * @param tid     Thread on whose behalf the call is made.
+ */
+template <class Impl>
+void
+MipsO3CPU<Impl>::syscall(int64_t callnum, int tid)
+{
+    DPRINTF(O3CPU, "[tid:%i] Executing syscall().\n\n", tid);
+
+    DPRINTF(Activity,"Activity: syscall() called.\n");
+
+    // Temporarily increase this by one to account for the syscall
+    // instruction.
+    ++(this->thread[tid]->funcExeInst);
+
+    // Execute the actual syscall.
+    this->thread[tid]->syscall(callnum);
+
+    // Decrease funcExeInst by one as the normal commit will handle
+    // incrementing it.
+    --(this->thread[tid]->funcExeInst);
+
+    // Report register 2 for *this* thread through the same tid-aware
+    // accessor the syscall argument helpers use, and terminate the trace
+    // line so the next message starts on its own line.
+    DPRINTF(O3CPU, "[tid:%i] Register 2 is %i\n", tid,
+            this->readArchIntReg(2, tid));
+}
+
+/**
+ * Returns syscall argument @p i of thread @p tid, read from the
+ * architectural integer register MipsISA::ArgumentReg0 + i.
+ */
+template <class Impl>
+TheISA::IntReg
+MipsO3CPU<Impl>::getSyscallArg(int i, int tid)
+{
+    const int arg_reg = MipsISA::ArgumentReg0 + i;
+    return this->readArchIntReg(arg_reg, tid);
+}
+
+/**
+ * Stores @p val as syscall argument @p i of thread @p tid, i.e. into the
+ * architectural integer register MipsISA::ArgumentReg0 + i.
+ */
+template <class Impl>
+void
+MipsO3CPU<Impl>::setSyscallArg(int i, TheISA::IntReg val, int tid)
+{
+    const int arg_reg = MipsISA::ArgumentReg0 + i;
+    this->setArchIntReg(arg_reg, val, tid);
+}
+
+/**
+ * Publishes the outcome of a syscall to thread @p tid's registers.
+ *
+ * On success SyscallSuccessReg is set to 0 and ReturnValueReg to the
+ * returned value; on failure SyscallSuccessReg is set to -1 and
+ * ReturnValueReg to the negated value.
+ */
+template <class Impl>
+void
+MipsO3CPU<Impl>::setSyscallReturn(SyscallReturn return_value, int tid)
+{
+    if (!return_value.successful()) {
+        // Error path: flag the failure and hand back the negated value.
+        this->setArchIntReg(TheISA::SyscallSuccessReg,
+                            (TheISA::IntReg) -1, tid);
+        this->setArchIntReg(TheISA::ReturnValueReg, -return_value.value(), tid);
+        return;
+    }
+
+    // Success path: clear the flag register and store the result.
+    this->setArchIntReg(TheISA::SyscallSuccessReg, 0, tid);
+    this->setArchIntReg(TheISA::ReturnValueReg, return_value.value(), tid);
+}
+#endif
diff --git a/src/cpu/o3/mips/dyn_inst.cc b/src/cpu/o3/mips/dyn_inst.cc
new file mode 100755
index 000000000..216aa7d2c
--- /dev/null
+++ b/src/cpu/o3/mips/dyn_inst.cc
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2006 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Kevin Lim
+ * Korey Sewell
+ */
+
+#include "cpu/o3/mips/dyn_inst_impl.hh"
+#include "cpu/o3/mips/impl.hh"
+
+// Force instantiation of MipsDynInst for all the implementations that
+// are needed.
+template class MipsDynInst<MipsSimpleImpl>;
diff --git a/src/cpu/o3/mips/dyn_inst.hh b/src/cpu/o3/mips/dyn_inst.hh
new file mode 100755
index 000000000..06bdfcec4
--- /dev/null
+++ b/src/cpu/o3/mips/dyn_inst.hh
@@ -0,0 +1,282 @@
+/*
+ * Copyright (c) 2006 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Kevin Lim
+ * Korey Sewell
+ */
+
+#ifndef __CPU_O3_MIPS_DYN_INST_HH__
+#define __CPU_O3_MIPS_DYN_INST_HH__
+
+#include "arch/isa_traits.hh"
+#include "cpu/base_dyn_inst.hh"
+#include "cpu/inst_seq.hh"
+#include "cpu/o3/mips/cpu.hh"
+#include "cpu/o3/mips/impl.hh"
+
+class Packet;
+
+/**
+ * Mostly implementation & ISA specific MipsDynInst. As with most
+ * other classes in the new CPU model, it is templated on the Impl to
+ * allow for passing in of all types, such as the CPU type and the ISA
+ * type. The MipsDynInst serves as the primary interface to the CPU
+ * for instructions that are executing.
+ */
+template <class Impl>
+class MipsDynInst : public BaseDynInst<Impl>
+{
+ public:
+ /** Typedef for the CPU type supplied by the Impl. */
+ typedef typename Impl::O3CPU O3CPU;
+
+ /** Binary machine instruction type. */
+ typedef TheISA::MachInst MachInst;
+ /** Extended machine instruction type. */
+ typedef TheISA::ExtMachInst ExtMachInst;
+ /** Logical register index type. */
+ typedef TheISA::RegIndex RegIndex;
+ /** Integer register type (presumably the value type -- TODO confirm). */
+ typedef TheISA::IntReg IntReg;
+ typedef TheISA::FloatReg FloatReg; // type returned by readFloatReg()
+ typedef TheISA::FloatRegBits FloatRegBits; // type returned by readFloatRegBits()
+ /** Misc register value type (the type returned by readMiscReg()). */
+ typedef TheISA::MiscReg MiscReg;
+
+ enum {
+ MaxInstSrcRegs = TheISA::MaxInstSrcRegs, ///< Max source regs
+ MaxInstDestRegs = TheISA::MaxInstDestRegs, ///< Max dest regs
+ };
+
+ public:
+ /** MipsDynInst constructor given a binary instruction. */
+ MipsDynInst(ExtMachInst inst, Addr PC, Addr Pred_PC, InstSeqNum seq_num,
+ O3CPU *cpu);
+
+ /** MipsDynInst constructor given a static inst pointer. */
+ MipsDynInst(StaticInstPtr &_staticInst);
+
+ /** Executes the instruction and returns the resulting fault. */
+ Fault execute();
+
+ /** Initiates the access. Only valid for memory operations. */
+ Fault initiateAcc();
+
+ /** Completes the access. Only valid for memory operations. */
+ Fault completeAcc(Packet *pkt);
+
+ private:
+ /** Initializes the renamed source and destination register tables. */
+ void initVars();
+
+ public:
+ /** Reads a miscellaneous register of this instruction's thread. */
+ MiscReg readMiscReg(int misc_reg)
+ {
+ return this->cpu->readMiscReg(misc_reg, this->threadNumber);
+ }
+
+ /** Reads a misc. register, including any side-effects the read
+ * might have as defined by the architecture.
+ */
+ MiscReg readMiscRegWithEffect(int misc_reg, Fault &fault)
+ {
+ return this->cpu->readMiscRegWithEffect(misc_reg, fault,
+ this->threadNumber);
+ }
+
+ /** Sets a misc. register and records val as the integer result. */
+ Fault setMiscReg(int misc_reg, const MiscReg &val)
+ {
+ this->instResult.integer = val;
+ return this->cpu->setMiscReg(misc_reg, val, this->threadNumber);
+ }
+
+ /** Sets a misc. register, including any side-effects the write
+ * might have as defined by the architecture.
+ */
+ Fault setMiscRegWithEffect(int misc_reg, const MiscReg &val)
+ {
+ return this->cpu->setMiscRegWithEffect(misc_reg, val,
+ this->threadNumber);
+ }
+
+ /** Calls a syscall. */
+ void syscall(int64_t callnum);
+
+ private:
+ /** Physical register index of the destination registers of this
+ * instruction.
+ */
+ PhysRegIndex _destRegIdx[MaxInstDestRegs];
+
+ /** Physical register index of the source registers of this
+ * instruction.
+ */
+ PhysRegIndex _srcRegIdx[MaxInstSrcRegs];
+
+ /** Physical register index of the previous producers of the
+ * architected destinations.
+ */
+ PhysRegIndex _prevDestRegIdx[MaxInstDestRegs];
+
+ public:
+
+ // The register accessor methods provide the index of the
+ // instruction's operand (e.g., 0 or 1), not the architectural
+ // register index, to simplify the implementation of register
+ // renaming. We find the architectural register index by indexing
+ // into the instruction's own operand index table. Note that a
+ // raw pointer to the StaticInst is provided instead of a
+ // ref-counted StaticInstPtr to reduce overhead. This is fine as
+ // long as these methods don't copy the pointer into any long-term
+ // storage (which is pretty hard to imagine they would have reason
+ // to do).
+ // The read accessors below ignore si and look idx up in _srcRegIdx.
+ uint64_t readIntReg(const StaticInst *si, int idx)
+ {
+ return this->cpu->readIntReg(_srcRegIdx[idx]);
+ }
+
+ FloatReg readFloatReg(const StaticInst *si, int idx, int width)
+ {
+ return this->cpu->readFloatReg(_srcRegIdx[idx], width);
+ }
+
+ FloatReg readFloatReg(const StaticInst *si, int idx)
+ {
+ return this->cpu->readFloatReg(_srcRegIdx[idx]);
+ }
+
+ FloatRegBits readFloatRegBits(const StaticInst *si, int idx, int width)
+ {
+ return this->cpu->readFloatRegBits(_srcRegIdx[idx], width);
+ }
+
+ FloatRegBits readFloatRegBits(const StaticInst *si, int idx)
+ {
+ return this->cpu->readFloatRegBits(_srcRegIdx[idx]);
+ }
+
+ /** Each setter writes the renamed destination register in the CPU and
+ * then forwards to the BaseDynInst setter. @todo: Make results into
+ * arrays so they can handle multiple dest registers. */
+ void setIntReg(const StaticInst *si, int idx, uint64_t val)
+ {
+ this->cpu->setIntReg(_destRegIdx[idx], val);
+ BaseDynInst<Impl>::setIntReg(si, idx, val);
+ }
+
+ void setFloatReg(const StaticInst *si, int idx, FloatReg val, int width)
+ {
+ this->cpu->setFloatReg(_destRegIdx[idx], val, width);
+ BaseDynInst<Impl>::setFloatReg(si, idx, val, width);
+ }
+
+ void setFloatReg(const StaticInst *si, int idx, FloatReg val)
+ {
+ this->cpu->setFloatReg(_destRegIdx[idx], val);
+ BaseDynInst<Impl>::setFloatReg(si, idx, val);
+ }
+
+ void setFloatRegBits(const StaticInst *si, int idx,
+ FloatRegBits val, int width)
+ {
+ this->cpu->setFloatRegBits(_destRegIdx[idx], val, width);
+ BaseDynInst<Impl>::setFloatRegBits(si, idx, val); // NOTE(review): width is not forwarded here, unlike setFloatReg -- confirm intended
+ }
+
+ void setFloatRegBits(const StaticInst *si, int idx, FloatRegBits val)
+ {
+ this->cpu->setFloatRegBits(_destRegIdx[idx], val);
+ BaseDynInst<Impl>::setFloatRegBits(si, idx, val);
+ }
+
+ /** Returns the physical register index of the i'th destination
+ * register.
+ */
+ PhysRegIndex renamedDestRegIdx(int idx) const
+ {
+ return _destRegIdx[idx];
+ }
+
+ /** Returns the physical register index of the i'th source register. */
+ PhysRegIndex renamedSrcRegIdx(int idx) const
+ {
+ return _srcRegIdx[idx];
+ }
+
+ /** Returns the physical register index of the previous physical register
+ * that remapped to the same logical register index.
+ */
+ PhysRegIndex prevDestRegIdx(int idx) const
+ {
+ return _prevDestRegIdx[idx];
+ }
+
+ /** Renames a destination register to a physical register. Also records
+ * the previous physical register that the logical register mapped to.
+ */
+ void renameDestReg(int idx,
+ PhysRegIndex renamed_dest,
+ PhysRegIndex previous_rename)
+ {
+ _destRegIdx[idx] = renamed_dest;
+ _prevDestRegIdx[idx] = previous_rename;
+ }
+
+ /** Renames a source logical register to the physical register which
+ * has/will produce that logical register's result.
+ * @todo: add in whether or not the source register is ready.
+ */
+ void renameSrcReg(int idx, PhysRegIndex renamed_src)
+ {
+ _srcRegIdx[idx] = renamed_src;
+ }
+
+ public:
+ /** Calculates EA part of a memory instruction. Currently unused,
+ * though it may be useful in the future if we want to split
+ * memory operations into EA calculation and memory access parts.
+ */
+ Fault calcEA()
+ {
+ return this->staticInst->eaCompInst()->execute(this, this->traceData);
+ }
+
+ /** Does the memory access part of a memory instruction. Currently unused,
+ * though it may be useful in the future if we want to split
+ * memory operations into EA calculation and memory access parts.
+ */
+ Fault memAccess()
+ {
+ return this->staticInst->memAccInst()->execute(this, this->traceData);
+ }
+};
+
+#endif // __CPU_O3_MIPS_DYN_INST_HH__
+
diff --git a/src/cpu/o3/mips/dyn_inst_impl.hh b/src/cpu/o3/mips/dyn_inst_impl.hh
new file mode 100755
index 000000000..57dec1ccf
--- /dev/null
+++ b/src/cpu/o3/mips/dyn_inst_impl.hh
@@ -0,0 +1,116 @@
+/*
+ * Copyright (c) 2004-2006 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Kevin Lim
+ */
+
+#include "cpu/o3/mips/dyn_inst.hh"
+
+/**
+ * Builds a dynamic instruction from a decoded machine instruction. All
+ * arguments are handed to BaseDynInst unchanged; afterwards the renamed
+ * register tables are initialized.
+ */
+template <class Impl>
+MipsDynInst<Impl>::MipsDynInst(ExtMachInst inst,
+                               Addr PC,
+                               Addr Pred_PC,
+                               InstSeqNum seq_num,
+                               O3CPU *cpu)
+    : BaseDynInst<Impl>(inst, PC, Pred_PC, seq_num, cpu)
+{
+    this->initVars();
+}
+
+/**
+ * Builds a dynamic instruction around an existing static instruction. The
+ * pointer is handed to BaseDynInst; afterwards the renamed register tables
+ * are initialized.
+ */
+template <class Impl>
+MipsDynInst<Impl>::MipsDynInst(StaticInstPtr &_staticInst)
+    : BaseDynInst<Impl>(_staticInst)
+{
+    this->initVars();
+}
+
+/**
+ * Seeds the renamed register tables with the static instruction's own
+ * register indices, so the instruction queue can operate on the
+ * instruction without modification even before renaming has run.
+ */
+template <class Impl>
+void
+MipsDynInst<Impl>::initVars()
+{
+    // Destination operands: start out mapped to their un-renamed indices.
+    for (int dest = 0; dest < this->staticInst->numDestRegs(); ++dest) {
+        _destRegIdx[dest] = this->staticInst->destRegIdx(dest);
+    }
+
+    // Source operands: same mapping, and clear each ready-source entry.
+    for (int src = 0; src < this->staticInst->numSrcRegs(); ++src) {
+        _srcRegIdx[src] = this->staticInst->srcRegIdx(src);
+        this->_readySrcRegIdx[src] = 0;
+    }
+}
+
+/**
+ * Executes the static instruction with this dynamic instruction as its
+ * execution context, stores the resulting fault in this->fault and
+ * returns it.
+ */
+template <class Impl>
+Fault
+MipsDynInst<Impl>::execute()
+{
+    // @todo: Pretty convoluted way to avoid squashing from happening
+    // when using the TC during an instruction's execution
+    // (specifically for instructions that have side-effects that use
+    // the TC). Fix this.
+    const bool saved_in_syscall = this->thread->inSyscall;
+
+    // Force the flag on for the duration of the call, then put back
+    // whatever value the thread had before.
+    this->thread->inSyscall = true;
+    this->fault = this->staticInst->execute(this, this->traceData);
+    this->thread->inSyscall = saved_in_syscall;
+
+    return this->fault;
+}
+
+/**
+ * Starts the memory access of the static instruction, stores the
+ * resulting fault in this->fault and returns it.
+ */
+template <class Impl>
+Fault
+MipsDynInst<Impl>::initiateAcc()
+{
+    // @todo: Pretty convoluted way to avoid squashing from happening
+    // when using the TC during an instruction's execution
+    // (specifically for instructions that have side-effects that use
+    // the TC). Fix this.
+    const bool saved_in_syscall = this->thread->inSyscall;
+
+    // Force the flag on for the duration of the call, then put back
+    // whatever value the thread had before.
+    this->thread->inSyscall = true;
+    this->fault = this->staticInst->initiateAcc(this, this->traceData);
+    this->thread->inSyscall = saved_in_syscall;
+
+    return this->fault;
+}
+
+/**
+ * Finishes the memory access of the static instruction using the given
+ * packet, stores the resulting fault in this->fault and returns it.
+ *
+ * @param pkt Packet passed through to the static instruction.
+ */
+template <class Impl>
+Fault
+MipsDynInst<Impl>::completeAcc(Packet *pkt)
+{
+    // The thread's inSyscall flag is not touched on this path.
+    this->fault =
+        this->staticInst->completeAcc(pkt, this, this->traceData);
+
+    return this->fault;
+}
+
+/**
+ * Forwards a system call to the CPU on behalf of this instruction's
+ * thread.
+ *
+ * @param callnum Number of the system call to perform.
+ */
+template <class Impl>
+void
+MipsDynInst<Impl>::syscall(int64_t callnum)
+{
+    O3CPU *const owner = this->cpu;
+    owner->syscall(callnum, this->threadNumber);
+}
+
diff --git a/src/cpu/o3/mips/impl.hh b/src/cpu/o3/mips/impl.hh
new file mode 100644
index 000000000..ac7181a19
--- /dev/null
+++ b/src/cpu/o3/mips/impl.hh
@@ -0,0 +1,93 @@
+/*
+ * Copyright (c) 2006 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Kevin Lim
+ * Korey Sewell
+ */
+
+#ifndef __CPU_O3_MIPS_IMPL_HH__
+#define __CPU_O3_MIPS_IMPL_HH__
+
+#include "arch/mips/isa_traits.hh"
+
+#include "cpu/o3/mips/params.hh"
+#include "cpu/o3/cpu_policy.hh"
+
+
+// Forward declarations of the class templates that MipsSimpleImpl names.
+template <class Impl>
+class MipsDynInst;
+
+template <class Impl>
+class MipsO3CPU;
+
+/** Implementation specific struct that defines several key types to the
+ * CPU, the stages within the CPU, the time buffers, and the DynInst.
+ * The struct defines the ISA, the CPU policy, the specific DynInst, the
+ * specific O3CPU, and all of the structs from the time buffers to do
+ * communication.
+ * This is one of the key things that must be defined for each hardware
+ * specific CPU implementation.
+ */
+struct MipsSimpleImpl
+{
+ /** The binary machine instruction type, taken from the ISA. */
+ typedef TheISA::MachInst MachInst;
+
+ /** The CPU policy to be used, which defines all of the CPU stages. */
+ typedef SimpleCPUPolicy<MipsSimpleImpl> CPUPol;
+
+ /** The DynInst type to be used. */
+ typedef MipsDynInst<MipsSimpleImpl> DynInst;
+
+ /** The refcounted DynInst pointer to be used. In most cases this is
+ * what should be used, and not DynInst *.
+ */
+ typedef RefCountingPtr<DynInst> DynInstPtr;
+
+ /** The O3CPU type to be used. */
+ typedef MipsO3CPU<MipsSimpleImpl> O3CPU;
+
+ /** Same typedef, but for CPUType. BaseDynInst may not always use
+ * an O3 CPU, so it's clearer to call it CPUType instead in that
+ * case.
+ */
+ typedef O3CPU CPUType;
+
+ /** The Params to be passed to each stage. */
+ typedef MipsSimpleParams Params;
+ /** Compile-time constants MaxWidth and MaxThreads. */
+ enum {
+ MaxWidth = 8, // presumably the widest supported pipeline stage -- TODO confirm
+ MaxThreads = 4 // presumably the most hardware threads supported -- TODO confirm
+ };
+};
+
+/** The O3Impl to be used: makes MipsSimpleImpl available as O3CPUImpl. */
+typedef MipsSimpleImpl O3CPUImpl;
+
+#endif // __CPU_O3_MIPS_IMPL_HH__
diff --git a/src/cpu/o3/mips/params.hh b/src/cpu/o3/mips/params.hh
new file mode 100644
index 000000000..d1ac62e21
--- /dev/null
+++ b/src/cpu/o3/mips/params.hh
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2006 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Kevin Lim
+ * Korey Sewell
+ */
+
+#ifndef __CPU_O3_MIPS_PARAMS_HH__
+#define __CPU_O3_MIPS_PARAMS_HH__
+
+#include "cpu/o3/cpu.hh"
+#include "cpu/o3/params.hh"
+
+// Forward declarations. MipsDTB and MipsITB have to be declared because
+// MipsSimpleParams holds pointers to them when FULL_SYSTEM is enabled.
+class MipsDTB;
+class MipsITB;
+class MemObject;
+class Process;
+class System;
+
+/**
+ * This file defines the parameters that will be used for the MipsO3CPU.
+ * This must be defined externally so that the Impl can have a params class
+ * defined that it can pass to all of the individual stages.
+ */
+
+class MipsSimpleParams : public O3Params
+{
+  public:
+    /** Default constructor; performs no initialization of its own. */
+    MipsSimpleParams() {}
+
+#if FULL_SYSTEM
+    // Full-system parameter objects are placed here. Only pointers are
+    // stored, so the forward declarations above are sufficient.
+    MipsITB *itb;
+    MipsDTB *dtb;
+#endif
+};
+
+#endif // __CPU_O3_MIPS_PARAMS_HH__
diff --git a/src/cpu/o3/mips/thread_context.cc b/src/cpu/o3/mips/thread_context.cc
new file mode 100755
index 000000000..0061a2a63
--- /dev/null
+++ b/src/cpu/o3/mips/thread_context.cc
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2006 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Kevin Lim
+ * Korey Sewell
+ */
+
+#include "cpu/o3/thread_context.hh"
+#include "cpu/o3/thread_context_impl.hh"
+
+template class O3ThreadContext<MipsSimpleImpl>; // explicit instantiation for the MIPS Impl
+
diff --git a/src/cpu/o3/mips/thread_context.hh b/src/cpu/o3/mips/thread_context.hh
new file mode 100644
index 000000000..26b1e2e7f
--- /dev/null
+++ b/src/cpu/o3/mips/thread_context.hh
@@ -0,0 +1,68 @@
+/*
+ * Copyright (c) 2006 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Kevin Lim
+ * Korey Sewell
+ */
+
+#ifndef __CPU_O3_MIPS_THREAD_CONTEXT_HH__
+#define __CPU_O3_MIPS_THREAD_CONTEXT_HH__
+
+#include "arch/mips/types.hh"
+#include "cpu/o3/thread_context.hh"
+
+/**
+ * MIPS-specific additions to the O3 thread context. Every member simply
+ * forwards to the CPU or to the O3ThreadContext base class.
+ */
+template <class Impl>
+class MipsTC : public O3ThreadContext<Impl>
+{
+  public:
+    /** Reads the next-NPC of this context's thread from the CPU. */
+    virtual uint64_t readNextNPC()
+    {
+        return this->cpu->readNextNPC(this->thread->readTid());
+    }
+
+    /** Sets the next-NPC of this context's thread in the CPU. */
+    virtual void setNextNPC(uint64_t val)
+    {
+        this->cpu->setNextNPC(val, this->thread->readTid());
+    }
+
+    /** Changing the register file context is not supported; always panics. */
+    virtual void changeRegFileContext(TheISA::RegContextParam param,
+                                      TheISA::RegContextVal val)
+    { panic("Not supported on Mips!"); }
+
+    /** This function exits the thread context in the CPU and returns
+     * 1 if the CPU has no more active threads (meaning it's OK to exit);
+     * Used in syscall-emulation mode when a thread executes the 'exit'
+     * syscall.
+     */
+    virtual int exit()
+    {
+        this->deallocate();
+
+        // Keep simulating (0) while any thread is still active; only
+        // report 1 once none are left.
+        return this->cpu->numActiveThreads() ? 0 : 1;
+    }
+};
+
+#endif // __CPU_O3_MIPS_THREAD_CONTEXT_HH__
diff --git a/src/cpu/o3/ras.hh b/src/cpu/o3/ras.hh
index 5c8a93285..97846ed16 100644
--- a/src/cpu/o3/ras.hh
+++ b/src/cpu/o3/ras.hh
@@ -31,8 +31,7 @@
#ifndef __CPU_O3_RAS_HH__
#define __CPU_O3_RAS_HH__
-// For Addr type.
-#include "arch/isa_traits.hh"
+#include "sim/host.hh"
#include <vector>
/** Return address stack class, implements a simple RAS. */
diff --git a/src/cpu/o3/regfile.hh b/src/cpu/o3/regfile.hh
index b6677b4b1..512cf0721 100644
--- a/src/cpu/o3/regfile.hh
+++ b/src/cpu/o3/regfile.hh
@@ -33,11 +33,11 @@
#define __CPU_O3_REGFILE_HH__
#include "arch/isa_traits.hh"
-#include "arch/faults.hh"
#include "arch/types.hh"
#include "base/trace.hh"
#include "config/full_system.hh"
#include "cpu/o3/comm.hh"
+#include "sim/faults.hh"
#if FULL_SYSTEM
#include "kern/kernel_stats.hh"
diff --git a/src/cpu/o3/rename.hh b/src/cpu/o3/rename.hh
index 034087feb..ba26a01dd 100644
--- a/src/cpu/o3/rename.hh
+++ b/src/cpu/o3/rename.hh
@@ -76,6 +76,7 @@ class DefaultRename
// using a list instead of a queue. (Most other stages use a
// queue)
typedef std::list<DynInstPtr> InstQueue;
+ typedef typename std::list<DynInstPtr>::iterator ListIt;
public:
/** Overall rename status. Used to determine if the CPU can
@@ -170,7 +171,7 @@ class DefaultRename
void takeOverFrom();
/** Squashes all instructions in a thread. */
- void squash(unsigned tid);
+ void squash(const InstSeqNum &squash_seq_num, unsigned tid);
/** Ticks rename, which processes all input signals and attempts to rename
* as many instructions as possible.
@@ -222,7 +223,7 @@ class DefaultRename
bool unblock(unsigned tid);
/** Executes actual squash, removing squashed instructions. */
- void doSquash(unsigned tid);
+ void doSquash(const InstSeqNum &squash_seq_num, unsigned tid);
/** Removes a committed instruction's rename history. */
void removeFromHistory(InstSeqNum inst_seq_num, unsigned tid);
diff --git a/src/cpu/o3/rename_impl.hh b/src/cpu/o3/rename_impl.hh
index 805a72808..892eb12cf 100644
--- a/src/cpu/o3/rename_impl.hh
+++ b/src/cpu/o3/rename_impl.hh
@@ -26,6 +26,7 @@
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* Authors: Kevin Lim
+ * Korey Sewell
*/
#include <list>
@@ -33,8 +34,6 @@
#include "config/full_system.hh"
#include "cpu/o3/rename.hh"
-using namespace std;
-
template <class Impl>
DefaultRename<Impl>::DefaultRename(Params *params)
: iewToRenameDelay(params->iewToRenameDelay),
@@ -222,7 +221,7 @@ DefaultRename<Impl>::initStage()
template<class Impl>
void
-DefaultRename<Impl>::setActiveThreads(list<unsigned> *at_ptr)
+DefaultRename<Impl>::setActiveThreads(std::list<unsigned> *at_ptr)
{
DPRINTF(Rename, "Setting active threads list pointer.\n");
activeThreads = at_ptr;
@@ -271,7 +270,8 @@ DefaultRename<Impl>::switchOut()
{
// Clear any state, fix up the rename map.
for (int i = 0; i < numThreads; i++) {
- typename list<RenameHistory>::iterator hb_it = historyBuffer[i].begin();
+ typename std::list<RenameHistory>::iterator hb_it =
+ historyBuffer[i].begin();
while (!historyBuffer[i].empty()) {
assert(hb_it != historyBuffer[i].end());
@@ -318,7 +318,7 @@ DefaultRename<Impl>::takeOverFrom()
template <class Impl>
void
-DefaultRename<Impl>::squash(unsigned tid)
+DefaultRename<Impl>::squash(const InstSeqNum &squash_seq_num, unsigned tid)
{
DPRINTF(Rename, "[tid:%u]: Squashing instructions.\n",tid);
@@ -341,19 +341,55 @@ DefaultRename<Impl>::squash(unsigned tid)
unsigned squashCount = 0;
for (int i=0; i<fromDecode->size; i++) {
- if (fromDecode->insts[i]->threadNumber == tid) {
+ if (fromDecode->insts[i]->threadNumber == tid &&
+ fromDecode->insts[i]->seqNum > squash_seq_num) {
fromDecode->insts[i]->setSquashed();
wroteToTimeBuffer = true;
squashCount++;
}
+
}
+ // Clear the instruction list and skid buffer in case they have any
+ // insts in them. Since we support multiple ISAs, we can't simply call
+ // "insts[tid].clear();" or "skidBuffer[tid].clear()", because on some
+ // architectures there may be a delay slot inst that must not be dropped.
+ // insts[tid].clear();
+#if THE_ISA == ALPHA_ISA
insts[tid].clear();
+#else
+ DPRINTF(Rename, "[tid:%i] Squashing incoming decode instructions until "
+ "[sn:%i].\n",tid, squash_seq_num);
+ ListIt ilist_it = insts[tid].begin();
+ while (ilist_it != insts[tid].end()) {
+ if ((*ilist_it)->seqNum > squash_seq_num) {
+ (*ilist_it)->setSquashed();
+ DPRINTF(Rename, "Squashing incoming decode instruction, "
+ "[tid:%i] [sn:%i] PC %08p.\n", tid, (*ilist_it)->seqNum, (*ilist_it)->PC);
+ }
+ ilist_it++;
+ }
+#endif
// Clear the skid buffer in case it has any data in it.
+ // See comments above.
+ // skidBuffer[tid].clear();
+#if THE_ISA == ALPHA_ISA
skidBuffer[tid].clear();
-
- doSquash(tid);
+#else
+ DPRINTF(Rename, "[tid:%i] Squashing incoming skidbuffer instructions "
+ "until [sn:%i].\n", tid, squash_seq_num);
+ ListIt slist_it = skidBuffer[tid].begin();
+ while (slist_it != skidBuffer[tid].end()) {
+ if ((*slist_it)->seqNum > squash_seq_num) {
+ (*slist_it)->setSquashed();
+ DPRINTF(Rename, "Squashing skidbuffer instruction, [tid:%i] [sn:%i]"
+ "PC %08p.\n", tid, (*slist_it)->seqNum, (*slist_it)->PC);
+ }
+ slist_it++;
+ }
+#endif
+ doSquash(squash_seq_num, tid);
}
template <class Impl>
@@ -370,7 +406,7 @@ DefaultRename<Impl>::tick()
sortInsts();
- list<unsigned>::iterator threads = (*activeThreads).begin();
+ std::list<unsigned>::iterator threads = (*activeThreads).begin();
// Check stall and squash signals.
while (threads != (*activeThreads).end()) {
@@ -572,7 +608,7 @@ DefaultRename<Impl>::renameInsts(unsigned tid)
if (inst->isSquashed()) {
DPRINTF(Rename, "[tid:%u]: instruction %i with PC %#x is "
"squashed, skipping.\n",
- tid, inst->seqNum, inst->threadNumber,inst->readPC());
+ tid, inst->seqNum, inst->readPC());
++renameSquashedInsts;
@@ -707,9 +743,11 @@ DefaultRename<Impl>::sortInsts()
{
int insts_from_decode = fromDecode->size;
#ifdef DEBUG
+#if THE_ISA == ALPHA_ISA
for (int i=0; i < numThreads; i++)
assert(insts[i].empty());
#endif
+#endif
for (int i = 0; i < insts_from_decode; ++i) {
DynInstPtr inst = fromDecode->insts[i];
insts[inst->threadNumber].push_back(inst);
@@ -720,7 +758,7 @@ template<class Impl>
bool
DefaultRename<Impl>::skidsEmpty()
{
- list<unsigned>::iterator threads = (*activeThreads).begin();
+ std::list<unsigned>::iterator threads = (*activeThreads).begin();
while (threads != (*activeThreads).end()) {
if (!skidBuffer[*threads++].empty())
@@ -736,7 +774,7 @@ DefaultRename<Impl>::updateStatus()
{
bool any_unblocking = false;
- list<unsigned>::iterator threads = (*activeThreads).begin();
+ std::list<unsigned>::iterator threads = (*activeThreads).begin();
threads = (*activeThreads).begin();
@@ -824,11 +862,10 @@ DefaultRename<Impl>::unblock(unsigned tid)
template <class Impl>
void
-DefaultRename<Impl>::doSquash(unsigned tid)
+DefaultRename<Impl>::doSquash(const InstSeqNum &squashed_seq_num, unsigned tid)
{
- typename list<RenameHistory>::iterator hb_it = historyBuffer[tid].begin();
-
- InstSeqNum squashed_seq_num = fromCommit->commitInfo[tid].doneSeqNum;
+ typename std::list<RenameHistory>::iterator hb_it =
+ historyBuffer[tid].begin();
// After a syscall squashes everything, the history buffer may be empty
// but the ROB may still be squashing instructions.
@@ -866,7 +903,8 @@ DefaultRename<Impl>::removeFromHistory(InstSeqNum inst_seq_num, unsigned tid)
"history buffer %u (size=%i), until [sn:%lli].\n",
tid, tid, historyBuffer[tid].size(), inst_seq_num);
- typename list<RenameHistory>::iterator hb_it = historyBuffer[tid].end();
+ typename std::list<RenameHistory>::iterator hb_it =
+ historyBuffer[tid].end();
--hb_it;
@@ -963,8 +1001,9 @@ DefaultRename<Impl>::renameDestRegs(DynInstPtr &inst,unsigned tid)
historyBuffer[tid].push_front(hb_entry);
- DPRINTF(Rename, "[tid:%u]: Adding instruction to history buffer, "
- "[sn:%lli].\n",tid,
+ DPRINTF(Rename, "[tid:%u]: Adding instruction to history buffer "
+ "(size=%i), [sn:%lli].\n",tid,
+ historyBuffer[tid].size(),
(*historyBuffer[tid].begin()).instSeqNum);
// Tell the instruction to rename the appropriate destination
@@ -1143,7 +1182,13 @@ DefaultRename<Impl>::checkSignalsAndUpdate(unsigned tid)
DPRINTF(Rename, "[tid:%u]: Squashing instructions due to squash from "
"commit.\n", tid);
- squash(tid);
+#if THE_ISA == ALPHA_ISA
+ InstSeqNum squashed_seq_num = fromCommit->commitInfo[tid].doneSeqNum;
+#else
+ InstSeqNum squashed_seq_num = fromCommit->commitInfo[tid].bdelayDoneSeqNum;
+#endif
+
+ squash(squashed_seq_num, tid);
return true;
}
@@ -1258,7 +1303,7 @@ template <class Impl>
void
DefaultRename<Impl>::dumpHistory()
{
- typename list<RenameHistory>::iterator buf_it;
+ typename std::list<RenameHistory>::iterator buf_it;
for (int i = 0; i < numThreads; i++) {
diff --git a/src/cpu/o3/rename_map.hh b/src/cpu/o3/rename_map.hh
index c4c90c99a..896c66f3e 100644
--- a/src/cpu/o3/rename_map.hh
+++ b/src/cpu/o3/rename_map.hh
@@ -40,8 +40,7 @@
#include <vector>
#include "cpu/o3/free_list.hh"
-//For RegIndex
-#include "arch/isa_traits.hh"
+#include "arch/types.hh"
class SimpleRenameMap
{
diff --git a/src/cpu/o3/rob_impl.hh b/src/cpu/o3/rob_impl.hh
index 1b9f666b8..fab114a74 100644
--- a/src/cpu/o3/rob_impl.hh
+++ b/src/cpu/o3/rob_impl.hh
@@ -32,11 +32,11 @@
#include "config/full_system.hh"
#include "cpu/o3/rob.hh"
-using namespace std;
+#include <list>
template <class Impl>
ROB<Impl>::ROB(unsigned _numEntries, unsigned _squashWidth,
- string _smtROBPolicy, unsigned _smtROBThreshold,
+ std::string _smtROBPolicy, unsigned _smtROBThreshold,
unsigned _numThreads)
: numEntries(_numEntries),
squashWidth(_squashWidth),
@@ -49,7 +49,7 @@ ROB<Impl>::ROB(unsigned _numEntries, unsigned _squashWidth,
threadEntries[tid] = 0;
}
- string policy = _smtROBPolicy;
+ std::string policy = _smtROBPolicy;
//Convert string to lowercase
std::transform(policy.begin(), policy.end(), policy.begin(),
@@ -118,7 +118,7 @@ ROB<Impl>::setCPU(O3CPU *cpu_ptr)
template <class Impl>
void
-ROB<Impl>::setActiveThreads(list<unsigned> *at_ptr)
+ROB<Impl>::setActiveThreads(std::list<unsigned> *at_ptr)
{
DPRINTF(ROB, "Setting active threads list pointer.\n");
activeThreads = at_ptr;
@@ -157,8 +157,8 @@ ROB<Impl>::resetEntries()
if (robPolicy != Dynamic || numThreads > 1) {
int active_threads = (*activeThreads).size();
- list<unsigned>::iterator threads = (*activeThreads).begin();
- list<unsigned>::iterator list_end = (*activeThreads).end();
+ std::list<unsigned>::iterator threads = (*activeThreads).begin();
+ std::list<unsigned>::iterator list_end = (*activeThreads).end();
while (threads != list_end) {
if (robPolicy == Partitioned) {
@@ -318,7 +318,7 @@ bool
ROB<Impl>::canCommit()
{
//@todo: set ActiveThreads through ROB or CPU
- list<unsigned>::iterator threads = (*activeThreads).begin();
+ std::list<unsigned>::iterator threads = (*activeThreads).begin();
while (threads != (*activeThreads).end()) {
unsigned tid = *threads++;
@@ -432,7 +432,7 @@ ROB<Impl>::updateHead()
bool first_valid = true;
// @todo: set ActiveThreads through ROB or CPU
- list<unsigned>::iterator threads = (*activeThreads).begin();
+ std::list<unsigned>::iterator threads = (*activeThreads).begin();
while (threads != (*activeThreads).end()) {
unsigned thread_num = *threads++;
@@ -472,7 +472,7 @@ ROB<Impl>::updateTail()
tail = instList[0].end();
bool first_valid = true;
- list<unsigned>::iterator threads = (*activeThreads).begin();
+ std::list<unsigned>::iterator threads = (*activeThreads).begin();
while (threads != (*activeThreads).end()) {
unsigned tid = *threads++;
diff --git a/src/cpu/o3/scoreboard.hh b/src/cpu/o3/scoreboard.hh
index f8e4df3b7..eefff1d8b 100644
--- a/src/cpu/o3/scoreboard.hh
+++ b/src/cpu/o3/scoreboard.hh
@@ -35,7 +35,6 @@
#include <iostream>
#include <utility>
#include <vector>
-#include "arch/alpha/isa_traits.hh"
#include "base/trace.hh"
#include "base/traceflags.hh"
#include "cpu/o3/comm.hh"
diff --git a/src/cpu/o3/store_set.hh b/src/cpu/o3/store_set.hh
index f5a44a1ac..f9f7637d0 100644
--- a/src/cpu/o3/store_set.hh
+++ b/src/cpu/o3/store_set.hh
@@ -36,8 +36,8 @@
#include <utility>
#include <vector>
-#include "arch/isa_traits.hh"
#include "cpu/inst_seq.hh"
+#include "sim/host.hh"
struct ltseqnum {
bool operator()(const InstSeqNum &lhs, const InstSeqNum &rhs) const
diff --git a/src/cpu/o3/thread_context.hh b/src/cpu/o3/thread_context.hh
index df8d1a6d8..9ca02b9f3 100755
--- a/src/cpu/o3/thread_context.hh
+++ b/src/cpu/o3/thread_context.hh
@@ -26,12 +26,12 @@
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* Authors: Kevin Lim
- * Korey Sewell
*/
#ifndef __CPU_O3_THREAD_CONTEXT_HH__
#define __CPU_O3_THREAD_CONTEXT_HH__
+#include "cpu/thread_context.hh"
#include "cpu/o3/isa_specific.hh"
class EndQuiesceEvent;
diff --git a/src/cpu/o3/thread_context_impl.hh b/src/cpu/o3/thread_context_impl.hh
index bf8cbf850..a4546e669 100755
--- a/src/cpu/o3/thread_context_impl.hh
+++ b/src/cpu/o3/thread_context_impl.hh
@@ -32,8 +32,6 @@
#include "cpu/o3/thread_context.hh"
#include "cpu/quiesce_event.hh"
-using namespace TheISA;
-
#if FULL_SYSTEM
template <class Impl>
VirtualPort *
@@ -285,7 +283,7 @@ O3ThreadContext<Impl>::copyArchRegs(ThreadContext *tc)
}
// Copy the misc regs.
- copyMiscRegs(tc, this);
+ TheISA::copyMiscRegs(tc, this);
// Then finally set the PC and the next PC.
cpu->setPC(tc->readPC(), tid);
@@ -308,7 +306,7 @@ O3ThreadContext<Impl>::readIntReg(int reg_idx)
}
template <class Impl>
-FloatReg
+TheISA::FloatReg
O3ThreadContext<Impl>::readFloatReg(int reg_idx, int width)
{
switch(width) {
@@ -323,14 +321,14 @@ O3ThreadContext<Impl>::readFloatReg(int reg_idx, int width)
}
template <class Impl>
-FloatReg
+TheISA::FloatReg
O3ThreadContext<Impl>::readFloatReg(int reg_idx)
{
return cpu->readArchFloatRegSingle(reg_idx, thread->readTid());
}
template <class Impl>
-FloatRegBits
+TheISA::FloatRegBits
O3ThreadContext<Impl>::readFloatRegBits(int reg_idx, int width)
{
DPRINTF(Fault, "Reading floatint register through the TC!\n");
@@ -338,7 +336,7 @@ O3ThreadContext<Impl>::readFloatRegBits(int reg_idx, int width)
}
template <class Impl>
-FloatRegBits
+TheISA::FloatRegBits
O3ThreadContext<Impl>::readFloatRegBits(int reg_idx)
{
return cpu->readArchFloatRegInt(reg_idx, thread->readTid());
diff --git a/src/cpu/o3/thread_state.hh b/src/cpu/o3/thread_state.hh
index 1c8105204..b6f2e14c0 100644
--- a/src/cpu/o3/thread_state.hh
+++ b/src/cpu/o3/thread_state.hh
@@ -31,8 +31,6 @@
#ifndef __CPU_O3_THREAD_STATE_HH__
#define __CPU_O3_THREAD_STATE_HH__
-#include "arch/faults.hh"
-#include "arch/isa_traits.hh"
#include "cpu/thread_context.hh"
#include "cpu/thread_state.hh"
diff --git a/src/cpu/o3/tournament_pred.hh b/src/cpu/o3/tournament_pred.hh
index 92402adc6..66b4aaae2 100644
--- a/src/cpu/o3/tournament_pred.hh
+++ b/src/cpu/o3/tournament_pred.hh
@@ -31,9 +31,8 @@
#ifndef __CPU_O3_TOURNAMENT_PRED_HH__
#define __CPU_O3_TOURNAMENT_PRED_HH__
-// For Addr type.
-#include "arch/isa_traits.hh"
#include "cpu/o3/sat_counter.hh"
+#include "sim/host.hh"
#include <vector>
/**
diff --git a/src/cpu/ozone/cpu_impl.hh b/src/cpu/ozone/cpu_impl.hh
index f58b81990..80f18434c 100644
--- a/src/cpu/ozone/cpu_impl.hh
+++ b/src/cpu/ozone/cpu_impl.hh
@@ -47,6 +47,7 @@
#include "arch/faults.hh"
#include "arch/alpha/osfpal.hh"
#include "arch/alpha/tlb.hh"
+#include "arch/alpha/types.hh"
#include "arch/vtophys.hh"
#include "base/callback.hh"
//#include "base/remote_gdb.hh"
diff --git a/src/cpu/ozone/dyn_inst.hh b/src/cpu/ozone/dyn_inst.hh
index 67691d416..75ac464ec 100644
--- a/src/cpu/ozone/dyn_inst.hh
+++ b/src/cpu/ozone/dyn_inst.hh
@@ -32,6 +32,7 @@
#define __CPU_OZONE_DYN_INST_HH__
#include "arch/isa_traits.hh"
+#include "arch/types.hh"
#include "config/full_system.hh"
#include "cpu/base_dyn_inst.hh"
#include "cpu/inst_seq.hh"
diff --git a/src/cpu/ozone/dyn_inst_impl.hh b/src/cpu/ozone/dyn_inst_impl.hh
index bad902c2a..ba0d70417 100644
--- a/src/cpu/ozone/dyn_inst_impl.hh
+++ b/src/cpu/ozone/dyn_inst_impl.hh
@@ -29,13 +29,10 @@
*/
#include "arch/faults.hh"
-#include "arch/isa_traits.hh"
#include "config/full_system.hh"
#include "cpu/ozone/dyn_inst.hh"
#include "kern/kernel_stats.hh"
-using namespace TheISA;
-
template <class Impl>
OzoneDynInst<Impl>::OzoneDynInst(OzoneCPU *cpu)
: BaseDynInst<Impl>(0, 0, 0, 0, cpu)
diff --git a/src/cpu/ozone/ea_list.hh b/src/cpu/ozone/ea_list.hh
index 64882632c..d9e9d701f 100644
--- a/src/cpu/ozone/ea_list.hh
+++ b/src/cpu/ozone/ea_list.hh
@@ -35,8 +35,8 @@
#include <list>
#include <utility>
-#include "arch/isa_traits.hh"
#include "cpu/inst_seq.hh"
+#include "sim/host.hh"
/**
* Simple class to hold onto a list of pairs, each pair having a memory
diff --git a/src/cpu/ozone/front_end_impl.hh b/src/cpu/ozone/front_end_impl.hh
index 9da937320..c9c5a869b 100644
--- a/src/cpu/ozone/front_end_impl.hh
+++ b/src/cpu/ozone/front_end_impl.hh
@@ -32,6 +32,7 @@
#include "arch/faults.hh"
#include "arch/isa_traits.hh"
+#include "arch/utility.hh"
#include "base/statistics.hh"
#include "cpu/thread_context.hh"
#include "cpu/exetrace.hh"
diff --git a/src/cpu/ozone/inorder_back_end_impl.hh b/src/cpu/ozone/inorder_back_end_impl.hh
index cbb73364e..701fc0ee9 100644
--- a/src/cpu/ozone/inorder_back_end_impl.hh
+++ b/src/cpu/ozone/inorder_back_end_impl.hh
@@ -29,12 +29,10 @@
*/
#include "arch/faults.hh"
-#include "arch/isa_traits.hh"
+#include "arch/types.hh"
#include "cpu/ozone/inorder_back_end.hh"
#include "cpu/ozone/thread_state.hh"
-using namespace TheISA;
-
template <class Impl>
InorderBackEnd<Impl>::InorderBackEnd(Params *params)
: squashPending(false),
diff --git a/src/cpu/ozone/lsq_unit.hh b/src/cpu/ozone/lsq_unit.hh
index 1b5340e55..38c1c09a2 100644
--- a/src/cpu/ozone/lsq_unit.hh
+++ b/src/cpu/ozone/lsq_unit.hh
@@ -36,7 +36,7 @@
#include <algorithm>
#include "arch/faults.hh"
-#include "arch/isa_traits.hh"
+#include "arch/types.hh"
#include "config/full_system.hh"
#include "base/hashmap.hh"
#include "cpu/inst_seq.hh"
diff --git a/src/cpu/ozone/lsq_unit_impl.hh b/src/cpu/ozone/lsq_unit_impl.hh
index f8cb18634..ee0804036 100644
--- a/src/cpu/ozone/lsq_unit_impl.hh
+++ b/src/cpu/ozone/lsq_unit_impl.hh
@@ -28,7 +28,7 @@
* Authors: Kevin Lim
*/
-#include "arch/isa_traits.hh"
+#include "arch/faults.hh"
#include "base/str.hh"
#include "cpu/ozone/lsq_unit.hh"
diff --git a/src/cpu/ozone/lw_lsq.hh b/src/cpu/ozone/lw_lsq.hh
index 2eb09d01a..9a21a9d01 100644
--- a/src/cpu/ozone/lw_lsq.hh
+++ b/src/cpu/ozone/lw_lsq.hh
@@ -37,7 +37,7 @@
#include <algorithm>
#include "arch/faults.hh"
-#include "arch/isa_traits.hh"
+#include "arch/types.hh"
#include "config/full_system.hh"
#include "base/hashmap.hh"
#include "cpu/inst_seq.hh"
diff --git a/src/cpu/ozone/lw_lsq_impl.hh b/src/cpu/ozone/lw_lsq_impl.hh
index 88e9c218f..7eef4b11f 100644
--- a/src/cpu/ozone/lw_lsq_impl.hh
+++ b/src/cpu/ozone/lw_lsq_impl.hh
@@ -30,7 +30,7 @@
#include "config/use_checker.hh"
-#include "arch/isa_traits.hh"
+#include "arch/faults.hh"
#include "base/str.hh"
#include "cpu/ozone/lw_lsq.hh"
#include "cpu/checker/cpu.hh"
diff --git a/src/cpu/ozone/null_predictor.hh b/src/cpu/ozone/null_predictor.hh
index a98c89d69..0751338b7 100644
--- a/src/cpu/ozone/null_predictor.hh
+++ b/src/cpu/ozone/null_predictor.hh
@@ -31,8 +31,8 @@
#ifndef __CPU_OZONE_NULL_PREDICTOR_HH__
#define __CPU_OZONE_NULL_PREDICTOR_HH__
-#include "arch/isa_traits.hh"
#include "cpu/inst_seq.hh"
+#include "sim/host.hh"
template <class Impl>
class NullPredictor
diff --git a/src/cpu/ozone/ozone_impl.hh b/src/cpu/ozone/ozone_impl.hh
index 503675738..2271cd68a 100644
--- a/src/cpu/ozone/ozone_impl.hh
+++ b/src/cpu/ozone/ozone_impl.hh
@@ -31,7 +31,6 @@
#ifndef __CPU_OZONE_OZONE_IMPL_HH__
#define __CPU_OZONE_OZONE_IMPL_HH__
-#include "arch/alpha/isa_traits.hh"
#include "cpu/o3/bpred_unit.hh"
#include "cpu/ozone/front_end.hh"
#include "cpu/ozone/inst_queue.hh"
diff --git a/src/cpu/ozone/simple_impl.hh b/src/cpu/ozone/simple_impl.hh
index 3199d8d8a..42002180b 100644
--- a/src/cpu/ozone/simple_impl.hh
+++ b/src/cpu/ozone/simple_impl.hh
@@ -31,7 +31,6 @@
#ifndef __CPU_OZONE_SIMPLE_IMPL_HH__
#define __CPU_OZONE_SIMPLE_IMPL_HH__
-#include "arch/isa_traits.hh"
#include "cpu/o3/bpred_unit.hh"
#include "cpu/ozone/cpu.hh"
#include "cpu/ozone/front_end.hh"
diff --git a/src/cpu/ozone/thread_state.hh b/src/cpu/ozone/thread_state.hh
index ef4b1429d..8234cf938 100644
--- a/src/cpu/ozone/thread_state.hh
+++ b/src/cpu/ozone/thread_state.hh
@@ -32,7 +32,8 @@
#define __CPU_OZONE_THREAD_STATE_HH__
#include "arch/faults.hh"
-#include "arch/isa_traits.hh"
+#include "arch/types.hh"
+#include "arch/regfile.hh"
#include "cpu/thread_context.hh"
#include "cpu/thread_state.hh"
#include "sim/process.hh"
diff --git a/src/cpu/simple/atomic.cc b/src/cpu/simple/atomic.cc
index 12bfdeb9b..c396f5033 100644
--- a/src/cpu/simple/atomic.cc
+++ b/src/cpu/simple/atomic.cc
@@ -33,6 +33,7 @@
#include "cpu/simple/atomic.hh"
#include "mem/packet_impl.hh"
#include "sim/builder.hh"
+#include "sim/system.hh"
using namespace std;
using namespace TheISA;
@@ -158,18 +159,31 @@ AtomicSimpleCPU::~AtomicSimpleCPU()
void
AtomicSimpleCPU::serialize(ostream &os)
{
- SERIALIZE_ENUM(_status);
- BaseSimpleCPU::serialize(os);
+ SimObject::State so_state = SimObject::getState();
+ SERIALIZE_ENUM(so_state);
nameOut(os, csprintf("%s.tickEvent", name()));
tickEvent.serialize(os);
+ BaseSimpleCPU::serialize(os);
}
void
AtomicSimpleCPU::unserialize(Checkpoint *cp, const string &section)
{
- UNSERIALIZE_ENUM(_status);
- BaseSimpleCPU::unserialize(cp, section);
+ SimObject::State so_state;
+ UNSERIALIZE_ENUM(so_state);
tickEvent.unserialize(cp, csprintf("%s.tickEvent", section));
+ BaseSimpleCPU::unserialize(cp, section);
+}
+
+void
+AtomicSimpleCPU::resume()
+{
+ assert(system->getMemoryMode() == System::Atomic);
+ changeState(SimObject::Running);
+ if (thread->status() == ThreadContext::Active) {
+ if (!tickEvent.scheduled())
+ tickEvent.schedule(curTick);
+ }
}
void
@@ -451,11 +465,11 @@ BEGIN_DECLARE_SIM_OBJECT_PARAMS(AtomicSimpleCPU)
Param<Counter> max_loads_any_thread;
Param<Counter> max_loads_all_threads;
SimObjectParam<MemObject *> mem;
+ SimObjectParam<System *> system;
#if FULL_SYSTEM
SimObjectParam<AlphaITB *> itb;
SimObjectParam<AlphaDTB *> dtb;
- SimObjectParam<System *> system;
Param<int> cpu_id;
Param<Tick> profile;
#else
@@ -483,11 +497,11 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(AtomicSimpleCPU)
INIT_PARAM(max_loads_all_threads,
"terminate when all threads have reached this load count"),
INIT_PARAM(mem, "memory"),
+ INIT_PARAM(system, "system object"),
#if FULL_SYSTEM
INIT_PARAM(itb, "Instruction TLB"),
INIT_PARAM(dtb, "Data TLB"),
- INIT_PARAM(system, "system object"),
INIT_PARAM(cpu_id, "processor ID"),
INIT_PARAM(profile, ""),
#else
@@ -520,11 +534,11 @@ CREATE_SIM_OBJECT(AtomicSimpleCPU)
params->width = width;
params->simulate_stalls = simulate_stalls;
params->mem = mem;
+ params->system = system;
#if FULL_SYSTEM
params->itb = itb;
params->dtb = dtb;
- params->system = system;
params->cpu_id = cpu_id;
params->profile = profile;
#else
diff --git a/src/cpu/simple/atomic.hh b/src/cpu/simple/atomic.hh
index 179b4a721..b602af558 100644
--- a/src/cpu/simple/atomic.hh
+++ b/src/cpu/simple/atomic.hh
@@ -126,6 +126,7 @@ class AtomicSimpleCPU : public BaseSimpleCPU
virtual void serialize(std::ostream &os);
virtual void unserialize(Checkpoint *cp, const std::string &section);
+ virtual void resume();
void switchOut();
void takeOverFrom(BaseCPU *oldCPU);
diff --git a/src/cpu/simple/base.cc b/src/cpu/simple/base.cc
index a50541189..801c96c88 100644
--- a/src/cpu/simple/base.cc
+++ b/src/cpu/simple/base.cc
@@ -26,10 +26,10 @@
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* Authors: Steve Reinhardt
- * Korey Sewell
*/
#include "arch/utility.hh"
+#include "arch/faults.hh"
#include "base/cprintf.hh"
#include "base/inifile.hh"
#include "base/loader/symtab.hh"
@@ -55,10 +55,10 @@
#include "sim/sim_events.hh"
#include "sim/sim_object.hh"
#include "sim/stats.hh"
+#include "sim/system.hh"
#if FULL_SYSTEM
#include "base/remote_gdb.hh"
-#include "sim/system.hh"
#include "arch/tlb.hh"
#include "arch/stacktrace.hh"
#include "arch/vtophys.hh"
@@ -178,8 +178,8 @@ void
BaseSimpleCPU::serialize(ostream &os)
{
BaseCPU::serialize(os);
- SERIALIZE_SCALAR(inst);
- nameOut(os, csprintf("%s.xc", name()));
+// SERIALIZE_SCALAR(inst);
+ nameOut(os, csprintf("%s.xc.0", name()));
thread->serialize(os);
}
@@ -187,8 +187,8 @@ void
BaseSimpleCPU::unserialize(Checkpoint *cp, const string &section)
{
BaseCPU::unserialize(cp, section);
- UNSERIALIZE_SCALAR(inst);
- thread->unserialize(cp, csprintf("%s.xc", section));
+// UNSERIALIZE_SCALAR(inst);
+ thread->unserialize(cp, csprintf("%s.xc.0", section));
}
void
@@ -455,6 +455,7 @@ BaseSimpleCPU::advancePC(Fault fault)
#else
thread->setNextPC(thread->readNextNPC());
thread->setNextNPC(thread->readNextNPC() + sizeof(MachInst));
+ assert(thread->readNextPC() != thread->readNextNPC());
#endif
}
diff --git a/src/cpu/simple/timing.cc b/src/cpu/simple/timing.cc
index e55301c6b..5c1654f7e 100644
--- a/src/cpu/simple/timing.cc
+++ b/src/cpu/simple/timing.cc
@@ -33,6 +33,7 @@
#include "cpu/simple/timing.hh"
#include "mem/packet_impl.hh"
#include "sim/builder.hh"
+#include "sim/system.hh"
using namespace std;
using namespace TheISA;
@@ -84,14 +85,22 @@ TimingSimpleCPU::CpuPort::recvStatusChange(Status status)
panic("TimingSimpleCPU doesn't expect recvStatusChange callback!");
}
+
+void
+TimingSimpleCPU::CpuPort::TickEvent::schedule(Packet *_pkt, Tick t)
+{
+ pkt = _pkt;
+ Event::schedule(t);
+}
+
TimingSimpleCPU::TimingSimpleCPU(Params *p)
- : BaseSimpleCPU(p), icachePort(this), dcachePort(this)
+ : BaseSimpleCPU(p), icachePort(this, p->clock), dcachePort(this, p->clock)
{
_status = Idle;
ifetch_pkt = dcache_pkt = NULL;
drainEvent = NULL;
fetchEvent = NULL;
- state = SimObject::Timing;
+ changeState(SimObject::Running);
}
@@ -102,29 +111,31 @@ TimingSimpleCPU::~TimingSimpleCPU()
void
TimingSimpleCPU::serialize(ostream &os)
{
- SERIALIZE_ENUM(_status);
+ SimObject::State so_state = SimObject::getState();
+ SERIALIZE_ENUM(so_state);
BaseSimpleCPU::serialize(os);
}
void
TimingSimpleCPU::unserialize(Checkpoint *cp, const string &section)
{
- UNSERIALIZE_ENUM(_status);
+ SimObject::State so_state;
+ UNSERIALIZE_ENUM(so_state);
BaseSimpleCPU::unserialize(cp, section);
}
-bool
+unsigned int
TimingSimpleCPU::drain(Event *drain_event)
{
// TimingSimpleCPU is ready to drain if it's not waiting for
// an access to complete.
if (status() == Idle || status() == Running || status() == SwitchedOut) {
- changeState(SimObject::DrainedTiming);
- return true;
+ changeState(SimObject::Drained);
+ return 0;
} else {
changeState(SimObject::Draining);
drainEvent = drain_event;
- return false;
+ return 1;
}
}
@@ -134,7 +145,9 @@ TimingSimpleCPU::resume()
if (_status != SwitchedOut && _status != Idle) {
// Delete the old event if it existed.
if (fetchEvent) {
- assert(!fetchEvent->scheduled());
+ if (fetchEvent->scheduled())
+ fetchEvent->deschedule();
+
delete fetchEvent;
}
@@ -142,12 +155,9 @@ TimingSimpleCPU::resume()
new EventWrapper<TimingSimpleCPU, &TimingSimpleCPU::fetch>(this, false);
fetchEvent->schedule(curTick);
}
-}
-void
-TimingSimpleCPU::setMemoryMode(State new_mode)
-{
- assert(new_mode == SimObject::Timing);
+ assert(system->getMemoryMode() == System::Timing);
+ changeState(SimObject::Running);
}
void
@@ -460,11 +470,26 @@ TimingSimpleCPU::completeIfetch(Packet *pkt)
}
}
+void
+TimingSimpleCPU::IcachePort::ITickEvent::process()
+{
+ cpu->completeIfetch(pkt);
+}
bool
TimingSimpleCPU::IcachePort::recvTiming(Packet *pkt)
{
- cpu->completeIfetch(pkt);
+ // These next few lines could be replaced with something faster
+ // who knows what though
+ Tick time = pkt->req->getTime();
+ while (time < curTick)
+ time += lat;
+
+ if (time == curTick)
+ cpu->completeIfetch(pkt);
+ else
+ tickEvent.schedule(pkt, time);
+
return true;
}
@@ -514,18 +539,32 @@ void
TimingSimpleCPU::completeDrain()
{
DPRINTF(Config, "Done draining\n");
- changeState(SimObject::DrainedTiming);
+ changeState(SimObject::Drained);
drainEvent->process();
}
bool
TimingSimpleCPU::DcachePort::recvTiming(Packet *pkt)
{
- cpu->completeDataAccess(pkt);
+ Tick time = pkt->req->getTime();
+ while (time < curTick)
+ time += lat;
+
+ if (time == curTick)
+ cpu->completeDataAccess(pkt);
+ else
+ tickEvent.schedule(pkt, time);
+
return true;
}
void
+TimingSimpleCPU::DcachePort::DTickEvent::process()
+{
+ cpu->completeDataAccess(pkt);
+}
+
+void
TimingSimpleCPU::DcachePort::recvRetry()
{
// we shouldn't get a retry unless we have a packet that we're
@@ -551,11 +590,11 @@ BEGIN_DECLARE_SIM_OBJECT_PARAMS(TimingSimpleCPU)
Param<Counter> max_loads_any_thread;
Param<Counter> max_loads_all_threads;
SimObjectParam<MemObject *> mem;
+ SimObjectParam<System *> system;
#if FULL_SYSTEM
SimObjectParam<AlphaITB *> itb;
SimObjectParam<AlphaDTB *> dtb;
- SimObjectParam<System *> system;
Param<int> cpu_id;
Param<Tick> profile;
#else
@@ -583,11 +622,11 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(TimingSimpleCPU)
INIT_PARAM(max_loads_all_threads,
"terminate when all threads have reached this load count"),
INIT_PARAM(mem, "memory"),
+ INIT_PARAM(system, "system object"),
#if FULL_SYSTEM
INIT_PARAM(itb, "Instruction TLB"),
INIT_PARAM(dtb, "Data TLB"),
- INIT_PARAM(system, "system object"),
INIT_PARAM(cpu_id, "processor ID"),
INIT_PARAM(profile, ""),
#else
@@ -618,11 +657,11 @@ CREATE_SIM_OBJECT(TimingSimpleCPU)
params->functionTrace = function_trace;
params->functionTraceStart = function_trace_start;
params->mem = mem;
+ params->system = system;
#if FULL_SYSTEM
params->itb = itb;
params->dtb = dtb;
- params->system = system;
params->cpu_id = cpu_id;
params->profile = profile;
#else
diff --git a/src/cpu/simple/timing.hh b/src/cpu/simple/timing.hh
index 0a3f91e6c..d03fa4bc0 100644
--- a/src/cpu/simple/timing.hh
+++ b/src/cpu/simple/timing.hh
@@ -74,11 +74,12 @@ class TimingSimpleCPU : public BaseSimpleCPU
{
protected:
TimingSimpleCPU *cpu;
+ Tick lat;
public:
- CpuPort(const std::string &_name, TimingSimpleCPU *_cpu)
- : Port(_name), cpu(_cpu)
+ CpuPort(const std::string &_name, TimingSimpleCPU *_cpu, Tick _lat)
+ : Port(_name), cpu(_cpu), lat(_lat)
{ }
protected:
@@ -92,14 +93,26 @@ class TimingSimpleCPU : public BaseSimpleCPU
virtual void getDeviceAddressRanges(AddrRangeList &resp,
AddrRangeList &snoop)
{ resp.clear(); snoop.clear(); }
+
+ struct TickEvent : public Event
+ {
+ Packet *pkt;
+ TimingSimpleCPU *cpu;
+
+ TickEvent(TimingSimpleCPU *_cpu)
+ :Event(&mainEventQueue), cpu(_cpu) {}
+ const char *description() { return "Timing CPU clock event"; }
+ void schedule(Packet *_pkt, Tick t);
+ };
+
};
class IcachePort : public CpuPort
{
public:
- IcachePort(TimingSimpleCPU *_cpu)
- : CpuPort(_cpu->name() + "-iport", _cpu)
+ IcachePort(TimingSimpleCPU *_cpu, Tick _lat)
+ : CpuPort(_cpu->name() + "-iport", _cpu, _lat), tickEvent(_cpu)
{ }
protected:
@@ -107,14 +120,26 @@ class TimingSimpleCPU : public BaseSimpleCPU
virtual bool recvTiming(Packet *pkt);
virtual void recvRetry();
+
+ struct ITickEvent : public TickEvent
+ {
+
+ ITickEvent(TimingSimpleCPU *_cpu)
+ : TickEvent(_cpu) {}
+ void process();
+ const char *description() { return "Timing CPU clock event"; }
+ };
+
+ ITickEvent tickEvent;
+
};
class DcachePort : public CpuPort
{
public:
- DcachePort(TimingSimpleCPU *_cpu)
- : CpuPort(_cpu->name() + "-dport", _cpu)
+ DcachePort(TimingSimpleCPU *_cpu, Tick _lat)
+ : CpuPort(_cpu->name() + "-dport", _cpu, _lat), tickEvent(_cpu)
{ }
protected:
@@ -122,6 +147,17 @@ class TimingSimpleCPU : public BaseSimpleCPU
virtual bool recvTiming(Packet *pkt);
virtual void recvRetry();
+
+ struct DTickEvent : public TickEvent
+ {
+ DTickEvent(TimingSimpleCPU *_cpu)
+ : TickEvent(_cpu) {}
+ void process();
+ const char *description() { return "Timing CPU clock event"; }
+ };
+
+ DTickEvent tickEvent;
+
};
IcachePort icachePort;
@@ -137,9 +173,8 @@ class TimingSimpleCPU : public BaseSimpleCPU
virtual void serialize(std::ostream &os);
virtual void unserialize(Checkpoint *cp, const std::string &section);
- virtual bool drain(Event *drain_event);
+ virtual unsigned int drain(Event *drain_event);
virtual void resume();
- virtual void setMemoryMode(State new_mode);
void switchOut();
void takeOverFrom(BaseCPU *oldCPU);
diff --git a/src/cpu/simple_thread.cc b/src/cpu/simple_thread.cc
index af1db2ff2..5f86cf2b7 100644
--- a/src/cpu/simple_thread.cc
+++ b/src/cpu/simple_thread.cc
@@ -196,6 +196,7 @@ SimpleThread::copyState(ThreadContext *oldContext)
#if !FULL_SYSTEM
funcExeInst = oldContext->readFuncExeInst();
#endif
+ inst = oldContext->getInst();
}
void
diff --git a/src/cpu/simple_thread.hh b/src/cpu/simple_thread.hh
index d36853db4..242cfd0e1 100644
--- a/src/cpu/simple_thread.hh
+++ b/src/cpu/simple_thread.hh
@@ -449,8 +449,8 @@ class SimpleThread : public ThreadState
}
#endif
- void changeRegFileContext(RegFile::ContextParam param,
- RegFile::ContextVal val)
+ void changeRegFileContext(TheISA::RegContextParam param,
+ TheISA::RegContextVal val)
{
regs.changeContext(param, val);
}
diff --git a/src/cpu/static_inst.hh b/src/cpu/static_inst.hh
index ea1a65148..578d14191 100644
--- a/src/cpu/static_inst.hh
+++ b/src/cpu/static_inst.hh
@@ -34,14 +34,16 @@
#include <bitset>
#include <string>
+#include "arch/isa_traits.hh"
+#include "sim/faults.hh"
#include "base/bitfield.hh"
#include "base/hashmap.hh"
#include "base/misc.hh"
#include "base/refcnt.hh"
#include "cpu/op_class.hh"
#include "cpu/o3/dyn_inst.hh"
+#include "sim/faults.hh"
#include "sim/host.hh"
-#include "arch/isa_traits.hh"
// forward declarations
struct AlphaSimpleImpl;
@@ -214,6 +216,7 @@ class StaticInstBase : public RefCounted
bool isIndirectCtrl() const { return flags[IsIndirectControl]; }
bool isCondCtrl() const { return flags[IsCondControl]; }
bool isUncondCtrl() const { return flags[IsUncondControl]; }
+ bool isCondDelaySlot() const { return flags[IsCondDelaySlot]; }
bool isThreadSync() const { return flags[IsThreadSync]; }
bool isSerializing() const { return flags[IsSerializing] ||
diff --git a/src/cpu/thread_context.hh b/src/cpu/thread_context.hh
index e019e22bc..73046097d 100644
--- a/src/cpu/thread_context.hh
+++ b/src/cpu/thread_context.hh
@@ -31,6 +31,9 @@
#ifndef __CPU_THREAD_CONTEXT_HH__
#define __CPU_THREAD_CONTEXT_HH__
+#include "arch/types.hh"
+#include "arch/regfile.hh"
+#include "arch/syscallreturn.hh"
#include "config/full_system.hh"
#include "mem/request.hh"
#include "sim/faults.hh"
@@ -254,8 +257,8 @@ class ThreadContext
virtual int exit() { return 1; };
#endif
- virtual void changeRegFileContext(RegFile::ContextParam param,
- RegFile::ContextVal val) = 0;
+ virtual void changeRegFileContext(TheISA::RegContextParam param,
+ TheISA::RegContextVal val) = 0;
};
/**
@@ -438,8 +441,8 @@ class ProxyThreadContext : public ThreadContext
Counter readFuncExeInst() { return actualTC->readFuncExeInst(); }
#endif
- void changeRegFileContext(RegFile::ContextParam param,
- RegFile::ContextVal val)
+ void changeRegFileContext(TheISA::RegContextParam param,
+ TheISA::RegContextVal val)
{
actualTC->changeRegFileContext(param, val);
}
diff --git a/src/cpu/thread_state.hh b/src/cpu/thread_state.hh
index b03a2e2bb..6e985054f 100644
--- a/src/cpu/thread_state.hh
+++ b/src/cpu/thread_state.hh
@@ -31,7 +31,7 @@
#ifndef __CPU_THREAD_STATE_HH__
#define __CPU_THREAD_STATE_HH__
-#include "arch/isa_traits.hh"
+#include "arch/types.hh"
#include "cpu/thread_context.hh"
#if !FULL_SYSTEM