summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Kevin Lim <ktlim@umich.edu> 2005-05-03 10:56:47 -0400
committer Kevin Lim <ktlim@umich.edu> 2005-05-03 10:56:47 -0400
commit 61d95de4c886911fa0b7dc9d587ffe5b292b739e (patch)
tree d70531683cfb9bdb7ab967a99fbb3d6e0c34814f
parent 6191d3e4443b5337232a238a3a0dd5d11249e223 (diff)
download gem5-61d95de4c886911fa0b7dc9d587ffe5b292b739e.tar.xz
Large update of several parts of my code. The most notable change is the inclusion of a full-fledged load/store queue. At the moment it still has some issues running, but most of the code is hopefully close to the final version.
SConscript: arch/isa_parser.py: cpu/base_dyn_inst.cc: Remove OOO CPU stuff. arch/alpha/faults.hh: Add fake memory fault. This will be removed eventually. arch/alpha/isa_desc: Change EA comp and Mem accessor to be const StaticInstPtrs. cpu/base_dyn_inst.hh: Update read/write calls to use load queue and store queue indices. cpu/beta_cpu/alpha_dyn_inst.hh: Change to const StaticInst in the register accessors. cpu/beta_cpu/alpha_dyn_inst_impl.hh: Update syscall code with thread numbers. cpu/beta_cpu/alpha_full_cpu.hh: Alter some of the full system code so it will compile without errors. cpu/beta_cpu/alpha_full_cpu_builder.cc: Created a DerivAlphaFullCPU class so I can instantiate different CPUs that have different template parameters. cpu/beta_cpu/alpha_full_cpu_impl.hh: Update some of the full system code so it compiles. cpu/beta_cpu/alpha_params.hh: cpu/beta_cpu/fetch_impl.hh: Remove asid. cpu/beta_cpu/comm.hh: Remove global history field. cpu/beta_cpu/commit.hh: Comment out rename map. cpu/beta_cpu/commit_impl.hh: Update some of the full system code so it compiles. Also change it so that it handles memory instructions properly. cpu/beta_cpu/cpu_policy.hh: Removed IQ from the IEW template parameter to make it more uniform. cpu/beta_cpu/decode.hh: Add debug function. cpu/beta_cpu/decode_impl.hh: Slight updates for decode in the case where it causes a squash. cpu/beta_cpu/fetch.hh: cpu/beta_cpu/rob.hh: Comment out unnecessary code. cpu/beta_cpu/full_cpu.cc: Changed some of the full system code so it compiles. Updated exec contexts and so forth to hopefully make multithreading easier. cpu/beta_cpu/full_cpu.hh: Updated some of the full system code to make it compile. cpu/beta_cpu/iew.cc: Removed IQ from template parameter to IEW. cpu/beta_cpu/iew.hh: Removed IQ from template parameter to IEW. Updated IEW to recognize the Load/Store queue. cpu/beta_cpu/iew_impl.hh: New handling of memory instructions through the Load/Store queue. 
cpu/beta_cpu/inst_queue.hh: Updated comment. cpu/beta_cpu/inst_queue_impl.hh: Slightly different handling of memory instructions due to Load/Store queue. cpu/beta_cpu/regfile.hh: Updated full system code so it compiles. cpu/beta_cpu/rob_impl.hh: Moved some code around; no major functional changes. cpu/ooo_cpu/ooo_cpu.hh: Slight updates to OOO CPU; still does not work. cpu/static_inst.hh: Remove OOO CPU stuff. Change ea comp and mem acc to return const StaticInst. kern/kernel_stats.hh: Extra forward declares added due to compile error. --HG-- extra : convert_revision : 594a7cdbe57f6c2bda7d08856fcd864604a6238e
-rw-r--r--SConscript8
-rw-r--r--arch/alpha/faults.hh1
-rw-r--r--arch/alpha/isa_desc12
-rwxr-xr-xarch/isa_parser.py3
-rw-r--r--cpu/base_dyn_inst.cc7
-rw-r--r--cpu/base_dyn_inst.hh13
-rw-r--r--cpu/beta_cpu/alpha_dyn_inst.hh21
-rw-r--r--cpu/beta_cpu/alpha_dyn_inst_impl.hh3
-rw-r--r--cpu/beta_cpu/alpha_full_cpu.hh32
-rw-r--r--cpu/beta_cpu/alpha_full_cpu_builder.cc61
-rw-r--r--cpu/beta_cpu/alpha_full_cpu_impl.hh432
-rw-r--r--cpu/beta_cpu/alpha_params.hh2
-rw-r--r--cpu/beta_cpu/comm.hh7
-rw-r--r--cpu/beta_cpu/commit.hh3
-rw-r--r--cpu/beta_cpu/commit_impl.hh36
-rw-r--r--cpu/beta_cpu/cpu_policy.hh2
-rw-r--r--cpu/beta_cpu/decode.hh4
-rw-r--r--cpu/beta_cpu/decode_impl.hh56
-rw-r--r--cpu/beta_cpu/fetch.hh7
-rw-r--r--cpu/beta_cpu/fetch_impl.hh20
-rw-r--r--cpu/beta_cpu/full_cpu.cc84
-rw-r--r--cpu/beta_cpu/full_cpu.hh44
-rw-r--r--cpu/beta_cpu/iew.cc2
-rw-r--r--cpu/beta_cpu/iew.hh46
-rw-r--r--cpu/beta_cpu/iew_impl.hh229
-rw-r--r--cpu/beta_cpu/inst_queue.hh2
-rw-r--r--cpu/beta_cpu/inst_queue_impl.hh125
-rw-r--r--cpu/beta_cpu/regfile.hh373
-rw-r--r--cpu/beta_cpu/rob.hh2
-rw-r--r--cpu/beta_cpu/rob_impl.hh17
-rw-r--r--cpu/ooo_cpu/ooo_cpu.hh37
-rw-r--r--cpu/static_inst.hh8
-rw-r--r--kern/kernel_stats.hh3
-rw-r--r--python/m5/objects/AlphaFullCPU.mpy79
34 files changed, 846 insertions, 935 deletions
diff --git a/SConscript b/SConscript
index 525a94818..7b5b2a970 100644
--- a/SConscript
+++ b/SConscript
@@ -52,7 +52,6 @@ base_sources = Split('''
arch/alpha/full_cpu_exec.cc
arch/alpha/faults.cc
arch/alpha/isa_traits.cc
- arch/alpha/ooo_cpu_exec.cc
base/circlebuf.cc
base/copyright.cc
@@ -157,10 +156,6 @@ base_sources = Split('''
cpu/full_cpu/iq/seznec/iq_seznec.cc
cpu/full_cpu/iq/standard/iq_standard.cc
cpu/inorder_cpu/inorder_cpu.cc
- cpu/ooo_cpu/ea_list.cc
- cpu/ooo_cpu/ooo_cpu.cc
- cpu/ooo_cpu/ooo_dyn_inst.cc
- cpu/ooo_cpu/ooo_sim_obj.cc
cpu/sampling_cpu/sampling_cpu.cc
cpu/simple_cpu/simple_cpu.cc
cpu/trace/reader/mem_trace_reader.cc
@@ -402,8 +397,7 @@ env.Command(Split('''arch/alpha/decoder.cc
arch/alpha/fast_cpu_exec.cc
arch/alpha/simple_cpu_exec.cc
arch/alpha/inorder_cpu_exec.cc
- arch/alpha/full_cpu_exec.cc
- arch/alpha/ooo_cpu_exec.cc'''),
+ arch/alpha/full_cpu_exec.cc'''),
Split('''arch/alpha/isa_desc
arch/isa_parser.py'''),
'$SRCDIR/arch/isa_parser.py $SOURCE $TARGET.dir arch/alpha')
diff --git a/arch/alpha/faults.hh b/arch/alpha/faults.hh
index 45ac122dc..a49a1c4f0 100644
--- a/arch/alpha/faults.hh
+++ b/arch/alpha/faults.hh
@@ -47,6 +47,7 @@ enum Fault {
Fen_Fault, // FP not-enabled fault
Pal_Fault, // call_pal S/W interrupt
Integer_Overflow_Fault,
+ Fake_Mem_Fault,
Num_Faults // number of faults
};
diff --git a/arch/alpha/isa_desc b/arch/alpha/isa_desc
index 904af3ef0..0e07400d3 100644
--- a/arch/alpha/isa_desc
+++ b/arch/alpha/isa_desc
@@ -744,9 +744,9 @@ output header {{
/// Memory request flags. See mem_req_base.hh.
unsigned memAccessFlags;
/// Pointer to EAComp object.
- StaticInstPtr<AlphaISA> eaCompPtr;
+ const StaticInstPtr<AlphaISA> eaCompPtr;
/// Pointer to MemAcc object.
- StaticInstPtr<AlphaISA> memAccPtr;
+ const StaticInstPtr<AlphaISA> memAccPtr;
/// Constructor
Memory(const char *mnem, MachInst _machInst, OpClass __opClass,
@@ -762,8 +762,8 @@ output header {{
public:
- StaticInstPtr<AlphaISA> &eaCompInst() { return eaCompPtr; }
- StaticInstPtr<AlphaISA> &memAccInst() { return memAccPtr; }
+ const StaticInstPtr<AlphaISA> &eaCompInst() const { return eaCompPtr; }
+ const StaticInstPtr<AlphaISA> &memAccInst() const { return memAccPtr; }
};
/**
@@ -2539,9 +2539,9 @@ decode OPCODE default Unknown::unknown() {
xc->syscall();
}}, IsNonSpeculative);
// Read uniq reg into ABI return value register (r0)
- 0x9e: rduniq({{ R0 = Runiq; }}, IsNonSpeculative);
+ 0x9e: rduniq({{ R0 = Runiq; }});
// Write uniq reg with value from ABI arg register (r16)
- 0x9f: wruniq({{ Runiq = R16; }}, IsNonSpeculative);
+ 0x9f: wruniq({{ Runiq = R16; }});
}
}
#endif
diff --git a/arch/isa_parser.py b/arch/isa_parser.py
index 5e0267c9e..8187cf188 100755
--- a/arch/isa_parser.py
+++ b/arch/isa_parser.py
@@ -642,9 +642,6 @@ CpuModel('FullCPU', 'full_cpu_exec.cc',
CpuModel('AlphaFullCPU', 'alpha_full_cpu_exec.cc',
'#include "cpu/beta_cpu/alpha_dyn_inst.hh"',
{ 'CPU_exec_context': 'AlphaDynInst<AlphaSimpleImpl>' })
-CpuModel('OoOCPU', 'ooo_cpu_exec.cc',
- '#include "cpu/ooo_cpu/ooo_dyn_inst.hh"',
- { 'CPU_exec_context': 'OoODynInst<OoOImpl>' })
# Expand template with CPU-specific references into a dictionary with
# an entry for each CPU model name. The entry key is the model name
diff --git a/cpu/base_dyn_inst.cc b/cpu/base_dyn_inst.cc
index b8424f576..ecfe5a4b0 100644
--- a/cpu/base_dyn_inst.cc
+++ b/cpu/base_dyn_inst.cc
@@ -43,8 +43,6 @@
#include "cpu/base_dyn_inst.hh"
#include "cpu/beta_cpu/alpha_impl.hh"
#include "cpu/beta_cpu/alpha_full_cpu.hh"
-#include "cpu/ooo_cpu/ooo_impl.hh"
-#include "cpu/ooo_cpu/ooo_cpu.hh"
using namespace std;
@@ -384,14 +382,9 @@ BaseDynInst<Impl>::eaSrcsReady()
// Forward declaration...
template class BaseDynInst<AlphaSimpleImpl>;
-template class BaseDynInst<OoOImpl>;
template <>
int
BaseDynInst<AlphaSimpleImpl>::instcount = 0;
-template <>
-int
-BaseDynInst<OoOImpl>::instcount = 0;
-
#endif // __CPU_BASE_DYN_INST_CC__
diff --git a/cpu/base_dyn_inst.hh b/cpu/base_dyn_inst.hh
index 943293b25..509874fad 100644
--- a/cpu/base_dyn_inst.hh
+++ b/cpu/base_dyn_inst.hh
@@ -404,6 +404,10 @@ class BaseDynInst : public FastAlloc, public RefCounted
const Addr &getEA() const { return instEffAddr; }
bool doneEACalc() { return eaCalcDone; }
bool eaSrcsReady();
+
+ public:
+ int16_t lqIdx;
+ int16_t sqIdx;
};
template<class Impl>
@@ -419,6 +423,7 @@ BaseDynInst<Impl>::read(Addr addr, T &data, unsigned flags)
// Record key MemReq parameters so we can generate another one
// just like it for the timing access without calling translate()
// again (which might mess up the TLB).
+ // Do I ever really need this? -KTL 3/05
effAddr = req->vaddr;
physEffAddr = req->paddr;
memReqFlags = req->flags;
@@ -433,7 +438,7 @@ BaseDynInst<Impl>::read(Addr addr, T &data, unsigned flags)
#endif
if (fault == No_Fault) {
- fault = cpu->read(req, data);
+ fault = cpu->read(req, data, lqIdx);
}
else {
// Return a fixed value to keep simulation deterministic even
@@ -459,8 +464,8 @@ BaseDynInst<Impl>::write(T data, Addr addr, unsigned flags, uint64_t *res)
traceData->setData(data);
}
- storeSize = sizeof(T);
- storeData = data;
+// storeSize = sizeof(T);
+// storeData = data;
MemReqPtr req = new MemReq(addr, xc, sizeof(T), flags);
@@ -485,7 +490,7 @@ BaseDynInst<Impl>::write(T data, Addr addr, unsigned flags, uint64_t *res)
#endif
if (fault == No_Fault) {
- fault = cpu->write(req, data);
+ fault = cpu->write(req, data, sqIdx);
}
if (res) {
diff --git a/cpu/beta_cpu/alpha_dyn_inst.hh b/cpu/beta_cpu/alpha_dyn_inst.hh
index b2f0d703e..d34fa071c 100644
--- a/cpu/beta_cpu/alpha_dyn_inst.hh
+++ b/cpu/beta_cpu/alpha_dyn_inst.hh
@@ -47,11 +47,10 @@ class AlphaDynInst : public BaseDynInst<Impl>
/** BaseDynInst constructor given a static inst pointer. */
AlphaDynInst(StaticInstPtr<AlphaISA> &_staticInst);
- /** Executes the instruction. Why the hell did I put this here? */
+ /** Executes the instruction.*/
Fault execute()
{
- this->fault = this->staticInst->execute(this, this->traceData);
- return this->fault;
+ return this->fault = this->staticInst->execute(this, this->traceData);
}
public:
@@ -105,47 +104,47 @@ class AlphaDynInst : public BaseDynInst<Impl>
// storage (which is pretty hard to imagine they would have reason
// to do).
- uint64_t readIntReg(StaticInst<ISA> *si, int idx)
+ uint64_t readIntReg(const StaticInst<ISA> *si, int idx)
{
return this->cpu->readIntReg(_srcRegIdx[idx]);
}
- float readFloatRegSingle(StaticInst<ISA> *si, int idx)
+ float readFloatRegSingle(const StaticInst<ISA> *si, int idx)
{
return this->cpu->readFloatRegSingle(_srcRegIdx[idx]);
}
- double readFloatRegDouble(StaticInst<ISA> *si, int idx)
+ double readFloatRegDouble(const StaticInst<ISA> *si, int idx)
{
return this->cpu->readFloatRegDouble(_srcRegIdx[idx]);
}
- uint64_t readFloatRegInt(StaticInst<ISA> *si, int idx)
+ uint64_t readFloatRegInt(const StaticInst<ISA> *si, int idx)
{
return this->cpu->readFloatRegInt(_srcRegIdx[idx]);
}
/** @todo: Make results into arrays so they can handle multiple dest
* registers.
*/
- void setIntReg(StaticInst<ISA> *si, int idx, uint64_t val)
+ void setIntReg(const StaticInst<ISA> *si, int idx, uint64_t val)
{
this->cpu->setIntReg(_destRegIdx[idx], val);
this->instResult.integer = val;
}
- void setFloatRegSingle(StaticInst<ISA> *si, int idx, float val)
+ void setFloatRegSingle(const StaticInst<ISA> *si, int idx, float val)
{
this->cpu->setFloatRegSingle(_destRegIdx[idx], val);
this->instResult.fp = val;
}
- void setFloatRegDouble(StaticInst<ISA> *si, int idx, double val)
+ void setFloatRegDouble(const StaticInst<ISA> *si, int idx, double val)
{
this->cpu->setFloatRegDouble(_destRegIdx[idx], val);
this->instResult.dbl = val;
}
- void setFloatRegInt(StaticInst<ISA> *si, int idx, uint64_t val)
+ void setFloatRegInt(const StaticInst<ISA> *si, int idx, uint64_t val)
{
this->cpu->setFloatRegInt(_destRegIdx[idx], val);
this->instResult.integer = val;
diff --git a/cpu/beta_cpu/alpha_dyn_inst_impl.hh b/cpu/beta_cpu/alpha_dyn_inst_impl.hh
index 4a3ae99d4..3f530e182 100644
--- a/cpu/beta_cpu/alpha_dyn_inst_impl.hh
+++ b/cpu/beta_cpu/alpha_dyn_inst_impl.hh
@@ -129,7 +129,8 @@ template <class Impl>
void
AlphaDynInst<Impl>::syscall()
{
- this->cpu->syscall();
+ this->cpu->syscall(this->threadNumber);
+// this->cpu->syscall();
}
#endif
diff --git a/cpu/beta_cpu/alpha_full_cpu.hh b/cpu/beta_cpu/alpha_full_cpu.hh
index 3c29dd277..065b2fc4e 100644
--- a/cpu/beta_cpu/alpha_full_cpu.hh
+++ b/cpu/beta_cpu/alpha_full_cpu.hh
@@ -28,8 +28,6 @@ class AlphaFullCPU : public FullBetaCPU<Impl>
void regStats();
#ifdef FULL_SYSTEM
- bool inPalMode();
-
//Note that the interrupt stuff from the base CPU might be somewhat
//ISA specific (ie NumInterruptLevels). These functions might not
//be needed in FullCPU though.
@@ -106,13 +104,16 @@ class AlphaFullCPU : public FullBetaCPU<Impl>
}
#ifdef FULL_SYSTEM
- uint64_t *getIPR();
+ uint64_t *getIpr();
uint64_t readIpr(int idx, Fault &fault);
Fault setIpr(int idx, uint64_t val);
int readIntrFlag();
void setIntrFlag(int val);
Fault hwrei();
- bool inPalMode();
+ bool inPalMode() { return AlphaISA::PcPAL(this->regFile.readPC()); }
+ bool inPalMode(uint64_t PC)
+ { return AlphaISA::PcPAL(PC); }
+
void trap(Fault fault);
bool simPalCheck(int palFunc);
@@ -153,7 +154,7 @@ class AlphaFullCPU : public FullBetaCPU<Impl>
}
}
- void syscall();
+ void syscall(short thread_num);
void squashStages();
#endif
@@ -168,11 +169,13 @@ class AlphaFullCPU : public FullBetaCPU<Impl>
// Not sure this is used anywhere.
void intr_post(RegFile *regs, Fault fault, Addr pc);
// Actually used within exec files. Implement properly.
- void swap_palshadow(RegFile *regs, bool use_shadow);
+ void swapPALShadow(bool use_shadow);
// Called by CPU constructor. Can implement as I please.
void initCPU(RegFile *regs);
// Called by initCPU. Implement as I please.
void initIPRs(RegFile *regs);
+
+ void halt() { panic("Halt not implemented!\n"); }
#endif
@@ -193,6 +196,11 @@ class AlphaFullCPU : public FullBetaCPU<Impl>
return error;
}
+ template <class T>
+ Fault read(MemReqPtr &req, T &data, int load_idx)
+ {
+ return this->iew.ldstQueue.read(req, data, load_idx);
+ }
template <class T>
Fault write(MemReqPtr &req, T &data)
@@ -218,7 +226,7 @@ class AlphaFullCPU : public FullBetaCPU<Impl>
std::cerr << "Warning: "
<< req->xc->storeCondFailures
<< " consecutive store conditional failures "
- << "on cpu " << cpu_id
+ << "on cpu " << this->cpu_id
<< std::endl;
}
return No_Fault;
@@ -232,8 +240,8 @@ class AlphaFullCPU : public FullBetaCPU<Impl>
// and all other stores (WH64?). Unsuccessful Store
// Conditionals would have returned above, and wouldn't fall
// through.
- for (int i = 0; i < system->execContexts.size(); i++){
- cregs = &system->execContexts[i]->regs.miscRegs;
+ for (int i = 0; i < this->system->execContexts.size(); i++){
+ cregs = &this->system->execContexts[i]->regs.miscRegs;
if ((cregs->lock_addr & ~0xf) == (req->paddr & ~0xf)) {
cregs->lock_flag = false;
}
@@ -244,6 +252,12 @@ class AlphaFullCPU : public FullBetaCPU<Impl>
return this->mem->write(req, (T)htoa(data));
}
+ template <class T>
+ Fault write(MemReqPtr &req, T &data, int store_idx)
+ {
+ return this->iew.ldstQueue.write(req, data, store_idx);
+ }
+
};
#endif // __CPU_BETA_CPU_ALPHA_FULL_CPU_HH__
diff --git a/cpu/beta_cpu/alpha_full_cpu_builder.cc b/cpu/beta_cpu/alpha_full_cpu_builder.cc
index cf9536cb8..dc5b1aad1 100644
--- a/cpu/beta_cpu/alpha_full_cpu_builder.cc
+++ b/cpu/beta_cpu/alpha_full_cpu_builder.cc
@@ -33,8 +33,17 @@
#include "mem/functional_mem/functional_memory.hh"
#endif // FULL_SYSTEM
-BEGIN_DECLARE_SIM_OBJECT_PARAMS(BaseFullCPU)
+class DerivAlphaFullCPU : public AlphaFullCPU<AlphaSimpleImpl>
+{
+ public:
+ DerivAlphaFullCPU(AlphaSimpleParams p)
+ : AlphaFullCPU<AlphaSimpleImpl>(p)
+ { }
+};
+
+BEGIN_DECLARE_SIM_OBJECT_PARAMS(DerivAlphaFullCPU)
+ Param<int> cycle_time;
Param<int> numThreads;
#ifdef FULL_SYSTEM
@@ -44,8 +53,6 @@ SimObjectParam<AlphaDTB *> dtb;
Param<int> mult;
#else
SimObjectVectorParam<Process *> workload;
-SimObjectParam<Process *> process;
-Param<short> asid;
#endif // FULL_SYSTEM
SimObjectParam<FunctionalMemory *> mem;
@@ -120,23 +127,25 @@ Param<unsigned> numROBEntries;
Param<unsigned> instShiftAmt;
-Param<bool> defReg;
+Param<bool> defer_registration;
-END_DECLARE_SIM_OBJECT_PARAMS(BaseFullCPU)
+Param<bool> function_trace;
+Param<Tick> function_trace_start;
-BEGIN_INIT_SIM_OBJECT_PARAMS(BaseFullCPU)
+END_DECLARE_SIM_OBJECT_PARAMS(DerivAlphaFullCPU)
+BEGIN_INIT_SIM_OBJECT_PARAMS(DerivAlphaFullCPU)
+
+ INIT_PARAM(cycle_time, "cpu cycle time"),
INIT_PARAM(numThreads, "number of HW thread contexts"),
#ifdef FULL_SYSTEM
INIT_PARAM(system, "System object"),
INIT_PARAM(itb, "Instruction translation buffer"),
INIT_PARAM(dtb, "Data translation buffer"),
- INIT_PARAM_DFLT(mult, "System clock multiplier", 1),
+ INIT_PARAM(mult, "System clock multiplier"),
#else
INIT_PARAM(workload, "Processes to run"),
- INIT_PARAM_DFLT(process, "Process to run", NULL),
- INIT_PARAM(asid, "Address space ID"),
#endif // FULL_SYSTEM
INIT_PARAM_DFLT(mem, "Memory", NULL),
@@ -230,14 +239,16 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(BaseFullCPU)
INIT_PARAM(numROBEntries, "Number of reorder buffer entries"),
INIT_PARAM(instShiftAmt, "Number of bits to shift instructions by"),
+ INIT_PARAM(defer_registration, "defer system registration (for sampling)"),
- INIT_PARAM(defReg, "Defer registration")
+ INIT_PARAM(function_trace, "Enable function trace"),
+ INIT_PARAM(function_trace_start, "Cycle to start function trace")
-END_INIT_SIM_OBJECT_PARAMS(BaseFullCPU)
+END_INIT_SIM_OBJECT_PARAMS(DerivAlphaFullCPU)
-CREATE_SIM_OBJECT(BaseFullCPU)
+CREATE_SIM_OBJECT(DerivAlphaFullCPU)
{
- AlphaFullCPU<AlphaSimpleImpl> *cpu;
+ DerivAlphaFullCPU *cpu;
#ifdef FULL_SYSTEM
if (mult != 1)
@@ -255,30 +266,21 @@ CREATE_SIM_OBJECT(BaseFullCPU)
fatal("Must specify at least one workload!");
}
- Process *actual_process;
-
- if (process == NULL) {
- actual_process = workload[0];
- } else {
- actual_process = process;
- }
-
#endif
AlphaSimpleParams params;
+ params.cycleTime = cycle_time;
+
params.name = getInstanceName();
params.numberOfThreads = actual_num_threads;
#ifdef FULL_SYSTEM
- params._system = system;
+ params.system = system;
params.itb = itb;
params.dtb = dtb;
- params.freq = ticksPerSecond * mult;
#else
params.workload = workload;
- params.process = actual_process;
- params.asid = asid;
#endif // FULL_SYSTEM
params.mem = mem;
@@ -356,12 +358,15 @@ CREATE_SIM_OBJECT(BaseFullCPU)
params.instShiftAmt = 2;
- params.defReg = defReg;
+ params.defReg = defer_registration;
+
+ params.functionTrace = function_trace;
+ params.functionTraceStart = function_trace_start;
- cpu = new AlphaFullCPU<AlphaSimpleImpl>(params);
+ cpu = new DerivAlphaFullCPU(params);
return cpu;
}
-REGISTER_SIM_OBJECT("AlphaFullCPU", BaseFullCPU)
+REGISTER_SIM_OBJECT("DerivAlphaFullCPU", DerivAlphaFullCPU)
diff --git a/cpu/beta_cpu/alpha_full_cpu_impl.hh b/cpu/beta_cpu/alpha_full_cpu_impl.hh
index fccded193..c42e9e362 100644
--- a/cpu/beta_cpu/alpha_full_cpu_impl.hh
+++ b/cpu/beta_cpu/alpha_full_cpu_impl.hh
@@ -12,6 +12,14 @@
#include "cpu/beta_cpu/alpha_params.hh"
#include "cpu/beta_cpu/comm.hh"
+#ifdef FULL_SYSTEM
+#include "arch/alpha/osfpal.hh"
+#include "arch/alpha/isa_traits.hh"
+//#include "arch/alpha/ev5.hh"
+
+//using namespace EV5;
+#endif
+
template <class Impl>
AlphaFullCPU<Impl>::AlphaFullCPU(Params &params)
: FullBetaCPU<Impl>(params)
@@ -42,9 +50,12 @@ AlphaFullCPU<Impl>::regStats()
#ifndef FULL_SYSTEM
+// Will probably need to know which thread is calling syscall
+// Will need to pass that information in to the DynInst when it is constructed,
+// so that this call can be made with the proper thread number.
template <class Impl>
void
-AlphaFullCPU<Impl>::syscall()
+AlphaFullCPU<Impl>::syscall(short thread_num)
{
DPRINTF(FullCPU, "AlphaFullCPU: Syscall() called.\n\n");
@@ -60,7 +71,8 @@ AlphaFullCPU<Impl>::syscall()
// Copy over all important state to xc once all the unrolling is done.
copyToXC();
- this->process->syscall(this->xc);
+ this->thread[0]->syscall();
+// this->thread[thread_num]->syscall();
// Copy over all important state back to CPU.
copyFromXC();
@@ -102,6 +114,8 @@ AlphaFullCPU<Impl>::squashStages()
this->iew.squash();
this->iewQueue.advance();
this->iewQueue.advance();
+ // Needs to tell the LSQ to write back all of its data
+ this->iew.lsqWriteback();
this->rob.squash(rob_head);
this->commit.setSquashing();
@@ -203,390 +217,35 @@ template <class Impl>
uint64_t *
AlphaFullCPU<Impl>::getIpr()
{
- return regFile.getIpr();
+ return this->regFile.getIpr();
}
template <class Impl>
uint64_t
AlphaFullCPU<Impl>::readIpr(int idx, Fault &fault)
{
- uint64_t *ipr = getIpr();
- uint64_t retval = 0; // return value, default 0
-
- switch (idx) {
- case AlphaISA::IPR_PALtemp0:
- case AlphaISA::IPR_PALtemp1:
- case AlphaISA::IPR_PALtemp2:
- case AlphaISA::IPR_PALtemp3:
- case AlphaISA::IPR_PALtemp4:
- case AlphaISA::IPR_PALtemp5:
- case AlphaISA::IPR_PALtemp6:
- case AlphaISA::IPR_PALtemp7:
- case AlphaISA::IPR_PALtemp8:
- case AlphaISA::IPR_PALtemp9:
- case AlphaISA::IPR_PALtemp10:
- case AlphaISA::IPR_PALtemp11:
- case AlphaISA::IPR_PALtemp12:
- case AlphaISA::IPR_PALtemp13:
- case AlphaISA::IPR_PALtemp14:
- case AlphaISA::IPR_PALtemp15:
- case AlphaISA::IPR_PALtemp16:
- case AlphaISA::IPR_PALtemp17:
- case AlphaISA::IPR_PALtemp18:
- case AlphaISA::IPR_PALtemp19:
- case AlphaISA::IPR_PALtemp20:
- case AlphaISA::IPR_PALtemp21:
- case AlphaISA::IPR_PALtemp22:
- case AlphaISA::IPR_PALtemp23:
- case AlphaISA::IPR_PAL_BASE:
-
- case AlphaISA::IPR_IVPTBR:
- case AlphaISA::IPR_DC_MODE:
- case AlphaISA::IPR_MAF_MODE:
- case AlphaISA::IPR_ISR:
- case AlphaISA::IPR_EXC_ADDR:
- case AlphaISA::IPR_IC_PERR_STAT:
- case AlphaISA::IPR_DC_PERR_STAT:
- case AlphaISA::IPR_MCSR:
- case AlphaISA::IPR_ASTRR:
- case AlphaISA::IPR_ASTER:
- case AlphaISA::IPR_SIRR:
- case AlphaISA::IPR_ICSR:
- case AlphaISA::IPR_ICM:
- case AlphaISA::IPR_DTB_CM:
- case AlphaISA::IPR_IPLR:
- case AlphaISA::IPR_INTID:
- case AlphaISA::IPR_PMCTR:
- // no side-effect
- retval = ipr[idx];
- break;
-
- case AlphaISA::IPR_CC:
- retval |= ipr[idx] & ULL(0xffffffff00000000);
- retval |= curTick & ULL(0x00000000ffffffff);
- break;
-
- case AlphaISA::IPR_VA:
- retval = ipr[idx];
- break;
-
- case AlphaISA::IPR_VA_FORM:
- case AlphaISA::IPR_MM_STAT:
- case AlphaISA::IPR_IFAULT_VA_FORM:
- case AlphaISA::IPR_EXC_MASK:
- case AlphaISA::IPR_EXC_SUM:
- retval = ipr[idx];
- break;
-
- case AlphaISA::IPR_DTB_PTE:
- {
- AlphaISA::PTE &pte = dtb->index(!misspeculating());
-
- retval |= ((u_int64_t)pte.ppn & ULL(0x7ffffff)) << 32;
- retval |= ((u_int64_t)pte.xre & ULL(0xf)) << 8;
- retval |= ((u_int64_t)pte.xwe & ULL(0xf)) << 12;
- retval |= ((u_int64_t)pte.fonr & ULL(0x1)) << 1;
- retval |= ((u_int64_t)pte.fonw & ULL(0x1))<< 2;
- retval |= ((u_int64_t)pte.asma & ULL(0x1)) << 4;
- retval |= ((u_int64_t)pte.asn & ULL(0x7f)) << 57;
- }
- break;
-
- // write only registers
- case AlphaISA::IPR_HWINT_CLR:
- case AlphaISA::IPR_SL_XMIT:
- case AlphaISA::IPR_DC_FLUSH:
- case AlphaISA::IPR_IC_FLUSH:
- case AlphaISA::IPR_ALT_MODE:
- case AlphaISA::IPR_DTB_IA:
- case AlphaISA::IPR_DTB_IAP:
- case AlphaISA::IPR_ITB_IA:
- case AlphaISA::IPR_ITB_IAP:
- fault = Unimplemented_Opcode_Fault;
- break;
-
- default:
- // invalid IPR
- fault = Unimplemented_Opcode_Fault;
- break;
- }
-
- return retval;
+ return this->regFile.readIpr(idx, fault);
}
template <class Impl>
Fault
AlphaFullCPU<Impl>::setIpr(int idx, uint64_t val)
{
- uint64_t *ipr = getIpr();
- uint64_t old;
-
- if (misspeculating())
- return No_Fault;
-
- switch (idx) {
- case AlphaISA::IPR_PALtemp0:
- case AlphaISA::IPR_PALtemp1:
- case AlphaISA::IPR_PALtemp2:
- case AlphaISA::IPR_PALtemp3:
- case AlphaISA::IPR_PALtemp4:
- case AlphaISA::IPR_PALtemp5:
- case AlphaISA::IPR_PALtemp6:
- case AlphaISA::IPR_PALtemp7:
- case AlphaISA::IPR_PALtemp8:
- case AlphaISA::IPR_PALtemp9:
- case AlphaISA::IPR_PALtemp10:
- case AlphaISA::IPR_PALtemp11:
- case AlphaISA::IPR_PALtemp12:
- case AlphaISA::IPR_PALtemp13:
- case AlphaISA::IPR_PALtemp14:
- case AlphaISA::IPR_PALtemp15:
- case AlphaISA::IPR_PALtemp16:
- case AlphaISA::IPR_PALtemp17:
- case AlphaISA::IPR_PALtemp18:
- case AlphaISA::IPR_PALtemp19:
- case AlphaISA::IPR_PALtemp20:
- case AlphaISA::IPR_PALtemp21:
- case AlphaISA::IPR_PALtemp22:
- case AlphaISA::IPR_PAL_BASE:
- case AlphaISA::IPR_IC_PERR_STAT:
- case AlphaISA::IPR_DC_PERR_STAT:
- case AlphaISA::IPR_PMCTR:
- // write entire quad w/ no side-effect
- ipr[idx] = val;
- break;
-
- case AlphaISA::IPR_CC_CTL:
- // This IPR resets the cycle counter. We assume this only
- // happens once... let's verify that.
- assert(ipr[idx] == 0);
- ipr[idx] = 1;
- break;
-
- case AlphaISA::IPR_CC:
- // This IPR only writes the upper 64 bits. It's ok to write
- // all 64 here since we mask out the lower 32 in rpcc (see
- // isa_desc).
- ipr[idx] = val;
- break;
-
- case AlphaISA::IPR_PALtemp23:
- // write entire quad w/ no side-effect
- old = ipr[idx];
- ipr[idx] = val;
- kernelStats.context(old, val);
- break;
-
- case AlphaISA::IPR_DTB_PTE:
- // write entire quad w/ no side-effect, tag is forthcoming
- ipr[idx] = val;
- break;
-
- case AlphaISA::IPR_EXC_ADDR:
- // second least significant bit in PC is always zero
- ipr[idx] = val & ~2;
- break;
-
- case AlphaISA::IPR_ASTRR:
- case AlphaISA::IPR_ASTER:
- // only write least significant four bits - privilege mask
- ipr[idx] = val & 0xf;
- break;
-
- case AlphaISA::IPR_IPLR:
-#ifdef DEBUG
- if (break_ipl != -1 && break_ipl == (val & 0x1f))
- debug_break();
-#endif
-
- // only write least significant five bits - interrupt level
- ipr[idx] = val & 0x1f;
- kernelStats.swpipl(ipr[idx]);
- break;
-
- case AlphaISA::IPR_DTB_CM:
- kernelStats.mode((val & 0x18) != 0);
-
- case AlphaISA::IPR_ICM:
- // only write two mode bits - processor mode
- ipr[idx] = val & 0x18;
- break;
-
- case AlphaISA::IPR_ALT_MODE:
- // only write two mode bits - processor mode
- ipr[idx] = val & 0x18;
- break;
-
- case AlphaISA::IPR_MCSR:
- // more here after optimization...
- ipr[idx] = val;
- break;
-
- case AlphaISA::IPR_SIRR:
- // only write software interrupt mask
- ipr[idx] = val & 0x7fff0;
- break;
-
- case AlphaISA::IPR_ICSR:
- ipr[idx] = val & ULL(0xffffff0300);
- break;
-
- case AlphaISA::IPR_IVPTBR:
- case AlphaISA::IPR_MVPTBR:
- ipr[idx] = val & ULL(0xffffffffc0000000);
- break;
-
- case AlphaISA::IPR_DC_TEST_CTL:
- ipr[idx] = val & 0x1ffb;
- break;
-
- case AlphaISA::IPR_DC_MODE:
- case AlphaISA::IPR_MAF_MODE:
- ipr[idx] = val & 0x3f;
- break;
-
- case AlphaISA::IPR_ITB_ASN:
- ipr[idx] = val & 0x7f0;
- break;
-
- case AlphaISA::IPR_DTB_ASN:
- ipr[idx] = val & ULL(0xfe00000000000000);
- break;
-
- case AlphaISA::IPR_EXC_SUM:
- case AlphaISA::IPR_EXC_MASK:
- // any write to this register clears it
- ipr[idx] = 0;
- break;
-
- case AlphaISA::IPR_INTID:
- case AlphaISA::IPR_SL_RCV:
- case AlphaISA::IPR_MM_STAT:
- case AlphaISA::IPR_ITB_PTE_TEMP:
- case AlphaISA::IPR_DTB_PTE_TEMP:
- // read-only registers
- return Unimplemented_Opcode_Fault;
-
- case AlphaISA::IPR_HWINT_CLR:
- case AlphaISA::IPR_SL_XMIT:
- case AlphaISA::IPR_DC_FLUSH:
- case AlphaISA::IPR_IC_FLUSH:
- // the following are write only
- ipr[idx] = val;
- break;
-
- case AlphaISA::IPR_DTB_IA:
- // really a control write
- ipr[idx] = 0;
-
- dtb->flushAll();
- break;
-
- case AlphaISA::IPR_DTB_IAP:
- // really a control write
- ipr[idx] = 0;
-
- dtb->flushProcesses();
- break;
-
- case AlphaISA::IPR_DTB_IS:
- // really a control write
- ipr[idx] = val;
-
- dtb->flushAddr(val, DTB_ASN_ASN(ipr[AlphaISA::IPR_DTB_ASN]));
- break;
-
- case AlphaISA::IPR_DTB_TAG: {
- struct AlphaISA::PTE pte;
-
- // FIXME: granularity hints NYI...
- if (DTB_PTE_GH(ipr[AlphaISA::IPR_DTB_PTE]) != 0)
- panic("PTE GH field != 0");
-
- // write entire quad
- ipr[idx] = val;
-
- // construct PTE for new entry
- pte.ppn = DTB_PTE_PPN(ipr[AlphaISA::IPR_DTB_PTE]);
- pte.xre = DTB_PTE_XRE(ipr[AlphaISA::IPR_DTB_PTE]);
- pte.xwe = DTB_PTE_XWE(ipr[AlphaISA::IPR_DTB_PTE]);
- pte.fonr = DTB_PTE_FONR(ipr[AlphaISA::IPR_DTB_PTE]);
- pte.fonw = DTB_PTE_FONW(ipr[AlphaISA::IPR_DTB_PTE]);
- pte.asma = DTB_PTE_ASMA(ipr[AlphaISA::IPR_DTB_PTE]);
- pte.asn = DTB_ASN_ASN(ipr[AlphaISA::IPR_DTB_ASN]);
-
- // insert new TAG/PTE value into data TLB
- dtb->insert(val, pte);
- }
- break;
-
- case AlphaISA::IPR_ITB_PTE: {
- struct AlphaISA::PTE pte;
-
- // FIXME: granularity hints NYI...
- if (ITB_PTE_GH(val) != 0)
- panic("PTE GH field != 0");
-
- // write entire quad
- ipr[idx] = val;
-
- // construct PTE for new entry
- pte.ppn = ITB_PTE_PPN(val);
- pte.xre = ITB_PTE_XRE(val);
- pte.xwe = 0;
- pte.fonr = ITB_PTE_FONR(val);
- pte.fonw = ITB_PTE_FONW(val);
- pte.asma = ITB_PTE_ASMA(val);
- pte.asn = ITB_ASN_ASN(ipr[AlphaISA::IPR_ITB_ASN]);
-
- // insert new TAG/PTE value into data TLB
- itb->insert(ipr[AlphaISA::IPR_ITB_TAG], pte);
- }
- break;
-
- case AlphaISA::IPR_ITB_IA:
- // really a control write
- ipr[idx] = 0;
-
- itb->flushAll();
- break;
-
- case AlphaISA::IPR_ITB_IAP:
- // really a control write
- ipr[idx] = 0;
-
- itb->flushProcesses();
- break;
-
- case AlphaISA::IPR_ITB_IS:
- // really a control write
- ipr[idx] = val;
-
- itb->flushAddr(val, ITB_ASN_ASN(ipr[AlphaISA::IPR_ITB_ASN]));
- break;
-
- default:
- // invalid IPR
- return Unimplemented_Opcode_Fault;
- }
-
- // no error...
- return No_Fault;
-
+ return this->regFile.setIpr(idx, val);
}
template <class Impl>
int
AlphaFullCPU<Impl>::readIntrFlag()
{
- return regs.intrflag;
+ return this->regFile.readIntrFlag();
}
template <class Impl>
void
AlphaFullCPU<Impl>::setIntrFlag(int val)
{
- regs.intrflag = val;
+ this->regFile.setIntrFlag(val);
}
// Can force commit stage to squash and stuff.
@@ -596,19 +255,17 @@ AlphaFullCPU<Impl>::hwrei()
{
uint64_t *ipr = getIpr();
- if (!PC_PAL(regs.pc))
+ if (!inPalMode())
return Unimplemented_Opcode_Fault;
setNextPC(ipr[AlphaISA::IPR_EXC_ADDR]);
- if (!misspeculating()) {
- kernelStats.hwrei();
+// kernelStats.hwrei();
- if ((ipr[AlphaISA::IPR_EXC_ADDR] & 1) == 0)
- AlphaISA::swap_palshadow(&regs, false);
+ if ((ipr[AlphaISA::IPR_EXC_ADDR] & 1) == 0)
+// AlphaISA::swap_palshadow(&regs, false);
- AlphaISA::check_interrupts = true;
- }
+ this->checkInterrupts = true;
// FIXME: XXX check for interrupts? XXX
return No_Fault;
@@ -616,16 +273,9 @@ AlphaFullCPU<Impl>::hwrei()
template <class Impl>
bool
-AlphaFullCPU<Impl>::inPalMode()
-{
- return PC_PAL(readPC());
-}
-
-template <class Impl>
-bool
AlphaFullCPU<Impl>::simPalCheck(int palFunc)
{
- kernelStats.callpal(palFunc);
+// kernelStats.callpal(palFunc);
switch (palFunc) {
case PAL::halt:
@@ -636,7 +286,7 @@ AlphaFullCPU<Impl>::simPalCheck(int palFunc)
case PAL::bpt:
case PAL::bugchk:
- if (system->breakpoint())
+ if (this->system->breakpoint())
return false;
break;
}
@@ -651,21 +301,22 @@ template <class Impl>
void
AlphaFullCPU<Impl>::trap(Fault fault)
{
- uint64_t PC = commit.readPC();
+ // Keep in mind that a trap may be initiated by fetch if there's a TLB
+ // miss
+ uint64_t PC = this->commit.readCommitPC();
DPRINTF(Fault, "Fault %s\n", FaultName(fault));
- Stats::recordEvent(csprintf("Fault %s", FaultName(fault)));
+ this->recordEvent(csprintf("Fault %s", FaultName(fault)));
- assert(!misspeculating());
- kernelStats.fault(fault);
+// kernelStats.fault(fault);
if (fault == Arithmetic_Fault)
panic("Arithmetic traps are unimplemented!");
- AlphaISA::InternalProcReg *ipr = getIpr();
+ typename AlphaISA::InternalProcReg *ipr = getIpr();
// exception restart address - Get the commit PC
- if (fault != Interrupt_Fault || !PC_PAL(PC))
+ if (fault != Interrupt_Fault || !inPalMode(PC))
ipr[AlphaISA::IPR_EXC_ADDR] = PC;
if (fault == Pal_Fault || fault == Arithmetic_Fault /* ||
@@ -674,11 +325,12 @@ AlphaFullCPU<Impl>::trap(Fault fault)
ipr[AlphaISA::IPR_EXC_ADDR] += 4;
}
- if (!PC_PAL(PC))
- AlphaISA::swap_palshadow(&regs, true);
+ if (!inPalMode(PC))
+ swapPALShadow(true);
- setPC( ipr[AlphaISA::IPR_PAL_BASE] + AlphaISA::fault_addr[fault] );
- setNextPC(PC + sizeof(MachInst));
+ this->regFile.setPC( ipr[AlphaISA::IPR_PAL_BASE] +
+ AlphaISA::fault_addr[fault] );
+ this->regFile.setNextPC(PC + sizeof(MachInst));
}
template <class Impl>
@@ -694,7 +346,7 @@ AlphaFullCPU<Impl>::processInterrupts()
// same logical index.
template <class Impl>
void
-AlphaFullCPU<Impl>::swap_palshadow(RegFile *regs, bool use_shadow)
+AlphaFullCPU<Impl>::swapPALShadow(bool use_shadow)
{
if (palShadowEnabled == use_shadow)
panic("swap_palshadow: wrong PAL shadow state");
@@ -703,6 +355,7 @@ AlphaFullCPU<Impl>::swap_palshadow(RegFile *regs, bool use_shadow)
// Will have to lookup in rename map to get physical registers, then
// swap.
+/*
for (int i = 0; i < AlphaISA::NumIntRegs; i++) {
if (reg_redir[i]) {
AlphaISA::IntReg temp = regs->intRegFile[i];
@@ -710,6 +363,7 @@ AlphaFullCPU<Impl>::swap_palshadow(RegFile *regs, bool use_shadow)
regs->palregs[i] = temp;
}
}
+*/
}
#endif // FULL_SYSTEM
diff --git a/cpu/beta_cpu/alpha_params.hh b/cpu/beta_cpu/alpha_params.hh
index ecde4b016..fb3468098 100644
--- a/cpu/beta_cpu/alpha_params.hh
+++ b/cpu/beta_cpu/alpha_params.hh
@@ -20,12 +20,12 @@ class MemInterface;
class AlphaSimpleParams : public BaseFullCPU::Params
{
public:
+
#ifdef FULL_SYSTEM
AlphaITB *itb; AlphaDTB *dtb;
#else
std::vector<Process *> workload;
Process *process;
- short asid;
#endif // FULL_SYSTEM
FunctionalMemory *mem;
diff --git a/cpu/beta_cpu/comm.hh b/cpu/beta_cpu/comm.hh
index c0afe3d1b..18f76d921 100644
--- a/cpu/beta_cpu/comm.hh
+++ b/cpu/beta_cpu/comm.hh
@@ -50,7 +50,6 @@ struct SimpleIEWSimpleCommit {
bool branchTaken;
uint64_t mispredPC;
uint64_t nextPC;
- unsigned globalHist;
InstSeqNum squashedSeqNum;
};
@@ -78,7 +77,6 @@ struct TimeBufStruct {
bool branchTaken;
uint64_t mispredPC;
uint64_t nextPC;
- unsigned globalHist;
};
decodeComm decodeInfo;
@@ -113,12 +111,11 @@ struct TimeBufStruct {
bool branchTaken;
uint64_t mispredPC;
uint64_t nextPC;
- unsigned globalHist;
// Think of better names here.
// Will need to be a variety of sizes...
// Maybe make it a vector, that way only need one object.
- std::vector<PhysRegIndex> freeRegs;
+// std::vector<PhysRegIndex> freeRegs;
bool robSquashing;
@@ -129,7 +126,7 @@ struct TimeBufStruct {
// Extra bits of information so that the LDSTQ only updates when it
// needs to.
- bool commitIsStore;
+// bool commitIsStore;
bool commitIsLoad;
// Communication specifically to the IQ to tell the IQ that it can
diff --git a/cpu/beta_cpu/commit.hh b/cpu/beta_cpu/commit.hh
index 731307bf7..c04dc8085 100644
--- a/cpu/beta_cpu/commit.hh
+++ b/cpu/beta_cpu/commit.hh
@@ -113,9 +113,6 @@ class SimpleCommit
/** Pointer to FullCPU. */
FullCPU *cpu;
- /** Pointer to the rename map. DO NOT USE if possible. */
-// typename Impl::CPUPol::RenameMap *renameMap;
-
//Store buffer interface? Will need to move committed stores to the
//store buffer
diff --git a/cpu/beta_cpu/commit_impl.hh b/cpu/beta_cpu/commit_impl.hh
index 3e97b980c..17ede9694 100644
--- a/cpu/beta_cpu/commit_impl.hh
+++ b/cpu/beta_cpu/commit_impl.hh
@@ -166,9 +166,9 @@ SimpleCommit<Impl>::commit()
// hwrei() is what resets the PC to the place where instruction execution
// beings again.
#ifdef FULL_SYSTEM
- if (ISA::check_interrupts &&
+ if (//checkInterrupts &&
cpu->check_interrupts() &&
- !xc->inPalMode()) {
+ !cpu->inPalMode(readCommitPC())) {
// Will need to squash all instructions currently in flight and have
// the interrupt handler restart at the last non-committed inst.
// Most of that can be handled through the trap() function. The
@@ -215,8 +215,6 @@ SimpleCommit<Impl>::commit()
toIEW->commitInfo.mispredPC = fromIEW->mispredPC;
- toIEW->commitInfo.globalHist = fromIEW->globalHist;
-
if (toIEW->commitInfo.branchMispredict) {
++branchMispredicts;
}
@@ -257,6 +255,9 @@ SimpleCommit<Impl>::commitInsts()
// Can't commit and squash things at the same time...
////////////////////////////////////
+ if (rob->isEmpty())
+ return;
+
DynInstPtr head_inst = rob->readHeadInst();
unsigned num_committed = 0;
@@ -275,9 +276,11 @@ SimpleCommit<Impl>::commitInsts()
if (head_inst->isSquashed()) {
// Hack to avoid the instruction being retired (and deleted) if
// it hasn't been through the IEW stage yet.
+/*
if (!head_inst->isExecuted()) {
break;
}
+*/
DPRINTF(Commit, "Commit: Retiring squashed instruction from "
"ROB.\n");
@@ -341,7 +344,7 @@ SimpleCommit<Impl>::commitHead(DynInstPtr &head_inst, unsigned inst_num)
// and committed this instruction.
cpu->funcExeInst--;
- if (head_inst->isStore() || head_inst->isNonSpeculative()) {
+ if (head_inst->isNonSpeculative()) {
DPRINTF(Commit, "Commit: Encountered a store or non-speculative "
"instruction at the head of the ROB, PC %#x.\n",
head_inst->readPC());
@@ -376,12 +379,14 @@ SimpleCommit<Impl>::commitHead(DynInstPtr &head_inst, unsigned inst_num)
}
// Check if the instruction caused a fault. If so, trap.
- if (head_inst->getFault() != No_Fault) {
+ Fault inst_fault = head_inst->getFault();
+
+ if (inst_fault != No_Fault && inst_fault != Fake_Mem_Fault) {
if (!head_inst->isNop()) {
#ifdef FULL_SYSTEM
- cpu->trap(fault);
+ cpu->trap(inst_fault);
#else // !FULL_SYSTEM
- panic("fault (%d) detected @ PC %08p", head_inst->getFault(),
+ panic("fault (%d) detected @ PC %08p", inst_fault,
head_inst->PC);
#endif // FULL_SYSTEM
}
@@ -390,7 +395,7 @@ SimpleCommit<Impl>::commitHead(DynInstPtr &head_inst, unsigned inst_num)
// Check if we're really ready to commit. If not then return false.
// I'm pretty sure all instructions should be able to commit if they've
// reached this far. For now leave this in as a check.
- if(!rob->isHeadReady()) {
+ if (!rob->isHeadReady()) {
panic("Commit: Unable to commit head instruction!\n");
return false;
}
@@ -413,17 +418,7 @@ SimpleCommit<Impl>::commitHead(DynInstPtr &head_inst, unsigned inst_num)
++commitCommittedBranches;
}
-
#if 0
- // Check if the instruction has a destination register.
- // If so add the previous physical register of its logical register's
- // destination to the free list through the time buffer.
- for (int i = 0; i < head_inst->numDestRegs(); i++)
- {
- toIEW->commitInfo.freeRegs.push_back(head_inst->prevDestRegIdx(i));
- }
-#endif
-
// Explicit communication back to the LDSTQ that a load has been committed
// and can be removed from the LDSTQ. Stores don't need this because
// the LDSTQ will already have been told that a store has reached the head
@@ -436,6 +431,7 @@ SimpleCommit<Impl>::commitHead(DynInstPtr &head_inst, unsigned inst_num)
++commitCommittedLoads;
}
}
+#endif
// Now that the instruction is going to be committed, finalize its
// trace data.
@@ -487,7 +483,7 @@ SimpleCommit<Impl>::markCompletedInsts()
// Grab completed insts out of the IEW instruction queue, and mark
// instructions completed within the ROB.
for (int inst_num = 0;
- inst_num < iewWidth && fromIEW->insts[inst_num];
+ inst_num < fromIEW->size && fromIEW->insts[inst_num];
++inst_num)
{
DPRINTF(Commit, "Commit: Marking PC %#x, SN %i ready within ROB.\n",
diff --git a/cpu/beta_cpu/cpu_policy.hh b/cpu/beta_cpu/cpu_policy.hh
index 6606aba29..50d231609 100644
--- a/cpu/beta_cpu/cpu_policy.hh
+++ b/cpu/beta_cpu/cpu_policy.hh
@@ -34,7 +34,7 @@ struct SimpleCPUPolicy
typedef SimpleFetch<Impl> Fetch;
typedef SimpleDecode<Impl> Decode;
typedef SimpleRename<Impl> Rename;
- typedef SimpleIEW<Impl, IQ> IEW;
+ typedef SimpleIEW<Impl> IEW;
typedef SimpleCommit<Impl> Commit;
/** The struct for communication between fetch and decode. */
diff --git a/cpu/beta_cpu/decode.hh b/cpu/beta_cpu/decode.hh
index dd18cf176..af2a5ee54 100644
--- a/cpu/beta_cpu/decode.hh
+++ b/cpu/beta_cpu/decode.hh
@@ -68,12 +68,16 @@ class SimpleDecode
void squash();
private:
+ inline bool fetchInstsValid();
+
void block();
inline void unblock();
void squash(DynInstPtr &inst);
+ void dumpFetchQueue();
+
// Interfaces to objects outside of decode.
/** CPU interface. */
FullCPU *cpu;
diff --git a/cpu/beta_cpu/decode_impl.hh b/cpu/beta_cpu/decode_impl.hh
index 9d88f94ac..43a4e8e95 100644
--- a/cpu/beta_cpu/decode_impl.hh
+++ b/cpu/beta_cpu/decode_impl.hh
@@ -99,6 +99,13 @@ SimpleDecode<Impl>::setFetchQueue(TimeBuffer<FetchStruct> *fq_ptr)
}
template<class Impl>
+inline bool
+SimpleDecode<Impl>::fetchInstsValid()
+{
+ return fromFetch->size > 0;
+}
+
+template<class Impl>
void
SimpleDecode<Impl>::block()
{
@@ -156,14 +163,14 @@ SimpleDecode<Impl>::squash(DynInstPtr &inst)
// Set status to squashing.
_status = Squashing;
- // Maybe advance the time buffer? Not sure what to do in the normal
- // case.
-
// Clear the skid buffer in case it has any data in it.
- while (!skidBuffer.empty())
- {
+ while (!skidBuffer.empty()) {
skidBuffer.pop();
}
+
+ // Have the CPU remove in-flight instructions until it reaches this
+ // one's sequence number. Slightly unrealistic!
+ cpu->removeInstsUntil(inst->seqNum);
}
template<class Impl>
@@ -205,7 +212,7 @@ SimpleDecode<Impl>::tick()
if (_status == Unblocking) {
++decodeUnblockCycles;
- if (fromFetch->size > 0) {
+ if (fetchInstsValid()) {
// Add the current inputs to the skid buffer so they can be
// reprocessed when this stage unblocks.
skidBuffer.push(*fromFetch);
@@ -216,7 +223,7 @@ SimpleDecode<Impl>::tick()
} else if (_status == Blocked) {
++decodeBlockedCycles;
- if (fromFetch->size > 0) {
+ if (fetchInstsValid()) {
block();
}
@@ -240,12 +247,12 @@ SimpleDecode<Impl>::tick()
squash();
}
} else if (_status == Squashing) {
- ++decodeSquashCycles;
-
if (!fromCommit->commitInfo.squash &&
!fromCommit->commitInfo.robSquashing) {
_status = Running;
} else if (fromCommit->commitInfo.squash) {
+ ++decodeSquashCycles;
+
squash();
}
}
@@ -264,8 +271,7 @@ SimpleDecode<Impl>::decode()
// Check time buffer if being told to stall.
if (fromRename->renameInfo.stall ||
fromIEW->iewInfo.stall ||
- fromCommit->commitInfo.stall)
- {
+ fromCommit->commitInfo.stall) {
block();
return;
}
@@ -273,7 +279,7 @@ SimpleDecode<Impl>::decode()
// Check fetch queue to see if instructions are available.
// If no available instructions, do nothing, unless this stage is
// currently unblocking.
- if (fromFetch->size == 0 && _status != Unblocking) {
+ if (!fetchInstsValid() && _status != Unblocking) {
DPRINTF(Decode, "Decode: Nothing to do, breaking out early.\n");
// Should I change the status to idle?
++decodeIdleCycles;
@@ -286,7 +292,7 @@ SimpleDecode<Impl>::decode()
unsigned to_rename_index = 0;
int insts_available = _status == Unblocking ?
- skidBuffer.front().size :
+ skidBuffer.front().size - numInst :
fromFetch->size;
// Debug block...
@@ -308,8 +314,8 @@ SimpleDecode<Impl>::decode()
}
#endif
- while (insts_available > 0)
- {
+ while (insts_available > 0)
+ {
DPRINTF(Decode, "Decode: Sending instruction to rename.\n");
inst = _status == Unblocking ? skidBuffer.front().insts[numInst] :
@@ -331,6 +337,16 @@ SimpleDecode<Impl>::decode()
continue;
}
+
+ // Also check if instructions have no source registers. Mark
+ // them as ready to issue at any time. Not sure if this check
+ // should exist here or at a later stage; however it doesn't matter
+ // too much for functional correctness.
+ // Isn't this handled by the inst queue?
+ if (inst->numSrcRegs() == 0) {
+ inst->setCanIssue();
+ }
+
// This current instruction is valid, so add it into the decode
// queue. The next instruction may not be valid, so check to
// see if branches were predicted correctly.
@@ -369,16 +385,6 @@ SimpleDecode<Impl>::decode()
// addr (either the immediate, or the branch PC + 4) and redirect
// fetch if it's incorrect.
-
- // Also check if instructions have no source registers. Mark
- // them as ready to issue at any time. Not sure if this check
- // should exist here or at a later stage; however it doesn't matter
- // too much for function correctness.
- // Isn't this handled by the inst queue?
- if (inst->numSrcRegs() == 0) {
- inst->setCanIssue();
- }
-
// Increment which instruction we're looking at.
++numInst;
++to_rename_index;
diff --git a/cpu/beta_cpu/fetch.hh b/cpu/beta_cpu/fetch.hh
index 7a3893708..da22baa9b 100644
--- a/cpu/beta_cpu/fetch.hh
+++ b/cpu/beta_cpu/fetch.hh
@@ -74,7 +74,6 @@ class SimpleFetch
void processCacheCompletion();
-// private:
// Figure out PC vs next PC and how it should be updated
void squash(const Addr &new_PC);
@@ -93,9 +92,6 @@ class SimpleFetch
*/
bool lookupAndUpdateNextPC(DynInstPtr &inst, Addr &next_PC);
- // Might not want this function...
-// inline void recordGlobalHist(DynInstPtr &inst);
-
/**
* Fetches the cache line that contains fetch_PC. Returns any
* fault that happened. Puts the data into the class variable
@@ -184,9 +180,6 @@ class SimpleFetch
/** Mask to get a cache block's address. */
Addr cacheBlkMask;
- /** The instruction being fetched. */
-// MachInst inst;
-
/** The cache line being fetched. */
uint8_t *cacheData;
diff --git a/cpu/beta_cpu/fetch_impl.hh b/cpu/beta_cpu/fetch_impl.hh
index 90caf9ffe..0ec4c63a3 100644
--- a/cpu/beta_cpu/fetch_impl.hh
+++ b/cpu/beta_cpu/fetch_impl.hh
@@ -44,6 +44,8 @@ SimpleFetch<Impl>::SimpleFetch(Params &params)
commitToFetchDelay(params.commitToFetchDelay),
fetchWidth(params.fetchWidth)
{
+ DPRINTF(Fetch, "Fetch: Fetch constructor called\n");
+
// Set status to idle.
_status = Idle;
@@ -52,7 +54,7 @@ SimpleFetch<Impl>::SimpleFetch(Params &params)
// Not sure of this parameter. I think it should be based on the
// thread number.
#ifndef FULL_SYSTEM
- memReq->asid = params.asid;
+ memReq->asid = 0;
#else
memReq->asid = 0;
#endif // FULL_SYSTEM
@@ -163,21 +165,10 @@ SimpleFetch<Impl>::processCacheCompletion()
// to return.
// Can keep track of how many cache accesses go unused due to
// misspeculation here.
- // How to handle an outstanding miss which gets cancelled due to squash,
- // then a new icache miss gets scheduled?
if (_status == IcacheMissStall)
_status = IcacheMissComplete;
}
-#if 0
-template <class Impl>
-inline void
-SimpleFetch<Impl>::recordGlobalHist(DynInstPtr &inst)
-{
- inst->setGlobalHist(branchPred.BPReadGlobalHist());
-}
-#endif
-
template <class Impl>
bool
SimpleFetch<Impl>::lookupAndUpdateNextPC(DynInstPtr &inst, Addr &next_PC)
@@ -311,7 +302,6 @@ SimpleFetch<Impl>::squashFromDecode(const Addr &new_PC,
// Tell the CPU to remove any instructions that are in flight between
// fetch and decode.
cpu->removeInstsUntil(seq_num);
-
}
template <class Impl>
@@ -428,7 +418,9 @@ SimpleFetch<Impl>::tick()
// Switch status to running
_status = Running;
- ++fetchSquashCycles;
+ ++fetchCycles;
+
+ fetch();
} else if (_status != IcacheMissStall) {
DPRINTF(Fetch, "Fetch: Running stage.\n");
diff --git a/cpu/beta_cpu/full_cpu.cc b/cpu/beta_cpu/full_cpu.cc
index 04c74393b..3cf5d4aaa 100644
--- a/cpu/beta_cpu/full_cpu.cc
+++ b/cpu/beta_cpu/full_cpu.cc
@@ -16,7 +16,7 @@
using namespace std;
BaseFullCPU::BaseFullCPU(Params &params)
- : BaseCPU(&params)
+ : BaseCPU(&params), cpu_id(0)
{
}
@@ -82,15 +82,14 @@ FullBetaCPU<Impl>::FullBetaCPU(Params &params)
#ifdef FULL_SYSTEM
system(params.system),
- memCtrl(system->memCtrl),
+ memCtrl(system->memctrl),
physmem(system->physmem),
itb(params.itb),
dtb(params.dtb),
mem(params.mem),
#else
- process(params.process),
- asid(params.asid),
- mem(process->getMemory()),
+ // Hardcoded for a single thread!!
+ mem(params.workload[0]->getMemory()),
#endif // FULL_SYSTEM
icacheInterface(params.icacheInterface),
@@ -100,20 +99,40 @@ FullBetaCPU<Impl>::FullBetaCPU(Params &params)
funcExeInst(0)
{
_status = Idle;
+
+#ifndef FULL_SYSTEM
+ thread.resize(this->number_of_threads);
+#endif
+
+ for (int i = 0; i < this->number_of_threads; ++i) {
#ifdef FULL_SYSTEM
- xc = new ExecContext(this, 0, system, itb, dtb, mem);
+ assert(i == 0);
+ system->execContexts[i] =
+ new ExecContext(this, i, system, itb, dtb, mem);
- // initialize CPU, including PC
- TheISA::initCPU(&xc->regs);
+ // initialize CPU, including PC
+ TheISA::initCPU(&system->execContexts[i]->regs);
+ execContexts.push_back(system->execContexts[i]);
#else
- DPRINTF(FullCPU, "FullCPU: Process's starting PC is %#x, process is %#x",
- process->prog_entry, process);
- xc = new ExecContext(this, /* thread_num */ 0, process, /* asid */ 0);
-
- assert(process->getMemory() != NULL);
- assert(mem != NULL);
+ if (i < params.workload.size()) {
+ DPRINTF(FullCPU, "FullCPU: Workload[%i]'s starting PC is %#x, "
+ "process is %#x",
+ i, params.workload[i]->prog_entry, thread[i]);
+ thread[i] = new ExecContext(this, i, params.workload[i], i);
+ }
+ assert(params.workload[i]->getMemory() != NULL);
+ assert(mem != NULL);
+ execContexts.push_back(thread[i]);
#endif // !FULL_SYSTEM
- execContexts.push_back(xc);
+ }
+
+ // Note that this is a hack: xc is pointed at the first exec context so
+ // that code which still uses xc-> keeps working. Remove this eventually.
+#ifdef FULL_SYSTEM
+ xc = system->execContexts[0];
+#else
+ xc = thread[0];
+#endif
// The stages also need their CPU pointer setup. However this must be
// done at the upper level CPU because they have pointers to the upper
@@ -202,29 +221,33 @@ FullBetaCPU<Impl>::init()
// Need to do a copy of the xc->regs into the CPU's regfile so
// that it can start properly.
-
+#ifdef FULL_SYSTEM
+ ExecContext *src_xc = system->execContexts[0];
+#else
+ ExecContext *src_xc = thread[0];
+#endif
// First loop through the integer registers.
for (int i = 0; i < Impl::ISA::NumIntRegs; ++i)
{
- regFile.intRegFile[i] = xc->regs.intRegFile[i];
+ regFile.intRegFile[i] = src_xc->regs.intRegFile[i];
}
// Then loop through the floating point registers.
for (int i = 0; i < Impl::ISA::NumFloatRegs; ++i)
{
- regFile.floatRegFile[i].d = xc->regs.floatRegFile.d[i];
- regFile.floatRegFile[i].q = xc->regs.floatRegFile.q[i];
+ regFile.floatRegFile[i].d = src_xc->regs.floatRegFile.d[i];
+ regFile.floatRegFile[i].q = src_xc->regs.floatRegFile.q[i];
}
// Then loop through the misc registers.
- regFile.miscRegs.fpcr = xc->regs.miscRegs.fpcr;
- regFile.miscRegs.uniq = xc->regs.miscRegs.uniq;
- regFile.miscRegs.lock_flag = xc->regs.miscRegs.lock_flag;
- regFile.miscRegs.lock_addr = xc->regs.miscRegs.lock_addr;
+ regFile.miscRegs.fpcr = src_xc->regs.miscRegs.fpcr;
+ regFile.miscRegs.uniq = src_xc->regs.miscRegs.uniq;
+ regFile.miscRegs.lock_flag = src_xc->regs.miscRegs.lock_flag;
+ regFile.miscRegs.lock_addr = src_xc->regs.miscRegs.lock_addr;
// Then finally set the PC and the next PC.
- regFile.pc = xc->regs.pc;
- regFile.npc = xc->regs.npc;
+ regFile.pc = src_xc->regs.pc;
+ regFile.npc = src_xc->regs.npc;
}
}
@@ -277,13 +300,13 @@ FullBetaCPU<Impl>::takeOverFrom(BaseCPU *oldCPU)
// Set all status's to active, schedule the
// CPU's tick event.
- tickEvent.schedule(curTick);
for (int i = 0; i < execContexts.size(); ++i) {
- execContexts[i]->activate();
+ ExecContext *xc = execContexts[i];
+ if (xc->status() == ExecContext::Active && _status != Running) {
+ _status = Running;
+ tickEvent.schedule(curTick);
+ }
}
-
- // Switch out the other CPU.
- oldCPU->switchOut();
}
template <class Impl>
@@ -463,6 +486,7 @@ FullBetaCPU<Impl>::removeInstsUntil(const InstSeqNum &seq_num)
inst_to_delete->seqNum, inst_to_delete->readPC());
// Remove the instruction from the list.
+ instList.back() = NULL;
instList.pop_back();
// Mark it as squashed.
diff --git a/cpu/beta_cpu/full_cpu.hh b/cpu/beta_cpu/full_cpu.hh
index 8ce32b7c7..85fc49371 100644
--- a/cpu/beta_cpu/full_cpu.hh
+++ b/cpu/beta_cpu/full_cpu.hh
@@ -5,11 +5,12 @@
//itself properly. Constructor. Derived alpha class. Threads!
// Avoid running stages and advancing queues if idle/stalled.
-#ifndef __SIMPLE_FULL_CPU_HH__
-#define __SIMPLE_FULL_CPU_HH__
+#ifndef __CPU_BETA_CPU_FULL_CPU_HH__
+#define __CPU_BETA_CPU_FULL_CPU_HH__
#include <iostream>
#include <list>
+#include <vector>
#include "cpu/beta_cpu/comm.hh"
@@ -20,6 +21,11 @@
#include "cpu/beta_cpu/cpu_policy.hh"
#include "sim/process.hh"
+#ifdef FULL_SYSTEM
+#include "arch/alpha/ev5.hh"
+using namespace EV5;
+#endif
+
class FunctionalMemory;
class Process;
@@ -34,6 +40,9 @@ class BaseFullCPU : public BaseCPU
#else
BaseFullCPU(Params &params);
#endif // FULL_SYSTEM
+
+ private:
+ int cpu_id;
};
template <class Impl>
@@ -41,6 +50,7 @@ class FullBetaCPU : public BaseFullCPU
{
public:
//Put typedefs from the Impl here.
+ typedef typename Impl::ISA ISA;
typedef typename Impl::CPUPol CPUPolicy;
typedef typename Impl::Params Params;
typedef typename Impl::DynInstPtr DynInstPtr;
@@ -114,19 +124,21 @@ class FullBetaCPU : public BaseFullCPU
bool validDataAddr(Addr addr) { return true; }
/** Get instruction asid. */
- int getInstAsid() { return ITB_ASN_ASN(regs.ipr[ISA::IPR_ITB_ASN]); }
+ int getInstAsid()
+ { return ITB_ASN_ASN(regFile.getIpr()[ISA::IPR_ITB_ASN]); }
/** Get data asid. */
- int getDataAsid() { return DTB_ASN_ASN(regs.ipr[ISA::IPR_DTB_ASN]); }
+ int getDataAsid()
+ { return DTB_ASN_ASN(regFile.getIpr()[ISA::IPR_DTB_ASN]); }
#else
bool validInstAddr(Addr addr)
- { return process->validInstAddr(addr); }
+ { return thread[0]->validInstAddr(addr); }
bool validDataAddr(Addr addr)
- { return process->validDataAddr(addr); }
+ { return thread[0]->validDataAddr(addr); }
- int getInstAsid() { return asid; }
- int getDataAsid() { return asid; }
+ int getInstAsid() { return thread[0]->asid; }
+ int getDataAsid() { return thread[0]->asid; }
#endif
@@ -284,7 +296,14 @@ class FullBetaCPU : public BaseFullCPU
ExecContext *xc;
/** Temporary function to get pointer to exec context. */
- ExecContext *xcBase() { return xc; }
+ ExecContext *xcBase()
+ {
+#ifdef FULL_SYSTEM
+ return system->execContexts[0];
+#else
+ return thread[0];
+#endif
+ }
InstSeqNum globalSeqNum;
@@ -299,12 +318,7 @@ class FullBetaCPU : public BaseFullCPU
// SWContext *swCtx;
#else
- Process *process;
-
- // Address space ID. Note that this is used for TIMING cache
- // simulation only; all functional memory accesses should use
- // one of the FunctionalMemory pointers above.
- short asid;
+ std::vector<ExecContext *> thread;
#endif
FunctionalMemory *mem;
diff --git a/cpu/beta_cpu/iew.cc b/cpu/beta_cpu/iew.cc
index a90d64434..626c4a90f 100644
--- a/cpu/beta_cpu/iew.cc
+++ b/cpu/beta_cpu/iew.cc
@@ -4,4 +4,4 @@
#include "cpu/beta_cpu/iew_impl.hh"
#include "cpu/beta_cpu/inst_queue.hh"
-template class SimpleIEW<AlphaSimpleImpl, AlphaSimpleImpl::CPUPol::IQ>;
+template class SimpleIEW<AlphaSimpleImpl>;
diff --git a/cpu/beta_cpu/iew.hh b/cpu/beta_cpu/iew.hh
index e3e7c6db5..1e5eb2244 100644
--- a/cpu/beta_cpu/iew.hh
+++ b/cpu/beta_cpu/iew.hh
@@ -14,7 +14,7 @@
//Can IEW even stall? Space should be available/allocated already...maybe
//if there's not enough write ports on the ROB or waiting for CDB
//arbitration.
-template<class Impl, class IQ>
+template<class Impl>
class SimpleIEW
{
private:
@@ -25,6 +25,7 @@ class SimpleIEW
typedef typename Impl::FullCPU FullCPU;
typedef typename Impl::Params Params;
+ typedef typename CPUPol::IQ IQ;
typedef typename CPUPol::RenameMap RenameMap;
typedef typename CPUPol::LDSTQ LDSTQ;
@@ -33,6 +34,7 @@ class SimpleIEW
typedef typename CPUPol::RenameStruct RenameStruct;
typedef typename CPUPol::IssueStruct IssueStruct;
+ friend class Impl::FullCPU;
public:
enum Status {
Running,
@@ -49,15 +51,17 @@ class SimpleIEW
Status _wbStatus;
public:
- void squash();
-
- void squashDueToBranch(DynInstPtr &inst);
-
- void squashDueToMem(DynInstPtr &inst);
+ class WritebackEvent : public Event {
+ private:
+ DynInstPtr inst;
+ SimpleIEW<Impl> *iewStage;
- void block();
+ public:
+ WritebackEvent(DynInstPtr &_inst, SimpleIEW<Impl> *_iew);
- inline void unblock();
+ virtual void process();
+ virtual const char *description();
+ };
public:
SimpleIEW(Params &params);
@@ -74,17 +78,30 @@ class SimpleIEW
void setRenameMap(RenameMap *rm_ptr);
- void wakeDependents(DynInstPtr &inst);
+ void squash();
- void tick();
+ void squashDueToBranch(DynInstPtr &inst);
- void iew();
+ void squashDueToMem(DynInstPtr &inst);
+
+ void block();
+
+ inline void unblock();
+
+ void wakeDependents(DynInstPtr &inst);
+
+ void instToCommit(DynInstPtr &inst);
private:
void dispatchInsts();
void executeInsts();
+ public:
+ void tick();
+
+ void iew();
+
//Interfaces to objects inside and outside of IEW.
/** Time buffer interface. */
TimeBuffer<TimeStruct> *timeBuffer;
@@ -121,11 +138,18 @@ class SimpleIEW
/** Skid buffer between rename and IEW. */
std::queue<RenameStruct> skidBuffer;
+ protected:
/** Instruction queue. */
IQ instQueue;
LDSTQ ldstQueue;
+#ifndef FULL_SYSTEM
+ public:
+ void lsqWriteback();
+#endif
+
+ private:
/** Pointer to rename map. Might not want this stage to directly
* access this though...
*/
diff --git a/cpu/beta_cpu/iew_impl.hh b/cpu/beta_cpu/iew_impl.hh
index 1d072ab33..086d39320 100644
--- a/cpu/beta_cpu/iew_impl.hh
+++ b/cpu/beta_cpu/iew_impl.hh
@@ -12,8 +12,36 @@
#include "base/timebuf.hh"
#include "cpu/beta_cpu/iew.hh"
-template<class Impl, class IQ>
-SimpleIEW<Impl, IQ>::SimpleIEW(Params &params)
+template<class Impl>
+SimpleIEW<Impl>::WritebackEvent::WritebackEvent(DynInstPtr &_inst,
+ SimpleIEW<Impl> *_iew)
+ : Event(&mainEventQueue, CPU_Tick_Pri), inst(_inst), iewStage(_iew)
+{
+ this->setFlags(Event::AutoDelete);
+}
+
+template<class Impl>
+void
+SimpleIEW<Impl>::WritebackEvent::process()
+{
+ DPRINTF(IEW, "IEW: WRITEBACK EVENT!!!!\n");
+
+ // Need to insert instruction into queue to commit
+ iewStage->instToCommit(inst);
+ // Need to execute second half of the instruction, do actual writing to
+ // registers and such
+ inst->execute();
+}
+
+template<class Impl>
+const char *
+SimpleIEW<Impl>::WritebackEvent::description()
+{
+ return "LSQ writeback event";
+}
+
+template<class Impl>
+SimpleIEW<Impl>::SimpleIEW(Params &params)
: // Just make this time buffer really big for now
issueToExecQueue(5, 5),
instQueue(params),
@@ -36,11 +64,13 @@ SimpleIEW<Impl, IQ>::SimpleIEW(Params &params)
// Instruction queue needs the queue between issue and execute.
instQueue.setIssueToExecuteQueue(&issueToExecQueue);
+
+ ldstQueue.setIEW(this);
}
-template <class Impl, class IQ>
+template <class Impl>
void
-SimpleIEW<Impl, IQ>::regStats()
+SimpleIEW<Impl>::regStats()
{
instQueue.regStats();
@@ -111,9 +141,9 @@ SimpleIEW<Impl, IQ>::regStats()
.desc("Number of branches that were predicted taken incorrectly");
}
-template<class Impl, class IQ>
+template<class Impl>
void
-SimpleIEW<Impl, IQ>::setCPU(FullCPU *cpu_ptr)
+SimpleIEW<Impl>::setCPU(FullCPU *cpu_ptr)
{
DPRINTF(IEW, "IEW: Setting CPU pointer.\n");
cpu = cpu_ptr;
@@ -122,9 +152,9 @@ SimpleIEW<Impl, IQ>::setCPU(FullCPU *cpu_ptr)
ldstQueue.setCPU(cpu_ptr);
}
-template<class Impl, class IQ>
+template<class Impl>
void
-SimpleIEW<Impl, IQ>::setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr)
+SimpleIEW<Impl>::setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr)
{
DPRINTF(IEW, "IEW: Setting time buffer pointer.\n");
timeBuffer = tb_ptr;
@@ -139,9 +169,9 @@ SimpleIEW<Impl, IQ>::setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr)
instQueue.setTimeBuffer(tb_ptr);
}
-template<class Impl, class IQ>
+template<class Impl>
void
-SimpleIEW<Impl, IQ>::setRenameQueue(TimeBuffer<RenameStruct> *rq_ptr)
+SimpleIEW<Impl>::setRenameQueue(TimeBuffer<RenameStruct> *rq_ptr)
{
DPRINTF(IEW, "IEW: Setting rename queue pointer.\n");
renameQueue = rq_ptr;
@@ -150,9 +180,9 @@ SimpleIEW<Impl, IQ>::setRenameQueue(TimeBuffer<RenameStruct> *rq_ptr)
fromRename = renameQueue->getWire(-renameToIEWDelay);
}
-template<class Impl, class IQ>
+template<class Impl>
void
-SimpleIEW<Impl, IQ>::setIEWQueue(TimeBuffer<IEWStruct> *iq_ptr)
+SimpleIEW<Impl>::setIEWQueue(TimeBuffer<IEWStruct> *iq_ptr)
{
DPRINTF(IEW, "IEW: Setting IEW queue pointer.\n");
iewQueue = iq_ptr;
@@ -161,24 +191,70 @@ SimpleIEW<Impl, IQ>::setIEWQueue(TimeBuffer<IEWStruct> *iq_ptr)
toCommit = iewQueue->getWire(0);
}
-template<class Impl, class IQ>
+template<class Impl>
void
-SimpleIEW<Impl, IQ>::setRenameMap(RenameMap *rm_ptr)
+SimpleIEW<Impl>::setRenameMap(RenameMap *rm_ptr)
{
DPRINTF(IEW, "IEW: Setting rename map pointer.\n");
renameMap = rm_ptr;
}
-template<class Impl, class IQ>
+template<class Impl>
void
-SimpleIEW<Impl, IQ>::wakeDependents(DynInstPtr &inst)
+SimpleIEW<Impl>::squash()
{
- instQueue.wakeDependents(inst);
+ DPRINTF(IEW, "IEW: Squashing all instructions.\n");
+ _status = Squashing;
+
+ // Tell the IQ to start squashing.
+ instQueue.squash();
+
+ // Tell the LDSTQ to start squashing.
+ ldstQueue.squash(fromCommit->commitInfo.doneSeqNum);
+}
+
+template<class Impl>
+void
+SimpleIEW<Impl>::squashDueToBranch(DynInstPtr &inst)
+{
+ DPRINTF(IEW, "IEW: Squashing from a specific instruction, PC: %#x.\n",
+ inst->PC);
+ // Perhaps leave the squashing up to the ROB stage to tell it when to
+ // squash?
+ _status = Squashing;
+
+ // Signal the squash to commit through the IEW queue (toCommit wire).
+ toCommit->squash = true;
+ // Also send PC update information back to prior stages.
+ toCommit->squashedSeqNum = inst->seqNum;
+ toCommit->mispredPC = inst->readPC();
+ toCommit->nextPC = inst->readNextPC();
+ toCommit->branchMispredict = true;
+ // Prediction was incorrect, so send back inverse.
+ toCommit->branchTaken = inst->readNextPC() !=
+ (inst->readPC() + sizeof(MachInst));
+}
+
+template<class Impl>
+void
+SimpleIEW<Impl>::squashDueToMem(DynInstPtr &inst)
+{
+ DPRINTF(IEW, "IEW: Squashing from a specific instruction, PC: %#x.\n",
+ inst->PC);
+ // Perhaps leave the squashing up to the ROB stage to tell it when to
+ // squash?
+ _status = Squashing;
+
+ // Signal the squash to commit through the IEW queue (toCommit wire).
+ toCommit->squash = true;
+ // Also send PC update information back to prior stages.
+ toCommit->squashedSeqNum = inst->seqNum;
+ toCommit->nextPC = inst->readNextPC();
}
-template<class Impl, class IQ>
+template<class Impl>
void
-SimpleIEW<Impl, IQ>::block()
+SimpleIEW<Impl>::block()
{
DPRINTF(IEW, "IEW: Blocking.\n");
// Set the status to Blocked.
@@ -193,9 +269,9 @@ SimpleIEW<Impl, IQ>::block()
// the previous stages are expected to check all possible stall signals.
}
-template<class Impl, class IQ>
+template<class Impl>
inline void
-SimpleIEW<Impl, IQ>::unblock()
+SimpleIEW<Impl>::unblock()
{
// Check if there's information in the skid buffer. If there is, then
// set status to unblocking, otherwise set it directly to running.
@@ -215,62 +291,24 @@ SimpleIEW<Impl, IQ>::unblock()
}
}
-template<class Impl, class IQ>
+template<class Impl>
void
-SimpleIEW<Impl, IQ>::squash()
+SimpleIEW<Impl>::wakeDependents(DynInstPtr &inst)
{
- DPRINTF(IEW, "IEW: Squashing all instructions.\n");
- _status = Squashing;
-
- // Tell the IQ to start squashing.
- instQueue.squash();
-
- // Tell the LDSTQ to start squashing.
- ldstQueue.squash(fromCommit->commitInfo.doneSeqNum);
+ instQueue.wakeDependents(inst);
}
-template<class Impl, class IQ>
-void
-SimpleIEW<Impl, IQ>::squashDueToBranch(DynInstPtr &inst)
-{
- DPRINTF(IEW, "IEW: Squashing from a specific instruction, PC: %#x.\n",
- inst->PC);
- // Perhaps leave the squashing up to the ROB stage to tell it when to
- // squash?
- _status = Squashing;
-
- // Tell rename to squash through the time buffer.
- toCommit->squash = true;
- // Also send PC update information back to prior stages.
- toCommit->squashedSeqNum = inst->seqNum;
- toCommit->mispredPC = inst->readPC();
- toCommit->nextPC = inst->readNextPC();
- toCommit->branchMispredict = true;
- // Prediction was incorrect, so send back inverse.
- toCommit->branchTaken = inst->readNextPC() !=
- (inst->readPC() + sizeof(MachInst));
-}
-template<class Impl, class IQ>
+template<class Impl>
void
-SimpleIEW<Impl, IQ>::squashDueToMem(DynInstPtr &inst)
+SimpleIEW<Impl>::instToCommit(DynInstPtr &inst)
{
- DPRINTF(IEW, "IEW: Squashing from a specific instruction, PC: %#x.\n",
- inst->PC);
- // Perhaps leave the squashing up to the ROB stage to tell it when to
- // squash?
- _status = Squashing;
- // Tell rename to squash through the time buffer.
- toCommit->squash = true;
- // Also send PC update information back to prior stages.
- toCommit->squashedSeqNum = inst->seqNum;
- toCommit->nextPC = inst->readNextPC();
}
-template <class Impl, class IQ>
+template <class Impl>
void
-SimpleIEW<Impl, IQ>::dispatchInsts()
+SimpleIEW<Impl>::dispatchInsts()
{
////////////////////////////////////////
// DISPATCH/ISSUE stage
@@ -329,14 +367,14 @@ SimpleIEW<Impl, IQ>::dispatchInsts()
// a signal to this stage to issue and execute that
// store. Change to be a bit that says the instruction
// has extra work to do at commit.
- inst->setCanCommit();
+// inst->setCanCommit();
- instQueue.insertNonSpec(inst);
+// instQueue.insertNonSpec(inst);
++iewDispStoreInsts;
- ++iewDispNonSpecInsts;
+// ++iewDispNonSpecInsts;
- continue;
+// continue;
} else if (inst->isNonSpeculative()) {
DPRINTF(IEW, "IEW: Issue: Nonspeculative instruction "
"encountered, skipping.\n");
@@ -385,9 +423,9 @@ SimpleIEW<Impl, IQ>::dispatchInsts()
}
}
-template <class Impl, class IQ>
+template <class Impl>
void
-SimpleIEW<Impl, IQ>::executeInsts()
+SimpleIEW<Impl>::executeInsts()
{
////////////////////////////////////////
//EXECUTE/WRITEBACK stage
@@ -403,6 +441,8 @@ SimpleIEW<Impl, IQ>::executeInsts()
int fu_usage = 0;
bool fetch_redirect = false;
+ int inst_slot = 0;
+ int time_slot = 0;
// Execute/writeback any instructions that are available.
for (int inst_num = 0;
@@ -452,7 +492,7 @@ SimpleIEW<Impl, IQ>::executeInsts()
++iewExecLoadInsts;
} else if (inst->isStore()) {
- ldstQueue.executeStore();
+ ldstQueue.executeStore(inst);
++iewExecStoreInsts;
} else {
@@ -473,9 +513,23 @@ SimpleIEW<Impl, IQ>::executeInsts()
// For now naively assume that all instructions take one cycle.
// Otherwise would have to look into the time buffer based on the
// latency of the instruction.
+ (*iewQueue)[time_slot].insts[inst_slot];
+ while ((*iewQueue)[time_slot].insts[inst_slot]) {
+ if (inst_slot < issueWidth) {
+ ++inst_slot;
+ } else {
+ ++time_slot;
+ inst_slot = 0;
+ }
+
+ assert(time_slot < 5);
+ }
+
+ // May actually have to work this out, especially with loads and stores
// Add finished instruction to queue to commit.
- toCommit->insts[inst_num] = inst;
+ (*iewQueue)[time_slot].insts[inst_slot] = inst;
+ (*iewQueue)[time_slot].size++;
// Check if branch was correct. This check happens after the
// instruction is added to the queue because even if the branch
@@ -518,9 +572,9 @@ SimpleIEW<Impl, IQ>::executeInsts()
}
}
-template<class Impl, class IQ>
+template<class Impl>
void
-SimpleIEW<Impl, IQ>::tick()
+SimpleIEW<Impl>::tick()
{
// Considering putting all the state-determining stuff in this section.
@@ -594,14 +648,20 @@ SimpleIEW<Impl, IQ>::tick()
// Write back number of free IQ entries here.
toRename->iewInfo.freeIQEntries = instQueue.numFreeEntries();
+ ldstQueue.writebackStores();
+
// Check the committed load/store signals to see if there's a load
// or store to commit. Also check if it's being told to execute a
// nonspeculative instruction.
- if (fromCommit->commitInfo.commitIsStore) {
+ // This is pretty inefficient...
+// if (0/*fromCommit->commitInfo.commitIsStore*/) {
+ if (!fromCommit->commitInfo.squash &&
+ !fromCommit->commitInfo.robSquashing) {
ldstQueue.commitStores(fromCommit->commitInfo.doneSeqNum);
- } else if (fromCommit->commitInfo.commitIsLoad) {
+// } else if (fromCommit->commitInfo.commitIsLoad) {
ldstQueue.commitLoads(fromCommit->commitInfo.doneSeqNum);
}
+// }
if (fromCommit->commitInfo.nonSpecSeqNum != 0) {
instQueue.scheduleNonSpec(fromCommit->commitInfo.nonSpecSeqNum);
@@ -611,9 +671,9 @@ SimpleIEW<Impl, IQ>::tick()
instQueue.numFreeEntries());
}
-template<class Impl, class IQ>
+template<class Impl>
void
-SimpleIEW<Impl, IQ>::iew()
+SimpleIEW<Impl>::iew()
{
// Might want to put all state checks in the tick() function.
// Check if being told to stall from commit.
@@ -663,3 +723,12 @@ SimpleIEW<Impl, IQ>::iew()
// Not the best place for it, but this works (hopefully).
issueToExecQueue.advance();
}
+
+#ifndef FULL_SYSTEM // Built only when FULL_SYSTEM is not defined.
+template<class Impl>
+void
+SimpleIEW<Impl>::lsqWriteback() // Delegates to the load/store queue's writebackAllInsts().
+{
+ ldstQueue.writebackAllInsts();
+}
+#endif // !FULL_SYSTEM
diff --git a/cpu/beta_cpu/inst_queue.hh b/cpu/beta_cpu/inst_queue.hh
index 6fcce70a4..120e6b940 100644
--- a/cpu/beta_cpu/inst_queue.hh
+++ b/cpu/beta_cpu/inst_queue.hh
@@ -174,7 +174,7 @@ class InstructionQueue
* once the IQ gets a signal from commit. While it's redundant to
* have the key be a part of the value (the sequence number is stored
* inside of DynInst), when these instructions are woken up only
- * the sequence number will be available. Thus it is necessary to be
+ * the sequence number will be available. Thus it is most efficient to be
* able to search by the sequence number alone.
*/
std::map<InstSeqNum, DynInstPtr> nonSpecInsts;
diff --git a/cpu/beta_cpu/inst_queue_impl.hh b/cpu/beta_cpu/inst_queue_impl.hh
index c688181ed..d4e3939cf 100644
--- a/cpu/beta_cpu/inst_queue_impl.hh
+++ b/cpu/beta_cpu/inst_queue_impl.hh
@@ -31,8 +31,6 @@ InstructionQueue<Impl>::InstructionQueue(Params &params)
numPhysFloatRegs(params.numPhysFloatRegs),
commitToIEWDelay(params.commitToIEWDelay)
{
- DPRINTF(IQ, "IQ: Int width is %i.\n", params.executeIntWidth);
-
// Initialize the number of free IQ entries.
freeEntries = numEntries;
@@ -291,10 +289,6 @@ InstructionQueue<Impl>::insertNonSpec(DynInstPtr &inst)
// Decrease the number of free entries.
--freeEntries;
- // Look through its source registers (physical regs), and mark any
- // dependencies.
-// addToDependents(inst);
-
// Have this instruction set itself as the producer of its destination
// register(s).
createDependency(inst);
@@ -568,15 +562,20 @@ InstructionQueue<Impl>::scheduleReadyInsts()
break;
case Squashed:
- issuing_inst = squashed_head_inst;
+// issuing_inst = squashed_head_inst;
+ assert(0 && "Squashed insts should not issue any more!");
squashedInsts.pop();
+ // Set the squashed instruction as able to commit so that commit
+ // can just drop it from the ROB. This is a bit faked.
++squashed_issued;
+ ++freeEntries;
+
DPRINTF(IQ, "IQ: Issuing squashed instruction PC %#x.\n",
- issuing_inst->readPC());
+ squashed_head_inst->readPC());
break;
}
- if (list_with_oldest != None) {
+ if (list_with_oldest != None && list_with_oldest != Squashed) {
i2e_info->insts[total_issued] = issuing_inst;
i2e_info->size++;
@@ -641,8 +640,10 @@ InstructionQueue<Impl>::squash()
// Setup the squash iterator to point to the tail.
squashIt = tail;
- // Call doSquash.
- doSquash();
+ // Call doSquash if there are insts in the IQ
+ if (freeEntries != numEntries) {
+ doSquash();
+ }
// Also tell the memory dependence unit to squash.
memDepUnit.squash(squashedSeqNum);
@@ -672,12 +673,12 @@ InstructionQueue<Impl>::doSquash()
// Remove the instruction from the dependency list.
// Hack for now: These below don't add themselves to the
// dependency list, so don't try to remove them.
- if (!squashed_inst->isNonSpeculative() &&
- !squashed_inst->isStore()) {
- int8_t total_src_regs = squashed_inst->numSrcRegs();
+ if (!squashed_inst->isNonSpeculative()/* &&
+ !squashed_inst->isStore()*/
+ ) {
for (int src_reg_idx = 0;
- src_reg_idx < total_src_regs;
+ src_reg_idx < squashed_inst->numSrcRegs();
src_reg_idx++)
{
PhysRegIndex src_reg =
@@ -699,6 +700,8 @@ InstructionQueue<Impl>::doSquash()
// Might want to remove producers as well.
} else {
+ nonSpecInsts[squashed_inst->seqNum] = NULL;
+
nonSpecInsts.erase(squashed_inst->seqNum);
++iqSquashedNonSpecRemoved;
@@ -709,7 +712,11 @@ InstructionQueue<Impl>::doSquash()
// Mark it as squashed within the IQ.
squashed_inst->setSquashedInIQ();
- squashedInsts.push(squashed_inst);
+// squashedInsts.push(squashed_inst);
+ squashed_inst->setIssued();
+ squashed_inst->setCanCommit();
+
+ ++freeEntries;
DPRINTF(IQ, "IQ: Instruction PC %#x squashed.\n",
squashed_inst->readPC());
@@ -718,6 +725,13 @@ InstructionQueue<Impl>::doSquash()
--squashIt;
++iqSquashedInstsExamined;
}
+
+ assert(freeEntries <= numEntries);
+
+ if (freeEntries == numEntries) {
+ tail = cpu->instList.end();
+ }
+
}
template <class Impl>
@@ -739,8 +753,6 @@ InstructionQueue<Impl>::wakeDependents(DynInstPtr &completed_inst)
//Look at the physical destination register of the DynInst
//and look it up on the dependency graph. Then mark as ready
//any instructions within the instruction queue.
- int8_t total_dest_regs = completed_inst->numDestRegs();
-
DependencyEntry *curr;
// Tell the memory dependence unit to wake any dependents on this
@@ -751,7 +763,7 @@ InstructionQueue<Impl>::wakeDependents(DynInstPtr &completed_inst)
}
for (int dest_reg_idx = 0;
- dest_reg_idx < total_dest_regs;
+ dest_reg_idx < completed_inst->numDestRegs();
dest_reg_idx++)
{
PhysRegIndex dest_reg =
@@ -759,7 +771,7 @@ InstructionQueue<Impl>::wakeDependents(DynInstPtr &completed_inst)
// Special case of uniq or control registers. They are not
// handled by the IQ and thus have no dependency graph entry.
- // @todo Figure out a cleaner way to handle thie.
+ // @todo Figure out a cleaner way to handle this.
if (dest_reg >= numPhysRegs) {
continue;
}
@@ -789,6 +801,8 @@ InstructionQueue<Impl>::wakeDependents(DynInstPtr &completed_inst)
DependencyEntry::mem_alloc_counter--;
+ curr->inst = NULL;
+
delete curr;
}
@@ -874,7 +888,10 @@ InstructionQueue<Impl>::createDependency(DynInstPtr &new_inst)
dependGraph[dest_reg].inst = new_inst;
- assert(!dependGraph[dest_reg].next);
+ if (dependGraph[dest_reg].next) {
+ dumpDependGraph();
+ panic("IQ: Dependency graph not empty!");
+ }
// Mark the scoreboard to say it's not yet ready.
regScoreboard[dest_reg] = false;
@@ -929,34 +946,10 @@ InstructionQueue<Impl>::DependencyEntry::remove(DynInstPtr &inst_to_remove)
--mem_alloc_counter;
- delete curr;
-}
-
-template <class Impl>
-void
-InstructionQueue<Impl>::dumpDependGraph()
-{
- DependencyEntry *curr;
+ // Could push this off to the destructor of DependencyEntry
+ curr->inst = NULL;
- for (int i = 0; i < numPhysRegs; ++i)
- {
- curr = &dependGraph[i];
-
- if (curr->inst) {
- cprintf("dependGraph[%i]: producer: %#x consumer: ", i,
- curr->inst->readPC());
- } else {
- cprintf("dependGraph[%i]: No producer. consumer: ", i);
- }
-
- while (curr->next != NULL) {
- curr = curr->next;
-
- cprintf("%#x ", curr->inst->readPC());
- }
-
- cprintf("\n");
- }
+ delete curr;
}
template <class Impl>
@@ -1024,6 +1017,12 @@ InstructionQueue<Impl>::addIfReady(DynInstPtr &inst)
}
}
+/*
+ * Caution, this function must not be called prior to tail being updated at
+ * least once, otherwise it will fail the assertion. This is because
+ * instList.begin() actually changes upon the insertion of an element into the
+ * list when the list is empty.
+ */
template <class Impl>
int
InstructionQueue<Impl>::countInsts()
@@ -1031,6 +1030,9 @@ InstructionQueue<Impl>::countInsts()
ListIt count_it = cpu->instList.begin();
int total_insts = 0;
+ if (tail == cpu->instList.end())
+ return 0;
+
while (count_it != tail) {
if (!(*count_it)->isIssued()) {
++total_insts;
@@ -1053,6 +1055,33 @@ InstructionQueue<Impl>::countInsts()
template <class Impl>
void
+InstructionQueue<Impl>::dumpDependGraph() // Prints every dependency-graph entry via cprintf.
+{
+ DependencyEntry *curr; // cursor over one register's dependency chain
+
+ for (int i = 0; i < numPhysRegs; ++i) // one output line per physical register index
+ {
+ curr = &dependGraph[i]; // head entry for register i
+
+ if (curr->inst) { // head entry holds an instruction: print its PC as the producer
+ cprintf("dependGraph[%i]: producer: %#x consumer: ", i,
+ curr->inst->readPC());
+ } else {
+ cprintf("dependGraph[%i]: No producer. consumer: ", i);
+ }
+
+ while (curr->next != NULL) { // follow the next links, printing each linked entry's PC
+ curr = curr->next;
+
+ cprintf("%#x ", curr->inst->readPC()); // NOTE(review): curr->inst is dereferenced without a null check
+ }
+
+ cprintf("\n");
+ }
+}
+
+template <class Impl>
+void
InstructionQueue<Impl>::dumpLists()
{
cprintf("Ready integer list size: %i\n", readyIntInsts.size());
diff --git a/cpu/beta_cpu/regfile.hh b/cpu/beta_cpu/regfile.hh
index a81ed63bc..c9d1b092f 100644
--- a/cpu/beta_cpu/regfile.hh
+++ b/cpu/beta_cpu/regfile.hh
@@ -1,18 +1,26 @@
-#ifndef __REGFILE_HH__
-#define __REGFILE_HH__
+#ifndef __CPU_BETA_CPU_REGFILE_HH__
+#define __CPU_BETA_CPU_REGFILE_HH__
// @todo: Destructor
#include "arch/alpha/isa_traits.hh"
+#include "base/trace.hh"
#include "cpu/beta_cpu/comm.hh"
-#include "base/trace.hh"
+#ifdef FULL_SYSTEM
+#include "kern/kernel_stats.hh"
+#include "arch/alpha/ev5.hh"
+
+using namespace EV5;
+#endif
// This really only depends on the ISA, and not the Impl. It might be nicer
// to see if I can make it depend on nothing...
// Things that are in the ifdef FULL_SYSTEM are pretty dependent on the ISA,
// and should go in the AlphaFullCPU.
+extern void debug_break();
+
template <class Impl>
class PhysRegFile
{
@@ -27,6 +35,7 @@ class PhysRegFile
//be private eventually with some accessor functions.
public:
typedef typename Impl::ISA ISA;
+ typedef typename Impl::FullCPU FullCPU;
PhysRegFile(unsigned _numPhysicalIntRegs,
unsigned _numPhysicalFloatRegs);
@@ -177,6 +186,7 @@ class PhysRegFile
#ifdef FULL_SYSTEM
uint64_t readIpr(int idx, Fault &fault);
Fault setIpr(int idx, uint64_t val);
+ InternalProcReg *getIpr() { return ipr; }
int readIntrFlag() { return intrflag; }
void setIntrFlag(int val) { intrflag = val; }
#endif
@@ -196,7 +206,21 @@ class PhysRegFile
Addr pc; // program counter
Addr npc; // next-cycle program counter
+#ifdef FULL_SYSTEM
private:
+ // This is ISA specific stuff; remove it eventually once ISAImpl is used
+ IntReg palregs[NumIntRegs]; // PAL shadow registers
+ InternalProcReg ipr[NumInternalProcRegs]; // internal processor regs
+ int intrflag; // interrupt flag
+ bool pal_shadow; // using pal_shadow registers
+#endif
+
+ private:
+ FullCPU *cpu;
+
+ public:
+ void setCPU(FullCPU *cpu_ptr) { cpu = cpu_ptr; }
+
unsigned numPhysicalIntRegs;
unsigned numPhysicalFloatRegs;
};
@@ -269,46 +293,42 @@ PhysRegFile<Impl>::readIpr(int idx, Fault &fault)
case ISA::IPR_IPLR:
case ISA::IPR_INTID:
case ISA::IPR_PMCTR:
- // no side-effect
- retval = ipr[idx];
- break;
+ // no side-effect
+ retval = ipr[idx];
+ break;
case ISA::IPR_CC:
- retval |= ipr[idx] & ULL(0xffffffff00000000);
- retval |= curTick & ULL(0x00000000ffffffff);
- break;
+ retval |= ipr[idx] & ULL(0xffffffff00000000);
+ retval |= curTick & ULL(0x00000000ffffffff);
+ break;
case ISA::IPR_VA:
- // SFX: unlocks interrupt status registers
- retval = ipr[idx];
-
- if (!misspeculating())
- regs.intrlock = false;
- break;
+ retval = ipr[idx];
+ break;
case ISA::IPR_VA_FORM:
case ISA::IPR_MM_STAT:
case ISA::IPR_IFAULT_VA_FORM:
case ISA::IPR_EXC_MASK:
case ISA::IPR_EXC_SUM:
- retval = ipr[idx];
- break;
+ retval = ipr[idx];
+ break;
case ISA::IPR_DTB_PTE:
- {
- ISA::PTE &pte = dtb->index(!misspeculating());
-
- retval |= ((u_int64_t)pte.ppn & ULL(0x7ffffff)) << 32;
- retval |= ((u_int64_t)pte.xre & ULL(0xf)) << 8;
- retval |= ((u_int64_t)pte.xwe & ULL(0xf)) << 12;
- retval |= ((u_int64_t)pte.fonr & ULL(0x1)) << 1;
- retval |= ((u_int64_t)pte.fonw & ULL(0x1))<< 2;
- retval |= ((u_int64_t)pte.asma & ULL(0x1)) << 4;
- retval |= ((u_int64_t)pte.asn & ULL(0x7f)) << 57;
- }
- break;
-
- // write only registers
+ {
+ typename ISA::PTE &pte = cpu->dtb->index(1);
+
+ retval |= ((u_int64_t)pte.ppn & ULL(0x7ffffff)) << 32;
+ retval |= ((u_int64_t)pte.xre & ULL(0xf)) << 8;
+ retval |= ((u_int64_t)pte.xwe & ULL(0xf)) << 12;
+ retval |= ((u_int64_t)pte.fonr & ULL(0x1)) << 1;
+ retval |= ((u_int64_t)pte.fonw & ULL(0x1))<< 2;
+ retval |= ((u_int64_t)pte.asma & ULL(0x1)) << 4;
+ retval |= ((u_int64_t)pte.asn & ULL(0x7f)) << 57;
+ }
+ break;
+
+ // write only registers
case ISA::IPR_HWINT_CLR:
case ISA::IPR_SL_XMIT:
case ISA::IPR_DC_FLUSH:
@@ -318,22 +338,19 @@ PhysRegFile<Impl>::readIpr(int idx, Fault &fault)
case ISA::IPR_DTB_IAP:
case ISA::IPR_ITB_IA:
case ISA::IPR_ITB_IAP:
- fault = Unimplemented_Opcode_Fault;
- break;
+ fault = Unimplemented_Opcode_Fault;
+ break;
default:
- // invalid IPR
- fault = Unimplemented_Opcode_Fault;
- break;
+ // invalid IPR
+ fault = Unimplemented_Opcode_Fault;
+ break;
}
return retval;
}
-#ifdef DEBUG
-// Cause the simulator to break when changing to the following IPL
-int break_ipl = -1;
-#endif
+extern int break_ipl;
template <class Impl>
Fault
@@ -341,9 +358,6 @@ PhysRegFile<Impl>::setIpr(int idx, uint64_t val)
{
uint64_t old;
- if (misspeculating())
- return No_Fault;
-
switch (idx) {
case ISA::IPR_PALtemp0:
case ISA::IPR_PALtemp1:
@@ -372,222 +386,225 @@ PhysRegFile<Impl>::setIpr(int idx, uint64_t val)
case ISA::IPR_IC_PERR_STAT:
case ISA::IPR_DC_PERR_STAT:
case ISA::IPR_PMCTR:
- // write entire quad w/ no side-effect
- ipr[idx] = val;
- break;
+ // write entire quad w/ no side-effect
+ ipr[idx] = val;
+ break;
case ISA::IPR_CC_CTL:
- // This IPR resets the cycle counter. We assume this only
- // happens once... let's verify that.
- assert(ipr[idx] == 0);
- ipr[idx] = 1;
- break;
+ // This IPR resets the cycle counter. We assume this only
+ // happens once... let's verify that.
+ assert(ipr[idx] == 0);
+ ipr[idx] = 1;
+ break;
case ISA::IPR_CC:
- // This IPR only writes the upper 64 bits. It's ok to write
- // all 64 here since we mask out the lower 32 in rpcc (see
- // isa_desc).
- ipr[idx] = val;
- break;
+ // This IPR only writes the upper 32 bits. It's ok to write
+ // all 64 here since we mask out the lower 32 in rpcc (see
+ // isa_desc).
+ ipr[idx] = val;
+ break;
case ISA::IPR_PALtemp23:
- // write entire quad w/ no side-effect
- old = ipr[idx];
- ipr[idx] = val;
- kernelStats.context(old, val);
- break;
+ // write entire quad w/ no side-effect
+ old = ipr[idx];
+ ipr[idx] = val;
+// kernelStats.context(old, val);
+ break;
case ISA::IPR_DTB_PTE:
- // write entire quad w/ no side-effect, tag is forthcoming
- ipr[idx] = val;
- break;
+ // write entire quad w/ no side-effect, tag is forthcoming
+ ipr[idx] = val;
+ break;
case ISA::IPR_EXC_ADDR:
- // second least significant bit in PC is always zero
- ipr[idx] = val & ~2;
- break;
+ // second least significant bit in PC is always zero
+ ipr[idx] = val & ~2;
+ break;
case ISA::IPR_ASTRR:
case ISA::IPR_ASTER:
- // only write least significant four bits - privilege mask
- ipr[idx] = val & 0xf;
- break;
+ // only write least significant four bits - privilege mask
+ ipr[idx] = val & 0xf;
+ break;
case ISA::IPR_IPLR:
#ifdef DEBUG
- if (break_ipl != -1 && break_ipl == (val & 0x1f))
- debug_break();
+ if (break_ipl != -1 && break_ipl == (val & 0x1f))
+ debug_break();
#endif
- // only write least significant five bits - interrupt level
- ipr[idx] = val & 0x1f;
- kernelStats.swpipl(ipr[idx]);
- break;
+ // only write least significant five bits - interrupt level
+ ipr[idx] = val & 0x1f;
+// kernelStats.swpipl(ipr[idx]);
+ break;
case ISA::IPR_DTB_CM:
- kernelStats.mode((val & 0x18) != 0);
+// if (val & 0x18)
+// kernelStats->mode(Kernel::user);
+// else
+// kernelStats->mode(Kernel::kernel);
case ISA::IPR_ICM:
- // only write two mode bits - processor mode
- ipr[idx] = val & 0x18;
- break;
+ // only write two mode bits - processor mode
+ ipr[idx] = val & 0x18;
+ break;
case ISA::IPR_ALT_MODE:
- // only write two mode bits - processor mode
- ipr[idx] = val & 0x18;
- break;
+ // only write two mode bits - processor mode
+ ipr[idx] = val & 0x18;
+ break;
case ISA::IPR_MCSR:
- // more here after optimization...
- ipr[idx] = val;
- break;
+ // more here after optimization...
+ ipr[idx] = val;
+ break;
case ISA::IPR_SIRR:
- // only write software interrupt mask
- ipr[idx] = val & 0x7fff0;
- break;
+ // only write software interrupt mask
+ ipr[idx] = val & 0x7fff0;
+ break;
case ISA::IPR_ICSR:
- ipr[idx] = val & ULL(0xffffff0300);
- break;
+ ipr[idx] = val & ULL(0xffffff0300);
+ break;
case ISA::IPR_IVPTBR:
case ISA::IPR_MVPTBR:
- ipr[idx] = val & ULL(0xffffffffc0000000);
- break;
+ ipr[idx] = val & ULL(0xffffffffc0000000);
+ break;
case ISA::IPR_DC_TEST_CTL:
- ipr[idx] = val & 0x1ffb;
- break;
+ ipr[idx] = val & 0x1ffb;
+ break;
case ISA::IPR_DC_MODE:
case ISA::IPR_MAF_MODE:
- ipr[idx] = val & 0x3f;
- break;
+ ipr[idx] = val & 0x3f;
+ break;
case ISA::IPR_ITB_ASN:
- ipr[idx] = val & 0x7f0;
- break;
+ ipr[idx] = val & 0x7f0;
+ break;
case ISA::IPR_DTB_ASN:
- ipr[idx] = val & ULL(0xfe00000000000000);
- break;
+ ipr[idx] = val & ULL(0xfe00000000000000);
+ break;
case ISA::IPR_EXC_SUM:
case ISA::IPR_EXC_MASK:
- // any write to this register clears it
- ipr[idx] = 0;
- break;
+ // any write to this register clears it
+ ipr[idx] = 0;
+ break;
case ISA::IPR_INTID:
case ISA::IPR_SL_RCV:
case ISA::IPR_MM_STAT:
case ISA::IPR_ITB_PTE_TEMP:
case ISA::IPR_DTB_PTE_TEMP:
- // read-only registers
- return Unimplemented_Opcode_Fault;
+ // read-only registers
+ return Unimplemented_Opcode_Fault;
case ISA::IPR_HWINT_CLR:
case ISA::IPR_SL_XMIT:
case ISA::IPR_DC_FLUSH:
case ISA::IPR_IC_FLUSH:
- // the following are write only
- ipr[idx] = val;
- break;
+ // the following are write only
+ ipr[idx] = val;
+ break;
case ISA::IPR_DTB_IA:
- // really a control write
- ipr[idx] = 0;
+ // really a control write
+ ipr[idx] = 0;
- dtb->flushAll();
- break;
+ cpu->dtb->flushAll();
+ break;
case ISA::IPR_DTB_IAP:
- // really a control write
- ipr[idx] = 0;
+ // really a control write
+ ipr[idx] = 0;
- dtb->flushProcesses();
- break;
+ cpu->dtb->flushProcesses();
+ break;
case ISA::IPR_DTB_IS:
- // really a control write
- ipr[idx] = val;
+ // really a control write
+ ipr[idx] = val;
- dtb->flushAddr(val, DTB_ASN_ASN(ipr[ISA::IPR_DTB_ASN]));
- break;
+ cpu->dtb->flushAddr(val, DTB_ASN_ASN(ipr[ISA::IPR_DTB_ASN]));
+ break;
case ISA::IPR_DTB_TAG: {
- struct ISA::PTE pte;
-
- // FIXME: granularity hints NYI...
- if (DTB_PTE_GH(ipr[ISA::IPR_DTB_PTE]) != 0)
- panic("PTE GH field != 0");
-
- // write entire quad
- ipr[idx] = val;
-
- // construct PTE for new entry
- pte.ppn = DTB_PTE_PPN(ipr[ISA::IPR_DTB_PTE]);
- pte.xre = DTB_PTE_XRE(ipr[ISA::IPR_DTB_PTE]);
- pte.xwe = DTB_PTE_XWE(ipr[ISA::IPR_DTB_PTE]);
- pte.fonr = DTB_PTE_FONR(ipr[ISA::IPR_DTB_PTE]);
- pte.fonw = DTB_PTE_FONW(ipr[ISA::IPR_DTB_PTE]);
- pte.asma = DTB_PTE_ASMA(ipr[ISA::IPR_DTB_PTE]);
- pte.asn = DTB_ASN_ASN(ipr[ISA::IPR_DTB_ASN]);
-
- // insert new TAG/PTE value into data TLB
- dtb->insert(val, pte);
+ struct ISA::PTE pte;
+
+ // FIXME: granularity hints NYI...
+ if (DTB_PTE_GH(ipr[ISA::IPR_DTB_PTE]) != 0)
+ panic("PTE GH field != 0");
+
+ // write entire quad
+ ipr[idx] = val;
+
+ // construct PTE for new entry
+ pte.ppn = DTB_PTE_PPN(ipr[ISA::IPR_DTB_PTE]);
+ pte.xre = DTB_PTE_XRE(ipr[ISA::IPR_DTB_PTE]);
+ pte.xwe = DTB_PTE_XWE(ipr[ISA::IPR_DTB_PTE]);
+ pte.fonr = DTB_PTE_FONR(ipr[ISA::IPR_DTB_PTE]);
+ pte.fonw = DTB_PTE_FONW(ipr[ISA::IPR_DTB_PTE]);
+ pte.asma = DTB_PTE_ASMA(ipr[ISA::IPR_DTB_PTE]);
+ pte.asn = DTB_ASN_ASN(ipr[ISA::IPR_DTB_ASN]);
+
+ // insert new TAG/PTE value into data TLB
+ cpu->dtb->insert(val, pte);
}
- break;
+ break;
case ISA::IPR_ITB_PTE: {
- struct ISA::PTE pte;
-
- // FIXME: granularity hints NYI...
- if (ITB_PTE_GH(val) != 0)
- panic("PTE GH field != 0");
-
- // write entire quad
- ipr[idx] = val;
-
- // construct PTE for new entry
- pte.ppn = ITB_PTE_PPN(val);
- pte.xre = ITB_PTE_XRE(val);
- pte.xwe = 0;
- pte.fonr = ITB_PTE_FONR(val);
- pte.fonw = ITB_PTE_FONW(val);
- pte.asma = ITB_PTE_ASMA(val);
- pte.asn = ITB_ASN_ASN(ipr[ISA::IPR_ITB_ASN]);
-
- // insert new TAG/PTE value into data TLB
- itb->insert(ipr[ISA::IPR_ITB_TAG], pte);
+ struct ISA::PTE pte;
+
+ // FIXME: granularity hints NYI...
+ if (ITB_PTE_GH(val) != 0)
+ panic("PTE GH field != 0");
+
+ // write entire quad
+ ipr[idx] = val;
+
+ // construct PTE for new entry
+ pte.ppn = ITB_PTE_PPN(val);
+ pte.xre = ITB_PTE_XRE(val);
+ pte.xwe = 0;
+ pte.fonr = ITB_PTE_FONR(val);
+ pte.fonw = ITB_PTE_FONW(val);
+ pte.asma = ITB_PTE_ASMA(val);
+ pte.asn = ITB_ASN_ASN(ipr[ISA::IPR_ITB_ASN]);
+
+ // insert new TAG/PTE value into instruction TLB
+ cpu->itb->insert(ipr[ISA::IPR_ITB_TAG], pte);
}
- break;
+ break;
case ISA::IPR_ITB_IA:
- // really a control write
- ipr[idx] = 0;
+ // really a control write
+ ipr[idx] = 0;
- itb->flushAll();
- break;
+ cpu->itb->flushAll();
+ break;
case ISA::IPR_ITB_IAP:
- // really a control write
- ipr[idx] = 0;
+ // really a control write
+ ipr[idx] = 0;
- itb->flushProcesses();
- break;
+ cpu->itb->flushProcesses();
+ break;
case ISA::IPR_ITB_IS:
- // really a control write
- ipr[idx] = val;
+ // really a control write
+ ipr[idx] = val;
- itb->flushAddr(val, ITB_ASN_ASN(ipr[ISA::IPR_ITB_ASN]));
- break;
+ cpu->itb->flushAddr(val, ITB_ASN_ASN(ipr[ISA::IPR_ITB_ASN]));
+ break;
default:
- // invalid IPR
- return Unimplemented_Opcode_Fault;
+ // invalid IPR
+ return Unimplemented_Opcode_Fault;
}
// no error...
@@ -596,4 +613,4 @@ PhysRegFile<Impl>::setIpr(int idx, uint64_t val)
#endif // #ifdef FULL_SYSTEM
-#endif // __REGFILE_HH__
+#endif // __CPU_BETA_CPU_REGFILE_HH__
diff --git a/cpu/beta_cpu/rob.hh b/cpu/beta_cpu/rob.hh
index da6b5232a..3e08def74 100644
--- a/cpu/beta_cpu/rob.hh
+++ b/cpu/beta_cpu/rob.hh
@@ -10,8 +10,6 @@
#include <utility>
#include <vector>
-//#include "arch/alpha/isa_traits.hh"
-
/**
* ROB class. Uses the instruction list that exists within the CPU to
* represent the ROB. This class doesn't contain that list, but instead
diff --git a/cpu/beta_cpu/rob_impl.hh b/cpu/beta_cpu/rob_impl.hh
index 86c4e2db1..52d51028e 100644
--- a/cpu/beta_cpu/rob_impl.hh
+++ b/cpu/beta_cpu/rob_impl.hh
@@ -1,5 +1,5 @@
-#ifndef __ROB_IMPL_HH__
-#define __ROB_IMPL_HH__
+#ifndef __CPU_BETA_CPU_ROB_IMPL_HH__
+#define __CPU_BETA_CPU_ROB_IMPL_HH__
#include "cpu/beta_cpu/rob.hh"
@@ -107,10 +107,8 @@ ROB<Impl>::retireHead()
assert(numInstsInROB == countInsts());
assert(numInstsInROB > 0);
- DynInstPtr head_inst;
-
// Get the head ROB instruction.
- head_inst = cpu->instList.front();
+ DynInstPtr head_inst = cpu->instList.front();
// Make certain this can retire.
assert(head_inst->readyToCommit());
@@ -126,11 +124,10 @@ ROB<Impl>::retireHead()
// A special case is needed if the instruction being retired is the
// only instruction in the ROB; otherwise the tail iterator will become
// invalidated.
- if (tail == cpu->instList.begin()) {
- cpu->removeFrontInst(head_inst);
+ cpu->removeFrontInst(head_inst);
+
+ if (numInstsInROB == 0) {
tail = cpu->instList.end();
- } else {
- cpu->removeFrontInst(head_inst);
}
}
@@ -283,4 +280,4 @@ ROB<Impl>::readTailSeqNum()
return (*tail)->seqNum;
}
-#endif // __ROB_IMPL_HH__
+#endif // __CPU_BETA_CPU_ROB_IMPL_HH__
diff --git a/cpu/ooo_cpu/ooo_cpu.hh b/cpu/ooo_cpu/ooo_cpu.hh
index 25fdb39b6..ddbc3b061 100644
--- a/cpu/ooo_cpu/ooo_cpu.hh
+++ b/cpu/ooo_cpu/ooo_cpu.hh
@@ -122,7 +122,7 @@ class OoOCPU : public BaseCPU
enum Status {
Running,
Idle,
- IcacheMissStall,
+ IcacheMiss,
IcacheMissComplete,
DcacheMissStall,
SwitchedOut
@@ -161,6 +161,8 @@ class OoOCPU : public BaseCPU
virtual ~OoOCPU();
+ void init();
+
private:
void copyFromXC();
@@ -203,14 +205,21 @@ class OoOCPU : public BaseCPU
// Will need to create a cache completion event upon any memory miss.
ICacheCompletionEvent iCacheCompletionEvent;
+ class DCacheCompletionEvent;
+
+ typedef typename
+ std::list<DCacheCompletionEvent>::iterator DCacheCompEventIt;
+
class DCacheCompletionEvent : public Event
{
private:
OoOCPU *cpu;
DynInstPtr inst;
+ DCacheCompEventIt dcceIt;
public:
- DCacheCompletionEvent(OoOCPU *_cpu, DynInstPtr &_inst);
+ DCacheCompletionEvent(OoOCPU *_cpu, DynInstPtr &_inst,
+ DCacheCompEventIt &_dcceIt);
virtual void process();
virtual const char *description();
@@ -218,6 +227,11 @@ class OoOCPU : public BaseCPU
friend class DCacheCompletionEvent;
+ protected:
+ std::list<DCacheCompletionEvent> dCacheCompList;
+ DCacheCompEventIt dcceIt;
+
+ private:
Status status() const { return _status; }
virtual void activateContext(int thread_num, int delay);
@@ -260,6 +274,8 @@ class OoOCPU : public BaseCPU
void processICacheCompletion();
+ public:
+
virtual void serialize(std::ostream &os);
virtual void unserialize(Checkpoint *cp, const std::string &section);
@@ -350,7 +366,7 @@ class OoOCPU : public BaseCPU
void commitHeadInst();
- bool grabInst();
+ bool getOneInst();
Fault fetchCacheLine();
@@ -471,6 +487,7 @@ class OoOCPU : public BaseCPU
// ROB tracking stuff.
DynInstPtr robHeadPtr;
DynInstPtr robTailPtr;
+ unsigned robSize;
unsigned robInsts;
// List of outstanding EA instructions.
@@ -545,10 +562,8 @@ OoOCPU<Impl>::read(Addr addr, T &data, unsigned flags, DynInstPtr inst)
/*MemAccessResult result = */dcacheInterface->access(readReq);
if (dcacheInterface->doEvents()) {
- readReq->completionEvent = new DCacheCompletionEvent(this, inst);
- lastDcacheStall = curTick;
- unscheduleTickEvent();
- _status = DcacheMissStall;
+ readReq->completionEvent = new DCacheCompletionEvent(this, inst,
+ dcceIt);
}
}
@@ -579,7 +594,7 @@ OoOCPU<Impl>::write(T data, Addr addr, unsigned flags,
writeReq->reset(addr, sizeof(T), flags);
// translate to physical address
- Fault fault = xc->translateDataWriteReq(writeReq);
+ Fault fault = translateDataWriteReq(writeReq);
// do functional access
if (fault == No_Fault)
@@ -593,10 +608,8 @@ OoOCPU<Impl>::write(T data, Addr addr, unsigned flags,
/*MemAccessResult result = */dcacheInterface->access(writeReq);
if (dcacheInterface->doEvents()) {
- writeReq->completionEvent = new DCacheCompletionEvent(this, inst);
- lastDcacheStall = curTick;
- unscheduleTickEvent();
- _status = DcacheMissStall;
+ writeReq->completionEvent = new DCacheCompletionEvent(this, inst,
+ dcceIt);
}
}
diff --git a/cpu/static_inst.hh b/cpu/static_inst.hh
index 3ac88fd3d..4bbe8b636 100644
--- a/cpu/static_inst.hh
+++ b/cpu/static_inst.hh
@@ -41,16 +41,12 @@
// forward declarations
struct AlphaSimpleImpl;
-struct OoOImpl;
class ExecContext;
class DynInst;
template <class Impl>
class AlphaDynInst;
-template <class Impl>
-class OoODynInst;
-
class FastCPU;
class SimpleCPU;
class InorderCPU;
@@ -260,7 +256,7 @@ class StaticInst : public StaticInstBase
* obtain the dependence info (numSrcRegs and srcRegIdx[]) for
* just the EA computation.
*/
- virtual
+ virtual const
StaticInstPtr<ISA> &eaCompInst() const { return nullStaticInstPtr; }
/**
@@ -269,7 +265,7 @@ class StaticInst : public StaticInstBase
* obtain the dependence info (numSrcRegs and srcRegIdx[]) for
* just the memory access (not the EA computation).
*/
- virtual
+ virtual const
StaticInstPtr<ISA> &memAccInst() const { return nullStaticInstPtr; }
/// The binary machine instruction.
diff --git a/kern/kernel_stats.hh b/kern/kernel_stats.hh
index af93eb95c..66e9911b5 100644
--- a/kern/kernel_stats.hh
+++ b/kern/kernel_stats.hh
@@ -41,6 +41,9 @@
class BaseCPU;
class ExecContext;
class FnEvent;
+// Which declarations does kernel stats expect to already be included?
+class StaticInstBase;
+class System;
enum Fault;
namespace Kernel {
diff --git a/python/m5/objects/AlphaFullCPU.mpy b/python/m5/objects/AlphaFullCPU.mpy
new file mode 100644
index 000000000..bf3f2d718
--- /dev/null
+++ b/python/m5/objects/AlphaFullCPU.mpy
@@ -0,0 +1,79 @@
+from BaseCPU import BaseCPU
+
+simobj DerivAlphaFullCPU(BaseCPU): # Parameter declarations for the DerivAlphaFullCPU sim object (derives from BaseCPU).
+ type = 'DerivAlphaFullCPU'
+ # Most parameters below pass only a description string; mem and the function_trace pair also pass a first argument (presumably a default -- confirm against the Param API).
+ numThreads = Param.Unsigned("number of HW thread contexts")
+ # The functional memory parameter is guarded by build_env['FULL_SYSTEM'] being false.
+ if not build_env['FULL_SYSTEM']:
+ mem = Param.FunctionalMemory(NULL, "memory")
+ # Fetch: delays from decode/rename/IEW/commit to fetch, and the fetch width.
+ decodeToFetchDelay = Param.Unsigned("Decode to fetch delay")
+ renameToFetchDelay = Param.Unsigned("Rename to fetch delay")
+ iewToFetchDelay = Param.Unsigned("Issue/Execute/Writeback to fetch "
+ "delay")
+ commitToFetchDelay = Param.Unsigned("Commit to fetch delay")
+ fetchWidth = Param.Unsigned("Fetch width")
+ # Decode: delays from rename/IEW/commit/fetch to decode, and the decode width.
+ renameToDecodeDelay = Param.Unsigned("Rename to decode delay")
+ iewToDecodeDelay = Param.Unsigned("Issue/Execute/Writeback to decode "
+ "delay")
+ commitToDecodeDelay = Param.Unsigned("Commit to decode delay")
+ fetchToDecodeDelay = Param.Unsigned("Fetch to decode delay")
+ decodeWidth = Param.Unsigned("Decode width")
+ # Rename: delays from IEW/commit/decode to rename, and the rename width.
+ iewToRenameDelay = Param.Unsigned("Issue/Execute/Writeback to rename "
+ "delay")
+ commitToRenameDelay = Param.Unsigned("Commit to rename delay")
+ decodeToRenameDelay = Param.Unsigned("Decode to rename delay")
+ renameWidth = Param.Unsigned("Rename width")
+ # Issue/Execute/Writeback (IEW): delays into IEW, the internal issue-to-execute delay, and issue/execute widths.
+ commitToIEWDelay = Param.Unsigned("Commit to "
+ "Issue/Execute/Writeback delay")
+ renameToIEWDelay = Param.Unsigned("Rename to "
+ "Issue/Execute/Writeback delay")
+ issueToExecuteDelay = Param.Unsigned("Issue to execute delay (internal "
+ "to the IEW stage)")
+ issueWidth = Param.Unsigned("Issue width")
+ executeWidth = Param.Unsigned("Execute width")
+ executeIntWidth = Param.Unsigned("Integer execute width")
+ executeFloatWidth = Param.Unsigned("Floating point execute width")
+ executeBranchWidth = Param.Unsigned("Branch execute width")
+ executeMemoryWidth = Param.Unsigned("Memory execute width")
+ # Commit: delays from IEW to commit and from rename to the reorder buffer, plus commit and squash widths.
+ iewToCommitDelay = Param.Unsigned("Issue/Execute/Writeback to commit "
+ "delay")
+ renameToROBDelay = Param.Unsigned("Rename to reorder buffer delay")
+ commitWidth = Param.Unsigned("Commit width")
+ squashWidth = Param.Unsigned("Squash width")
+ # Predictor sizing: local, global and choice predictor sizes with their counter/history bit widths.
+ local_predictor_size = Param.Unsigned("Size of local predictor")
+ local_ctr_bits = Param.Unsigned("Bits per counter")
+ local_history_table_size = Param.Unsigned("Size of local history table")
+ local_history_bits = Param.Unsigned("Bits for the local history")
+ global_predictor_size = Param.Unsigned("Size of global predictor")
+ global_ctr_bits = Param.Unsigned("Bits per counter")
+ global_history_bits = Param.Unsigned("Bits of history")
+ choice_predictor_size = Param.Unsigned("Size of choice predictor")
+ choice_ctr_bits = Param.Unsigned("Bits of choice counters")
+ # BTB sizing: entry count and tag size in bits.
+ BTBEntries = Param.Unsigned("Number of BTB entries")
+ BTBTagSize = Param.Unsigned("Size of the BTB tags, in bits")
+ # RAS sizing.
+ RASSize = Param.Unsigned("RAS size")
+ # Load queue / store queue entry counts, plus LFST and SSIT table sizes.
+ LQEntries = Param.Unsigned("Number of load queue entries")
+ SQEntries = Param.Unsigned("Number of store queue entries")
+ LFSTSize = Param.Unsigned("Last fetched store table size")
+ SSITSize = Param.Unsigned("Store set ID table size")
+ # Physical register counts, and instruction queue / reorder buffer entry counts.
+ numPhysIntRegs = Param.Unsigned("Number of physical integer registers")
+ numPhysFloatRegs = Param.Unsigned("Number of physical floating point "
+ "registers")
+ numIQEntries = Param.Unsigned("Number of instruction queue entries")
+ numROBEntries = Param.Unsigned("Number of reorder buffer entries")
+ # Instruction shift amount, in bits.
+ instShiftAmt = Param.Unsigned("Number of bits to shift instructions by")
+ # Function tracing controls; both pass an explicit first argument (False and 0).
+ function_trace = Param.Bool(False, "Enable function trace")
+ function_trace_start = Param.Tick(0, "Cycle to start function trace")