-rw-r--r--  cpu/base_dyn_inst.hh             5
-rw-r--r--  cpu/ozone/back_end_impl.hh       2
-rw-r--r--  cpu/ozone/cpu.hh                28
-rw-r--r--  cpu/ozone/cpu_builder.cc        16
-rw-r--r--  cpu/ozone/cpu_impl.hh          118
-rw-r--r--  cpu/ozone/dyn_inst.hh           40
-rw-r--r--  cpu/ozone/dyn_inst_impl.hh      43
-rw-r--r--  cpu/ozone/front_end.hh          13
-rw-r--r--  cpu/ozone/front_end_impl.hh     58
-rw-r--r--  cpu/ozone/lw_back_end.hh        20
-rw-r--r--  cpu/ozone/lw_back_end_impl.hh  256
-rw-r--r--  cpu/ozone/lw_lsq.hh             32
-rw-r--r--  cpu/ozone/lw_lsq_impl.hh       189
-rw-r--r--  cpu/ozone/simple_params.hh       1
-rw-r--r--  python/m5/objects/OzoneCPU.py    3
15 files changed, 660 insertions(+), 164 deletions(-)
diff --git a/cpu/base_dyn_inst.hh b/cpu/base_dyn_inst.hh
index 18978142d..cd754dc3c 100644
--- a/cpu/base_dyn_inst.hh
+++ b/cpu/base_dyn_inst.hh
@@ -117,11 +117,6 @@ class BaseDynInst : public FastAlloc, public RefCounted
Fault write(T data, Addr addr, unsigned flags,
uint64_t *res);
- // @todo: Probably should not have this function in the DynInst.
- template <class T>
- bool snoop(MemReqPtr &req, T &data)
- { return cpu->snoop(req, data); }
-
void prefetch(Addr addr, unsigned flags);
void writeHint(Addr addr, int size, unsigned flags);
Fault copySrcTranslate(Addr src);
diff --git a/cpu/ozone/back_end_impl.hh b/cpu/ozone/back_end_impl.hh
index 0b0f04f59..36770d65c 100644
--- a/cpu/ozone/back_end_impl.hh
+++ b/cpu/ozone/back_end_impl.hh
@@ -1385,7 +1385,7 @@ BackEnd<Impl>::writebackInsts()
inst->seqNum, inst->readPC());
inst->setCanCommit();
- inst->setCompleted();
+ inst->setResultReady();
if (inst->isExecuted()) {
int dependents = IQ.wakeDependents(inst);
diff --git a/cpu/ozone/cpu.hh b/cpu/ozone/cpu.hh
index 56b6571a2..eec8902d8 100644
--- a/cpu/ozone/cpu.hh
+++ b/cpu/ozone/cpu.hh
@@ -53,6 +53,7 @@ class AlphaDTB;
class PhysicalMemory;
class MemoryController;
+class Sampler;
class RemoteGDB;
class GDBListener;
@@ -69,6 +70,9 @@ namespace Trace {
class InstRecord;
}
+template <class>
+class Checker;
+
/**
* Declaration of Out-of-Order CPU class. Basically it is a SimpleCPU with
* simple out-of-order capabilities added to it. It is still a 1 CPI machine
@@ -226,7 +230,9 @@ class OzoneCPU : public BaseCPU
};
// execution context proxy
- OzoneXC xcProxy;
+ OzoneXC ozoneXC;
+ ExecContext *xcProxy;
+ ExecContext *checkerXC;
typedef OzoneThreadState<Impl> ImplState;
@@ -245,6 +251,7 @@ class OzoneCPU : public BaseCPU
void tick();
std::set<InstSeqNum> snList;
+ std::set<Addr> lockAddrList;
private:
struct TickEvent : public Event
{
@@ -262,9 +269,9 @@ class OzoneCPU : public BaseCPU
void scheduleTickEvent(int delay)
{
if (tickEvent.squashed())
- tickEvent.reschedule(curTick + delay);
+ tickEvent.reschedule(curTick + cycles(delay));
else if (!tickEvent.scheduled())
- tickEvent.schedule(curTick + delay);
+ tickEvent.schedule(curTick + cycles(delay));
}
/// Unschedule tick event, regardless of its current state.
@@ -322,7 +329,7 @@ class OzoneCPU : public BaseCPU
int cpuId;
- void switchOut();
+ void switchOut(Sampler *sampler);
void takeOverFrom(BaseCPU *oldCPU);
#if FULL_SYSTEM
@@ -472,6 +479,7 @@ class OzoneCPU : public BaseCPU
Fault error;
if (req->flags & LOCKED) {
// lockAddr = req->paddr;
+ lockAddrList.insert(req->paddr);
lockFlag = true;
}
@@ -546,7 +554,13 @@ class OzoneCPU : public BaseCPU
req->result = 2;
} else {
if (this->lockFlag/* && this->lockAddr == req->paddr*/) {
- req->result = 1;
+ if (lockAddrList.find(req->paddr) !=
+ lockAddrList.end()) {
+ req->result = 1;
+ } else {
+ req->result = 0;
+ return NoFault;
+ }
} else {
req->result = 0;
return NoFault;
@@ -599,7 +613,7 @@ class OzoneCPU : public BaseCPU
void setSyscallReturn(SyscallReturn return_value, int tid);
#endif
- ExecContext *xcBase() { return &xcProxy; }
+ ExecContext *xcBase() { return xcProxy; }
bool decoupledFrontEnd;
struct CommStruct {
@@ -615,6 +629,8 @@ class OzoneCPU : public BaseCPU
bool lockFlag;
Stats::Scalar<> quiesceCycles;
+
+ Checker<DynInstPtr> *checker;
};
#endif // __CPU_OZONE_CPU_HH__
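Note: the cpu.hh changes above replace the single lockAddr check with a set of load-locked physical addresses, so a store-conditional succeeds only if its address was actually locked by an earlier load-locked. A minimal standalone sketch of that bookkeeping follows; LockState, loadLocked and storeConditional are illustrative names, not the simulator's API.

    #include <cstdint>
    #include <iostream>
    #include <set>

    struct LockState {
        bool lockFlag = false;
        std::set<uint64_t> lockAddrList;

        // LDx_L: remember the locked physical address and set the flag.
        void loadLocked(uint64_t paddr) {
            lockAddrList.insert(paddr);
            lockFlag = true;
        }

        // STx_C: the value the hunk writes into req->result, 1 on success,
        // 0 when the lock flag is gone or the address was never load-locked.
        // (In the patch the set is cleared elsewhere, e.g. on hwrei().)
        int storeConditional(uint64_t paddr) const {
            if (lockFlag && lockAddrList.count(paddr))
                return 1;
            return 0;
        }
    };

    int main() {
        LockState ls;
        ls.loadLocked(0x1000);
        std::cout << ls.storeConditional(0x2000) << "\n";  // 0: address not locked
        std::cout << ls.storeConditional(0x1000) << "\n";  // 1: matches the LDx_L
    }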
diff --git a/cpu/ozone/cpu_builder.cc b/cpu/ozone/cpu_builder.cc
index 0146dd1bd..64aa49c71 100644
--- a/cpu/ozone/cpu_builder.cc
+++ b/cpu/ozone/cpu_builder.cc
@@ -1,6 +1,7 @@
#include <string>
+#include "cpu/checker/cpu.hh"
#include "cpu/inst_seq.hh"
#include "cpu/ozone/cpu.hh"
#include "cpu/ozone/ozone_impl.hh"
@@ -50,6 +51,8 @@ SimObjectVectorParam<Process *> workload;
SimObjectParam<FunctionalMemory *> mem;
+SimObjectParam<BaseCPU *> checker;
+
Param<Counter> max_insts_any_thread;
Param<Counter> max_insts_all_threads;
Param<Counter> max_loads_any_thread;
@@ -66,6 +69,7 @@ Param<unsigned> backEndSquashLatency;
Param<unsigned> backEndLatency;
Param<unsigned> maxInstBufferSize;
Param<unsigned> numPhysicalRegs;
+Param<unsigned> maxOutstandingMemOps;
Param<unsigned> decodeToFetchDelay;
Param<unsigned> renameToFetchDelay;
@@ -164,6 +168,8 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(DerivOzoneCPU)
INIT_PARAM_DFLT(mem, "Memory", NULL),
+ INIT_PARAM_DFLT(checker, "Checker CPU", NULL),
+
INIT_PARAM_DFLT(max_insts_any_thread,
"Terminate when any thread reaches this inst count",
0),
@@ -190,6 +196,7 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(DerivOzoneCPU)
INIT_PARAM_DFLT(backEndLatency, "Back end latency", 1),
INIT_PARAM_DFLT(maxInstBufferSize, "Maximum instruction buffer size", 16),
INIT_PARAM(numPhysicalRegs, "Number of physical registers"),
+ INIT_PARAM_DFLT(maxOutstandingMemOps, "Maximum outstanding memory operations", 4),
INIT_PARAM(decodeToFetchDelay, "Decode to fetch delay"),
INIT_PARAM(renameToFetchDelay, "Rename to fetch delay"),
@@ -314,7 +321,7 @@ CREATE_SIM_OBJECT(DerivOzoneCPU)
#endif // FULL_SYSTEM
params->mem = mem;
-
+ params->checker = checker;
params->max_insts_any_thread = max_insts_any_thread;
params->max_insts_all_threads = max_insts_all_threads;
params->max_loads_any_thread = max_loads_any_thread;
@@ -334,6 +341,7 @@ CREATE_SIM_OBJECT(DerivOzoneCPU)
params->backEndLatency = backEndLatency;
params->maxInstBufferSize = maxInstBufferSize;
params->numPhysicalRegs = numPhysIntRegs + numPhysFloatRegs;
+ params->maxOutstandingMemOps = maxOutstandingMemOps;
params->decodeToFetchDelay = decodeToFetchDelay;
params->renameToFetchDelay = renameToFetchDelay;
@@ -445,6 +453,8 @@ SimObjectVectorParam<Process *> workload;
SimObjectParam<FunctionalMemory *> mem;
+SimObjectParam<BaseCPU *> checker;
+
Param<Counter> max_insts_any_thread;
Param<Counter> max_insts_all_threads;
Param<Counter> max_loads_any_thread;
@@ -559,6 +569,8 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(SimpleOzoneCPU)
INIT_PARAM_DFLT(mem, "Memory", NULL),
+ INIT_PARAM_DFLT(checker, "Checker CPU", NULL),
+
INIT_PARAM_DFLT(max_insts_any_thread,
"Terminate when any thread reaches this inst count",
0),
@@ -709,7 +721,7 @@ CREATE_SIM_OBJECT(SimpleOzoneCPU)
#endif // FULL_SYSTEM
params->mem = mem;
-
+ params->checker = checker;
params->max_insts_any_thread = max_insts_any_thread;
params->max_insts_all_threads = max_insts_all_threads;
params->max_loads_any_thread = max_loads_any_thread;
diff --git a/cpu/ozone/cpu_impl.hh b/cpu/ozone/cpu_impl.hh
index 17d944e7c..4f3fdf521 100644
--- a/cpu/ozone/cpu_impl.hh
+++ b/cpu/ozone/cpu_impl.hh
@@ -33,6 +33,7 @@
#include "base/trace.hh"
#include "config/full_system.hh"
#include "cpu/base.hh"
+#include "cpu/checker/exec_context.hh"
#include "cpu/exec_context.hh"
#include "cpu/exetrace.hh"
#include "cpu/ozone/cpu.hh"
@@ -156,17 +157,33 @@ OzoneCPU<Impl>::OzoneCPU(Params *p)
#endif
comm(5, 5)
{
-
+ if (p->checker) {
+ BaseCPU *temp_checker = p->checker;
+ checker = dynamic_cast<Checker<DynInstPtr> *>(temp_checker);
+ } else {
+ checker = NULL;
+ }
frontEnd = new FrontEnd(p);
backEnd = new BackEnd(p);
_status = Idle;
- thread.xcProxy = &xcProxy;
+ if (checker) {
+ checker->setMemory(mem);
+#if FULL_SYSTEM
+ checker->setSystem(p->system);
+#endif
+ checkerXC = new CheckerExecContext<OzoneXC>(&ozoneXC, checker);
+ thread.xcProxy = checkerXC;
+ xcProxy = checkerXC;
+ } else {
+ thread.xcProxy = &ozoneXC;
+ xcProxy = &ozoneXC;
+ }
thread.inSyscall = false;
- xcProxy.cpu = this;
- xcProxy.thread = &thread;
+ ozoneXC.cpu = this;
+ ozoneXC.thread = &thread;
thread.setStatus(ExecContext::Suspended);
#if FULL_SYSTEM
@@ -177,7 +194,7 @@ OzoneCPU<Impl>::OzoneCPU(Params *p)
thread.tid = 0;
thread.mem = p->mem;
- thread.quiesceEvent = new EndQuiesceEvent(&xcProxy);
+ thread.quiesceEvent = new EndQuiesceEvent(xcProxy);
system = p->system;
itb = p->itb;
@@ -187,9 +204,10 @@ OzoneCPU<Impl>::OzoneCPU(Params *p)
if (p->profile) {
thread.profile = new FunctionProfile(p->system->kernelSymtab);
+ // @todo: This might be better as an ExecContext instead of OzoneXC
Callback *cb =
new MakeCallback<OzoneXC,
- &OzoneXC::dumpFuncProfile>(&xcProxy);
+ &OzoneXC::dumpFuncProfile>(&ozoneXC);
registerExitCallback(cb);
}
@@ -198,7 +216,6 @@ OzoneCPU<Impl>::OzoneCPU(Params *p)
static ProfileNode dummyNode;
thread.profileNode = &dummyNode;
thread.profilePC = 3;
-
#else
// xc = new ExecContext(this, /* thread_num */ 0, p->workload[0], /* asid */ 0);
thread.cpu = this;
@@ -225,13 +242,13 @@ OzoneCPU<Impl>::OzoneCPU(Params *p)
issueWidth = p->issueWidth;
*/
- execContexts.push_back(&xcProxy);
+ execContexts.push_back(xcProxy);
frontEnd->setCPU(this);
backEnd->setCPU(this);
- frontEnd->setXC(&xcProxy);
- backEnd->setXC(&xcProxy);
+ frontEnd->setXC(xcProxy);
+ backEnd->setXC(xcProxy);
frontEnd->setThreadState(&thread);
backEnd->setThreadState(&thread);
@@ -250,7 +267,7 @@ OzoneCPU<Impl>::OzoneCPU(Params *p)
for (int i = 0; i < TheISA::TotalNumRegs; ++i) {
thread.renameTable[i] = new DynInst(this);
- thread.renameTable[i]->setCompleted();
+ thread.renameTable[i]->setResultReady();
}
frontEnd->renameTable.copyFrom(thread.renameTable);
@@ -312,11 +329,15 @@ OzoneCPU<Impl>::copyToXC()
*/
template <class Impl>
void
-OzoneCPU<Impl>::switchOut()
+OzoneCPU<Impl>::switchOut(Sampler *sampler)
{
+ // Front end needs state from back end, so switch out the back end first.
+ backEnd->switchOut();
+ frontEnd->switchOut();
_status = SwitchedOut;
if (tickEvent.scheduled())
tickEvent.squash();
+ sampler->signalSwitched();
}
template <class Impl>
@@ -325,8 +346,16 @@ OzoneCPU<Impl>::takeOverFrom(BaseCPU *oldCPU)
{
BaseCPU::takeOverFrom(oldCPU);
+ backEnd->takeOverFrom();
+ frontEnd->takeOverFrom();
assert(!tickEvent.scheduled());
+ // @todo: Fix hardcoded number
+ // Clear out any old information in time buffer.
+ for (int i = 0; i < 6; ++i) {
+ comm.advance();
+ }
+
// if any of this CPU's ExecContexts are active, mark the CPU as
// running and schedule its tick event.
for (int i = 0; i < execContexts.size(); ++i) {
@@ -470,7 +499,7 @@ OzoneCPU<Impl>::serialize(std::ostream &os)
BaseCPU::serialize(os);
SERIALIZE_ENUM(_status);
nameOut(os, csprintf("%s.xc", name()));
- xcProxy.serialize(os);
+ ozoneXC.serialize(os);
nameOut(os, csprintf("%s.tickEvent", name()));
tickEvent.serialize(os);
}
@@ -481,7 +510,7 @@ OzoneCPU<Impl>::unserialize(Checkpoint *cp, const std::string &section)
{
BaseCPU::unserialize(cp, section);
UNSERIALIZE_ENUM(_status);
- xcProxy.unserialize(cp, csprintf("%s.xc", section));
+ ozoneXC.unserialize(cp, csprintf("%s.xc", section));
tickEvent.unserialize(cp, csprintf("%s.tickEvent", section));
}
@@ -579,7 +608,7 @@ template <class Impl>
Addr
OzoneCPU<Impl>::dbg_vtophys(Addr addr)
{
- return vtophys(&xcProxy, addr);
+ return vtophys(xcProxy, addr);
}
#endif // FULL_SYSTEM
/*
@@ -725,7 +754,7 @@ OzoneCPU<Impl>::tick()
comInstEventQueue[0]->serviceEvents(numInst);
if (!tickEvent.scheduled() && _status == Running)
- tickEvent.schedule(curTick + 1);
+ tickEvent.schedule(curTick + cycles(1));
}
template <class Impl>
@@ -750,7 +779,7 @@ OzoneCPU<Impl>::syscall()
DPRINTF(OzoneCPU, "FuncExeInst: %i\n", thread.funcExeInst);
- thread.process->syscall(&xcProxy);
+ thread.process->syscall(xcProxy);
thread.funcExeInst--;
@@ -784,19 +813,17 @@ OzoneCPU<Impl>::hwrei()
{
// Need to move this to ISA code
// May also need to make this per thread
+/*
if (!inPalMode())
return new UnimplementedOpcodeFault;
thread.setNextPC(thread.readMiscReg(AlphaISA::IPR_EXC_ADDR));
-
+*/
lockFlag = false;
+ lockAddrList.clear();
+ kernelStats->hwrei();
- // Not sure how to make a similar check in the Ozone model
-// if (!misspeculating()) {
- kernelStats->hwrei();
-
- checkInterrupts = true;
-// }
+ checkInterrupts = true;
// FIXME: XXX check for interrupts? XXX
return NoFault;
@@ -847,6 +874,11 @@ OzoneCPU<Impl>::processInterrupts()
if (ipl && ipl > thread.readMiscReg(IPR_IPLR)) {
thread.setMiscReg(IPR_ISR, summary);
thread.setMiscReg(IPR_INTID, ipl);
+ // @todo: Make this more transparent
+ if (checker) {
+ checkerXC->setMiscReg(IPR_ISR, summary);
+ checkerXC->setMiscReg(IPR_INTID, ipl);
+ }
Fault fault = new InterruptFault;
fault->invoke(thread.getXCProxy());
DPRINTF(Flow, "Interrupt! IPLR=%d ipl=%d summary=%x\n",
@@ -860,7 +892,7 @@ OzoneCPU<Impl>::simPalCheck(int palFunc)
{
// Need to move this to ISA code
// May also need to make this per thread
- this->kernelStats->callpal(palFunc, &xcProxy);
+ this->kernelStats->callpal(palFunc, xcProxy);
switch (palFunc) {
case PAL::halt:
@@ -944,7 +976,28 @@ OzoneCPU<Impl>::OzoneXC::dumpFuncProfile()
template <class Impl>
void
OzoneCPU<Impl>::OzoneXC::takeOverFrom(ExecContext *old_context)
-{ }
+{
+ // some things should already be set up
+ assert(getMemPtr() == old_context->getMemPtr());
+#if FULL_SYSTEM
+ assert(getSystemPtr() == old_context->getSystemPtr());
+#else
+ assert(getProcessPtr() == old_context->getProcessPtr());
+#endif
+
+ // copy over functional state
+ setStatus(old_context->status());
+ copyArchRegs(old_context);
+ setCpuId(old_context->readCpuId());
+#if !FULL_SYSTEM
+ setFuncExeInst(old_context->readFuncExeInst());
+#endif
+
+// storeCondFailures = 0;
+ cpu->lockFlag = false;
+
+ old_context->setStatus(ExecContext::Unallocated);
+}
template <class Impl>
void
@@ -1062,21 +1115,24 @@ template <class Impl>
float
OzoneCPU<Impl>::OzoneXC::readFloatRegSingle(int reg_idx)
{
- return thread->renameTable[reg_idx]->readFloatResult();
+ int idx = reg_idx + TheISA::FP_Base_DepTag;
+ return thread->renameTable[idx]->readFloatResult();
}
template <class Impl>
double
OzoneCPU<Impl>::OzoneXC::readFloatRegDouble(int reg_idx)
{
- return thread->renameTable[reg_idx]->readDoubleResult();
+ int idx = reg_idx + TheISA::FP_Base_DepTag;
+ return thread->renameTable[idx]->readDoubleResult();
}
template <class Impl>
uint64_t
OzoneCPU<Impl>::OzoneXC::readFloatRegInt(int reg_idx)
{
- return thread->renameTable[reg_idx]->readIntResult();
+ int idx = reg_idx + TheISA::FP_Base_DepTag;
+ return thread->renameTable[idx]->readIntResult();
}
template <class Impl>
@@ -1101,7 +1157,9 @@ template <class Impl>
void
OzoneCPU<Impl>::OzoneXC::setFloatRegDouble(int reg_idx, double val)
{
- thread->renameTable[reg_idx]->setDoubleResult(val);
+ int idx = reg_idx + TheISA::FP_Base_DepTag;
+
+ thread->renameTable[idx]->setDoubleResult(val);
if (!thread->inSyscall) {
cpu->squashFromXC();
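Note: the constructor changes above pick the externally visible execution context, either the plain OzoneXC when no checker is configured, or a CheckerExecContext wrapper that also keeps the checker CPU informed. The sketch below shows the general wrapping pattern under simplified, made-up types (ExecCtx, OzoneCtx, CheckingCtx); it is not the real m5 ExecContext interface.

    #include <cstdint>
    #include <iostream>
    #include <memory>

    struct ExecCtx {                        // stands in for ExecContext
        virtual ~ExecCtx() = default;
        virtual void setMiscReg(int idx, uint64_t val) = 0;
    };

    struct OzoneCtx : ExecCtx {             // stands in for OzoneXC
        void setMiscReg(int idx, uint64_t val) override {
            std::cout << "cpu:     reg " << idx << " = " << val << "\n";
        }
    };

    struct CheckingCtx : ExecCtx {          // stands in for CheckerExecContext
        explicit CheckingCtx(ExecCtx *real) : real(real) {}
        void setMiscReg(int idx, uint64_t val) override {
            real->setMiscReg(idx, val);     // update the real context
            std::cout << "checker: reg " << idx << " = " << val << "\n";
        }
        ExecCtx *real;
    };

    int main() {
        OzoneCtx ozoneXC;
        std::unique_ptr<CheckingCtx> checkerXC;
        ExecCtx *xcProxy = &ozoneXC;        // no checker: expose the real XC
        bool haveChecker = true;
        if (haveChecker) {                  // checker present: expose the wrapper
            checkerXC = std::make_unique<CheckingCtx>(&ozoneXC);
            xcProxy = checkerXC.get();
        }
        xcProxy->setMiscReg(3, 42);         // callers only ever see xcProxy
    }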
diff --git a/cpu/ozone/dyn_inst.hh b/cpu/ozone/dyn_inst.hh
index 4382af0fd..f251c28ea 100644
--- a/cpu/ozone/dyn_inst.hh
+++ b/cpu/ozone/dyn_inst.hh
@@ -59,9 +59,9 @@ class OzoneDynInst : public BaseDynInst<Impl>
typedef TheISA::MiscReg MiscReg;
typedef typename std::list<DynInstPtr>::iterator ListIt;
- // Note that this is duplicated from the BaseDynInst class; I'm simply not
- // sure the enum would carry through so I could use it in array
- // declarations in this class.
+ // Note that this is duplicated from the BaseDynInst class; I'm
+ // simply not sure the enum would carry through so I could use it
+ // in array declarations in this class.
enum {
MaxInstSrcRegs = TheISA::MaxInstSrcRegs,
MaxInstDestRegs = TheISA::MaxInstDestRegs
@@ -90,9 +90,23 @@ class OzoneDynInst : public BaseDynInst<Impl>
void addDependent(DynInstPtr &dependent_inst);
std::vector<DynInstPtr> &getDependents() { return dependents; }
+ std::vector<DynInstPtr> &getMemDeps() { return memDependents; }
+ std::list<DynInstPtr> &getMemSrcs() { return srcMemInsts; }
void wakeDependents();
+ void wakeMemDependents();
+
+ void addMemDependent(DynInstPtr &inst) { memDependents.push_back(inst); }
+
+ void addSrcMemInst(DynInstPtr &inst) { srcMemInsts.push_back(inst); }
+
+ void markMemInstReady(OzoneDynInst<Impl> *inst);
+
+ // For now I will remove instructions from the list when they wake
+ // up. In the future, you only really need a counter.
+ bool memDepReady() { return srcMemInsts.empty(); }
+
// void setBPredInfo(const BPredInfo &bp_info) { bpInfo = bp_info; }
// BPredInfo &getBPredInfo() { return bpInfo; }
@@ -104,9 +118,13 @@ class OzoneDynInst : public BaseDynInst<Impl>
std::vector<DynInstPtr> dependents;
- /** The instruction that produces the value of the source registers. These
- * may be NULL if the value has already been read from the source
- * instruction.
+ std::vector<DynInstPtr> memDependents;
+
+ std::list<DynInstPtr> srcMemInsts;
+
+ /** The instruction that produces the value of the source
+ * registers. These may be NULL if the value has already been
+ * read from the source instruction.
*/
DynInstPtr srcInsts[MaxInstSrcRegs];
@@ -165,22 +183,22 @@ class OzoneDynInst : public BaseDynInst<Impl>
*/
void setIntReg(const StaticInst *si, int idx, uint64_t val)
{
- this->instResult.integer = val;
+ BaseDynInst<Impl>::setIntReg(si, idx, val);
}
void setFloatRegSingle(const StaticInst *si, int idx, float val)
{
- this->instResult.fp = val;
+ BaseDynInst<Impl>::setFloatRegSingle(si, idx, val);
}
void setFloatRegDouble(const StaticInst *si, int idx, double val)
{
- this->instResult.dbl = val;
+ BaseDynInst<Impl>::setFloatRegDouble(si, idx, val);
}
void setFloatRegInt(const StaticInst *si, int idx, uint64_t val)
{
- this->instResult.integer = val;
+ BaseDynInst<Impl>::setFloatRegInt(si, idx, val);
}
void setIntResult(uint64_t result) { this->instResult.integer = result; }
@@ -199,6 +217,8 @@ class OzoneDynInst : public BaseDynInst<Impl>
void clearDependents();
+ void clearMemDependents();
+
public:
// ISA stuff
MiscReg readMiscReg(int misc_reg);
diff --git a/cpu/ozone/dyn_inst_impl.hh b/cpu/ozone/dyn_inst_impl.hh
index c83481c9a..a7e4460a1 100644
--- a/cpu/ozone/dyn_inst_impl.hh
+++ b/cpu/ozone/dyn_inst_impl.hh
@@ -38,7 +38,7 @@ template <class Impl>
OzoneDynInst<Impl>::OzoneDynInst(FullCPU *cpu)
: BaseDynInst<Impl>(0, 0, 0, 0, cpu)
{
- this->setCompleted();
+ this->setResultReady();
initInstPtrs();
}
@@ -130,7 +130,7 @@ template <class Impl>
bool
OzoneDynInst<Impl>::srcInstReady(int regIdx)
{
- return srcInsts[regIdx]->isCompleted();
+ return srcInsts[regIdx]->isResultReady();
}
template <class Impl>
@@ -151,6 +151,28 @@ OzoneDynInst<Impl>::wakeDependents()
template <class Impl>
void
+OzoneDynInst<Impl>::wakeMemDependents()
+{
+ for (int i = 0; i < memDependents.size(); ++i) {
+ memDependents[i]->markMemInstReady(this);
+ }
+}
+
+template <class Impl>
+void
+OzoneDynInst<Impl>::markMemInstReady(OzoneDynInst<Impl> *inst)
+{
+ ListIt mem_it = srcMemInsts.begin();
+ while ((*mem_it) != inst && mem_it != srcMemInsts.end()) {
+ mem_it++;
+ }
+ assert(mem_it != srcMemInsts.end());
+
+ srcMemInsts.erase(mem_it);
+}
+
+template <class Impl>
+void
OzoneDynInst<Impl>::initInstPtrs()
{
for (int i = 0; i < MaxInstSrcRegs; ++i) {
@@ -164,7 +186,7 @@ bool
OzoneDynInst<Impl>::srcsReady()
{
for (int i = 0; i < this->numSrcRegs(); ++i) {
- if (!srcInsts[i]->isCompleted())
+ if (!srcInsts[i]->isResultReady())
return false;
}
@@ -176,7 +198,7 @@ bool
OzoneDynInst<Impl>::eaSrcsReady()
{
for (int i = 1; i < this->numSrcRegs(); ++i) {
- if (!srcInsts[i]->isCompleted())
+ if (!srcInsts[i]->isResultReady())
return false;
}
@@ -195,6 +217,14 @@ OzoneDynInst<Impl>::clearDependents()
prevDestInst[i] = NULL;
}
}
+
+template <class Impl>
+void
+OzoneDynInst<Impl>::clearMemDependents()
+{
+ memDependents.clear();
+}
+
template <class Impl>
MiscReg
OzoneDynInst<Impl>::readMiscReg(int misc_reg)
@@ -213,6 +243,7 @@ template <class Impl>
Fault
OzoneDynInst<Impl>::setMiscReg(int misc_reg, const MiscReg &val)
{
+ this->setIntResult(val);
return this->thread->setMiscReg(misc_reg, val);
}
@@ -234,11 +265,13 @@ OzoneDynInst<Impl>::hwrei()
this->setNextPC(this->thread->readMiscReg(AlphaISA::IPR_EXC_ADDR));
+ this->cpu->hwrei();
+/*
this->cpu->kernelStats->hwrei();
this->cpu->checkInterrupts = true;
this->cpu->lockFlag = false;
-
+*/
// FIXME: XXX check for interrupts? XXX
return NoFault;
}
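Note: the new memory-dependence hooks above (addMemDependent, addSrcMemInst, markMemInstReady, memDepReady, wakeMemDependents) implement a simple producer/consumer list between a barrier and later memory instructions. A self-contained sketch of that bookkeeping, using the same method names but raw pointers instead of refcounted DynInstPtrs:

    #include <algorithm>
    #include <iostream>
    #include <list>
    #include <vector>

    struct Inst {
        int seqNum;
        std::vector<Inst *> memDependents;  // insts waiting on this one
        std::list<Inst *> srcMemInsts;      // insts this one still waits on

        void addMemDependent(Inst *i) { memDependents.push_back(i); }
        void addSrcMemInst(Inst *i)   { srcMemInsts.push_back(i); }
        bool memDepReady() const      { return srcMemInsts.empty(); }

        void markMemInstReady(Inst *src) {
            auto it = std::find(srcMemInsts.begin(), srcMemInsts.end(), src);
            if (it != srcMemInsts.end())
                srcMemInsts.erase(it);
        }
        void wakeMemDependents() {
            for (Inst *dep : memDependents)
                dep->markMemInstReady(this);
        }
    };

    int main() {
        Inst barrier{10}, load{11};
        barrier.addMemDependent(&load);           // dispatch: load waits on barrier
        load.addSrcMemInst(&barrier);
        std::cout << load.memDepReady() << "\n";  // 0: still blocked
        barrier.wakeMemDependents();              // commit of the barrier
        std::cout << load.memDepReady() << "\n";  // 1: free to issue
    }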
diff --git a/cpu/ozone/front_end.hh b/cpu/ozone/front_end.hh
index 2bff2544d..188925ae5 100644
--- a/cpu/ozone/front_end.hh
+++ b/cpu/ozone/front_end.hh
@@ -66,6 +66,14 @@ class FrontEnd
bool isEmpty() { return instBuffer.empty(); }
+ void switchOut();
+
+ void takeOverFrom(ExecContext *old_xc = NULL);
+
+ bool isSwitchedOut() { return switchedOut; }
+
+ bool switchedOut;
+
private:
bool updateStatus();
@@ -198,6 +206,9 @@ class FrontEnd
DynInstPtr barrierInst;
+ public:
+ bool interruptPending;
+ private:
// number of idle cycles
/*
Stats::Average<> notIdleFraction;
@@ -223,6 +234,8 @@ class FrontEnd
Stats::Scalar<> fetchBlockedCycles;
/** Stat for total number of fetched cache lines. */
Stats::Scalar<> fetchedCacheLines;
+
+ Stats::Scalar<> fetchIcacheSquashes;
/** Distribution of number of instructions fetched each cycle. */
Stats::Distribution<> fetchNisnDist;
// Stats::Vector<> qfull_iq_occupancy;
diff --git a/cpu/ozone/front_end_impl.hh b/cpu/ozone/front_end_impl.hh
index 7c18386cf..a3eb809d0 100644
--- a/cpu/ozone/front_end_impl.hh
+++ b/cpu/ozone/front_end_impl.hh
@@ -19,8 +19,11 @@ FrontEnd<Impl>::FrontEnd(Params *params)
width(params->frontEndWidth),
freeRegs(params->numPhysicalRegs),
numPhysRegs(params->numPhysicalRegs),
- serializeNext(false)
+ serializeNext(false),
+ interruptPending(false)
{
+ switchedOut = false;
+
status = Idle;
// Setup branch predictor.
@@ -127,6 +130,11 @@ FrontEnd<Impl>::regStats()
.desc("Number of cache lines fetched")
.prereq(fetchedCacheLines);
+ fetchIcacheSquashes
+ .name(name() + ".fetchIcacheSquashes")
+ .desc("Number of outstanding Icache misses that were squashed")
+ .prereq(fetchIcacheSquashes);
+
fetchNisnDist
.init(/* base value */ 0,
/* last value */ width,
@@ -370,6 +378,10 @@ FrontEnd<Impl>::fetchCacheLine()
#endif // FULL_SYSTEM
Fault fault = NoFault;
+ if (interruptPending && flags == 0) {
+ return fault;
+ }
+
// Align the fetch PC so it's at the start of a cache block.
Addr fetch_PC = icacheBlockAlignPC(PC);
@@ -397,7 +409,8 @@ FrontEnd<Impl>::fetchCacheLine()
// exists within the cache.
if (icacheInterface && fault == NoFault) {
#if FULL_SYSTEM
- if (cpu->system->memctrl->badaddr(memReq->paddr)) {
+ if (cpu->system->memctrl->badaddr(memReq->paddr) ||
+ memReq->flags & UNCACHEABLE) {
DPRINTF(FE, "Fetch: Bad address %#x (hopefully on a "
"misspeculating path!",
memReq->paddr);
@@ -497,7 +510,7 @@ FrontEnd<Impl>::processBarriers(DynInstPtr &inst)
dispatchedTempSerializing++;
}
- // Change status over to BarrierStall so that other stages know
+ // Change status over to SerializeBlocked so that other stages know
// what this is blocked on.
status = SerializeBlocked;
@@ -613,8 +626,10 @@ FrontEnd<Impl>::processCacheCompletion(MemReqPtr &req)
// Do something here.
if (status != IcacheMissStall ||
- req != memReq) {
+ req != memReq ||
+ switchedOut) {
DPRINTF(FE, "Previous fetch was squashed.\n");
+ fetchIcacheSquashes++;
return;
}
@@ -702,6 +717,7 @@ FrontEnd<Impl>::getInstFromCacheline()
DynInstPtr inst = barrierInst;
status = Running;
barrierInst = NULL;
+ inst->clearSerializeBefore();
return inst;
}
@@ -773,7 +789,7 @@ FrontEnd<Impl>::renameInst(DynInstPtr &inst)
DPRINTF(FE, "[sn:%lli]: Src reg %i is inst [sn:%lli]\n",
inst->seqNum, (int)inst->srcRegIdx(i), src_inst->seqNum);
- if (src_inst->isCompleted()) {
+ if (src_inst->isResultReady()) {
DPRINTF(FE, "Reg ready.\n");
inst->markSrcRegReady(i);
} else {
@@ -809,6 +825,38 @@ FrontEnd<Impl>::wakeFromQuiesce()
template <class Impl>
void
+FrontEnd<Impl>::switchOut()
+{
+ switchedOut = true;
+ memReq = NULL;
+ squash(0, 0);
+ instBuffer.clear();
+ instBufferSize = 0;
+ status = Idle;
+}
+
+template <class Impl>
+void
+FrontEnd<Impl>::takeOverFrom(ExecContext *old_xc)
+{
+ assert(freeRegs == numPhysRegs);
+ fetchCacheLineNextCycle = true;
+
+ cacheBlkValid = false;
+
+#if !FULL_SYSTEM
+// pTable = params->pTable;
+#endif
+ fetchFault = NoFault;
+ serializeNext = false;
+ barrierInst = NULL;
+ status = Running;
+ switchedOut = false;
+ interruptPending = false;
+}
+
+template <class Impl>
+void
FrontEnd<Impl>::dumpInsts()
{
cprintf("instBuffer size: %i\n", instBuffer.size());
diff --git a/cpu/ozone/lw_back_end.hh b/cpu/ozone/lw_back_end.hh
index f17c93ff4..028fdaf8c 100644
--- a/cpu/ozone/lw_back_end.hh
+++ b/cpu/ozone/lw_back_end.hh
@@ -17,6 +17,8 @@
#include "mem/mem_req.hh"
#include "sim/eventq.hh"
+template <class>
+class Checker;
class ExecContext;
template <class Impl>
@@ -126,6 +128,8 @@ class LWBackEnd
Addr commitPC;
+ Tick lastCommitCycle;
+
bool robEmpty() { return instList.empty(); }
bool isFull() { return numInsts >= numROBEntries; }
@@ -133,7 +137,7 @@ class LWBackEnd
void fetchFault(Fault &fault);
- int wakeDependents(DynInstPtr &inst);
+ int wakeDependents(DynInstPtr &inst, bool memory_deps = false);
/** Tells memory dependence unit that a memory instruction needs to be
* rescheduled. It will re-execute once replayMemInst() is called.
@@ -182,6 +186,12 @@ class LWBackEnd
void instToCommit(DynInstPtr &inst);
+ void switchOut();
+
+ void takeOverFrom(ExecContext *old_xc = NULL);
+
+ bool isSwitchedOut() { return switchedOut; }
+
private:
void generateTrapEvent(Tick latency = 0);
void handleFault(Fault &fault, Tick latency = 0);
@@ -303,6 +313,10 @@ class LWBackEnd
Fault faultFromFetch;
bool fetchHasFault;
+ bool switchedOut;
+
+ DynInstPtr memBarrier;
+
private:
struct pqCompare {
bool operator() (const DynInstPtr &lhs, const DynInstPtr &rhs) const
@@ -327,7 +341,7 @@ class LWBackEnd
bool exactFullStall;
- bool fetchRedirect[Impl::MaxThreads];
+// bool fetchRedirect[Impl::MaxThreads];
// number of cycles stalled for D-cache misses
/* Stats::Scalar<> dcacheStallCycles;
@@ -414,6 +428,8 @@ class LWBackEnd
Stats::VectorDistribution<> ROB_occ_dist;
public:
void dumpInsts();
+
+ Checker<DynInstPtr> *checker;
};
template <class Impl>
diff --git a/cpu/ozone/lw_back_end_impl.hh b/cpu/ozone/lw_back_end_impl.hh
index d1290239c..d4829629d 100644
--- a/cpu/ozone/lw_back_end_impl.hh
+++ b/cpu/ozone/lw_back_end_impl.hh
@@ -1,5 +1,6 @@
#include "encumbered/cpu/full/op_class.hh"
+#include "cpu/checker/cpu.hh"
#include "cpu/ozone/lw_back_end.hh"
template <class Impl>
@@ -10,28 +11,36 @@ LWBackEnd<Impl>::generateTrapEvent(Tick latency)
TrapEvent *trap = new TrapEvent(this);
- trap->schedule(curTick + latency);
+ trap->schedule(curTick + cpu->cycles(latency));
thread->trapPending = true;
}
template <class Impl>
int
-LWBackEnd<Impl>::wakeDependents(DynInstPtr &inst)
+LWBackEnd<Impl>::wakeDependents(DynInstPtr &inst, bool memory_deps)
{
assert(!inst->isSquashed());
- std::vector<DynInstPtr> &dependents = inst->getDependents();
+ std::vector<DynInstPtr> &dependents = memory_deps ? inst->getMemDeps() :
+ inst->getDependents();
int num_outputs = dependents.size();
DPRINTF(BE, "Waking instruction [sn:%lli] dependents in IQ\n", inst->seqNum);
for (int i = 0; i < num_outputs; i++) {
DynInstPtr dep_inst = dependents[i];
- dep_inst->markSrcRegReady();
+ if (!memory_deps) {
+ dep_inst->markSrcRegReady();
+ } else {
+ if (!dep_inst->isSquashed())
+ dep_inst->markMemInstReady(inst.get());
+ }
+
DPRINTF(BE, "Marking source reg ready [sn:%lli] in IQ\n", dep_inst->seqNum);
if (dep_inst->readyToIssue() && dep_inst->isInROB() &&
- !dep_inst->isNonSpeculative()) {
+ !dep_inst->isNonSpeculative() &&
+ dep_inst->memDepReady() && !dep_inst->isMemBarrier() && !dep_inst->isWriteBarrier()) {
DPRINTF(BE, "Adding instruction to exeList [sn:%lli]\n",
dep_inst->seqNum);
exeList.push(dep_inst);
@@ -114,6 +123,9 @@ LWBackEnd<Impl>::LdWritebackEvent::process()
// iewStage->wakeCPU();
+ if (be->isSwitchedOut())
+ return;
+
if (dcacheMiss) {
be->removeDcacheMiss(inst);
}
@@ -169,16 +181,18 @@ LWBackEnd<Impl>::DCacheCompletionEvent::description()
template <class Impl>
LWBackEnd<Impl>::LWBackEnd(Params *params)
: d2i(5, 5), i2e(5, 5), e2c(5, 5), numInstsToWB(5, 5),
- xcSquash(false), cacheCompletionEvent(this),
+ trapSquash(false), xcSquash(false), cacheCompletionEvent(this),
dcacheInterface(params->dcacheInterface), width(params->backEndWidth),
exactFullStall(true)
{
numROBEntries = params->numROBEntries;
numInsts = 0;
numDispatchEntries = 32;
- maxOutstandingMemOps = 4;
+ maxOutstandingMemOps = params->maxOutstandingMemOps;
numWaitingMemOps = 0;
waitingInsts = 0;
+ switchedOut = false;
+
// IQ.setBE(this);
LSQ.setBE(this);
@@ -533,6 +547,7 @@ LWBackEnd<Impl>::setCPU(FullCPU *cpu_ptr)
{
cpu = cpu_ptr;
LSQ.setCPU(cpu_ptr);
+ checker = cpu->checker;
}
template <class Impl>
@@ -554,30 +569,35 @@ LWBackEnd<Impl>::checkInterrupts()
!cpu->inPalMode(thread->readPC()) &&
!trapSquash &&
!xcSquash) {
- // Will need to squash all instructions currently in flight and have
- // the interrupt handler restart at the last non-committed inst.
- // Most of that can be handled through the trap() function. The
- // processInterrupts() function really just checks for interrupts
- // and then calls trap() if there is an interrupt present.
+ frontEnd->interruptPending = true;
+ if (robEmpty() && !LSQ.hasStoresToWB()) {
+ // Will need to squash all instructions currently in flight and have
+ // the interrupt handler restart at the last non-committed inst.
+ // Most of that can be handled through the trap() function. The
+ // processInterrupts() function really just checks for interrupts
+ // and then calls trap() if there is an interrupt present.
- // Not sure which thread should be the one to interrupt. For now
- // always do thread 0.
- assert(!thread->inSyscall);
- thread->inSyscall = true;
+ // Not sure which thread should be the one to interrupt. For now
+ // always do thread 0.
+ assert(!thread->inSyscall);
+ thread->inSyscall = true;
- // CPU will handle implementation of the interrupt.
- cpu->processInterrupts();
+ // CPU will handle implementation of the interrupt.
+ cpu->processInterrupts();
- // Now squash or record that I need to squash this cycle.
- commitStatus = TrapPending;
+ // Now squash or record that I need to squash this cycle.
+ commitStatus = TrapPending;
- // Exit state update mode to avoid accidental updating.
- thread->inSyscall = false;
+ // Exit state update mode to avoid accidental updating.
+ thread->inSyscall = false;
- // Generate trap squash event.
- generateTrapEvent();
+ // Generate trap squash event.
+ generateTrapEvent();
- DPRINTF(BE, "Interrupt detected.\n");
+ DPRINTF(BE, "Interrupt detected.\n");
+ } else {
+ DPRINTF(BE, "Interrupt must wait for ROB to drain.\n");
+ }
}
}
@@ -585,7 +605,7 @@ template <class Impl>
void
LWBackEnd<Impl>::handleFault(Fault &fault, Tick latency)
{
- DPRINTF(BE, "Handling fault!");
+ DPRINTF(BE, "Handling fault!\n");
assert(!thread->inSyscall);
@@ -615,6 +635,9 @@ LWBackEnd<Impl>::tick()
wbCycle = 0;
+ // Read in any done instruction information and update the IQ or LSQ.
+ updateStructures();
+
#if FULL_SYSTEM
checkInterrupts();
@@ -623,7 +646,7 @@ LWBackEnd<Impl>::tick()
squashFromTrap();
} else if (xcSquash) {
squashFromXC();
- } else if (fetchHasFault && robEmpty() && frontEnd->isEmpty()) {
+ } else if (fetchHasFault && robEmpty() && frontEnd->isEmpty() && !LSQ.hasStoresToWB()) {
DPRINTF(BE, "ROB and front end empty, handling fetch fault\n");
Fault fetch_fault = frontEnd->getFault();
if (fetch_fault == NoFault) {
@@ -636,9 +659,6 @@ LWBackEnd<Impl>::tick()
}
#endif
- // Read in any done instruction information and update the IQ or LSQ.
- updateStructures();
-
if (dispatchStatus != Blocked) {
dispatchInsts();
} else {
@@ -719,12 +739,41 @@ LWBackEnd<Impl>::dispatchInsts()
for (int i = 0; i < inst->numDestRegs(); ++i)
renameTable[inst->destRegIdx(i)] = inst;
- if (inst->readyToIssue() && !inst->isNonSpeculative()) {
- DPRINTF(BE, "Instruction [sn:%lli] ready, addding to exeList.\n",
- inst->seqNum);
- exeList.push(inst);
+ if (inst->isMemBarrier() || inst->isWriteBarrier()) {
+ if (memBarrier) {
+ DPRINTF(BE, "Instruction [sn:%lli] is waiting on "
+ "barrier [sn:%lli].\n",
+ inst->seqNum, memBarrier->seqNum);
+ memBarrier->addMemDependent(inst);
+ inst->addSrcMemInst(memBarrier);
+ }
+ memBarrier = inst;
+ inst->setCanCommit();
+ } else if (inst->readyToIssue() && !inst->isNonSpeculative()) {
if (inst->isMemRef()) {
+
LSQ.insert(inst);
+ if (memBarrier) {
+ DPRINTF(BE, "Instruction [sn:%lli] is waiting on "
+ "barrier [sn:%lli].\n",
+ inst->seqNum, memBarrier->seqNum);
+ memBarrier->addMemDependent(inst);
+ inst->addSrcMemInst(memBarrier);
+ addWaitingMemOp(inst);
+
+ waitingList.push_front(inst);
+ inst->iqIt = waitingList.begin();
+ inst->iqItValid = true;
+ waitingInsts++;
+ } else {
+ DPRINTF(BE, "Instruction [sn:%lli] ready, addding to exeList.\n",
+ inst->seqNum);
+ exeList.push(inst);
+ }
+ } else {
+ DPRINTF(BE, "Instruction [sn:%lli] ready, addding to exeList.\n",
+ inst->seqNum);
+ exeList.push(inst);
}
} else {
if (inst->isNonSpeculative()) {
@@ -735,6 +784,14 @@ LWBackEnd<Impl>::dispatchInsts()
if (inst->isMemRef()) {
addWaitingMemOp(inst);
LSQ.insert(inst);
+ if (memBarrier) {
+ memBarrier->addMemDependent(inst);
+ inst->addSrcMemInst(memBarrier);
+
+ DPRINTF(BE, "Instruction [sn:%lli] is waiting on "
+ "barrier [sn:%lli].\n",
+ inst->seqNum, memBarrier->seqNum);
+ }
}
DPRINTF(BE, "Instruction [sn:%lli] not ready, addding to "
@@ -872,9 +929,6 @@ LWBackEnd<Impl>::executeInsts()
++funcExeInst;
++num_executed;
- // keep an instruction count
- thread->numInst++;
- thread->numInsts++;
exeList.pop();
@@ -915,7 +969,7 @@ LWBackEnd<Impl>::instToCommit(DynInstPtr &inst)
inst->setCanCommit();
if (inst->isExecuted()) {
- inst->setCompleted();
+ inst->setResultReady();
int dependents = wakeDependents(inst);
if (dependents) {
producer_inst[0]++;
@@ -956,7 +1010,7 @@ LWBackEnd<Impl>::writebackInsts()
inst->seqNum, inst->readPC());
inst->setCanCommit();
- inst->setCompleted();
+ inst->setResultReady();
if (inst->isExecuted()) {
int dependents = wakeDependents(inst);
@@ -997,7 +1051,9 @@ LWBackEnd<Impl>::commitInst(int inst_num)
// If the instruction is not executed yet, then it is a non-speculative
// or store inst. Signal backwards that it should be executed.
if (!inst->isExecuted()) {
- if (inst->isNonSpeculative()) {
+ if (inst->isNonSpeculative() ||
+ inst->isMemBarrier() ||
+ inst->isWriteBarrier()) {
#if !FULL_SYSTEM
// Hack to make sure syscalls aren't executed until all stores
// write back their data. This direct communication shouldn't
@@ -1017,6 +1073,16 @@ LWBackEnd<Impl>::commitInst(int inst_num)
"instruction at the head of the ROB, PC %#x.\n",
inst->readPC());
+ if (inst->isMemBarrier() || inst->isWriteBarrier()) {
+ DPRINTF(BE, "Waking dependents on barrier [sn:%lli]\n",
+ inst->seqNum);
+ assert(memBarrier);
+ wakeDependents(inst, true);
+ if (memBarrier == inst)
+ memBarrier = NULL;
+ inst->clearMemDependents();
+ }
+
// Send back the non-speculative instruction's sequence number.
if (inst->iqItValid) {
DPRINTF(BE, "Removing instruction from waiting list\n");
@@ -1066,13 +1132,45 @@ LWBackEnd<Impl>::commitInst(int inst_num)
// Not handled for now.
assert(!inst->isThreadSync());
-
+ assert(inst->memDepReady());
+ // Stores will mark themselves as totally completed as they need
+ // to wait to writeback to memory. @todo: Hack...attempt to fix
+ // having the checker be forced to wait until a store completes in
+ // order to check all of the instructions. If the store at the
+ // head of the check list misses, but a later store hits, then
+ // loads in the checker may see the younger store values instead
+ // of the store they should see. Either the checker needs its own
+ // memory (annoying to update), its own store buffer (how to tell
+ // which value is correct?), or something else...
+ if (!inst->isStore()) {
+ inst->setCompleted();
+ }
// Check if the instruction caused a fault. If so, trap.
Fault inst_fault = inst->getFault();
+ // Use checker prior to updating anything due to traps or PC
+ // based events.
+ if (checker) {
+ checker->tick(inst);
+ }
+
if (inst_fault != NoFault) {
DPRINTF(BE, "Inst [sn:%lli] PC %#x has a fault\n",
inst->seqNum, inst->readPC());
+
+ // Instruction is completed as it has a fault.
+ inst->setCompleted();
+
+ if (LSQ.hasStoresToWB()) {
+ DPRINTF(BE, "Stores still in flight, will wait until drained.\n");
+ return false;
+ } else if (inst_num != 0) {
+ DPRINTF(BE, "Will wait until instruction is head of commit group.\n");
+ return false;
+ } else if (checker && inst->isStore()) {
+ checker->tick(inst);
+ }
+
thread->setInst(
static_cast<TheISA::MachInst>(inst->staticInst->machInst));
#if FULL_SYSTEM
@@ -1094,6 +1192,8 @@ LWBackEnd<Impl>::commitInst(int inst_num)
}
if (inst->traceData) {
+ inst->traceData->setFetchSeq(inst->seqNum);
+ inst->traceData->setCPSeq(thread->numInst);
inst->traceData->finalize();
inst->traceData = NULL;
}
@@ -1105,18 +1205,18 @@ LWBackEnd<Impl>::commitInst(int inst_num)
instList.pop_back();
--numInsts;
- thread->numInsts++;
++thread->funcExeInst;
- // Maybe move this to where teh fault is handled; if the fault is handled,
+ // Maybe move this to where the fault is handled; if the fault is handled,
// don't try to set this myself as the fault will set it. If not, then
// I set thread->PC = thread->nextPC and thread->nextPC = thread->nextPC + 4.
thread->setPC(thread->readNextPC());
+ thread->setNextPC(thread->readNextPC() + sizeof(TheISA::MachInst));
updateComInstStats(inst);
// Write the done sequence number here.
// LSQ.commitLoads(inst->seqNum);
-// LSQ.commitStores(inst->seqNum);
toIEW->doneSeqNum = inst->seqNum;
+ lastCommitCycle = curTick;
#if FULL_SYSTEM
int count = 0;
@@ -1243,6 +1343,22 @@ LWBackEnd<Impl>::squash(const InstSeqNum &sn)
waitingInsts--;
}
+ while (memBarrier && memBarrier->seqNum > sn) {
+ DPRINTF(BE, "[sn:%lli] Memory barrier squashed (or previously squashed)\n", memBarrier->seqNum);
+ memBarrier->clearMemDependents();
+ if (memBarrier->memDepReady()) {
+ DPRINTF(BE, "No previous barrier\n");
+ memBarrier = NULL;
+ } else {
+ std::list<DynInstPtr> &srcs = memBarrier->getMemSrcs();
+ memBarrier = srcs.front();
+ srcs.pop_front();
+ assert(srcs.empty());
+ DPRINTF(BE, "Previous barrier: [sn:%lli]\n",
+ memBarrier->seqNum);
+ }
+ }
+
frontEnd->addFreeRegs(freed_regs);
}
@@ -1254,6 +1370,7 @@ LWBackEnd<Impl>::squashFromXC()
squash(squashed_inst);
frontEnd->squash(squashed_inst, thread->readPC(),
false, false);
+ frontEnd->interruptPending = false;
thread->trapPending = false;
thread->inSyscall = false;
@@ -1269,6 +1386,7 @@ LWBackEnd<Impl>::squashFromTrap()
squash(squashed_inst);
frontEnd->squash(squashed_inst, thread->readPC(),
false, false);
+ frontEnd->interruptPending = false;
thread->trapPending = false;
thread->inSyscall = false;
@@ -1321,6 +1439,36 @@ LWBackEnd<Impl>::fetchFault(Fault &fault)
template <class Impl>
void
+LWBackEnd<Impl>::switchOut()
+{
+ switchedOut = true;
+ // Need to get rid of all committed, non-speculative state and write it
+ // to memory/XC. In this case this is stores that have committed and not
+ // yet written back.
+ LSQ.switchOut();
+ squash(0);
+}
+
+template <class Impl>
+void
+LWBackEnd<Impl>::takeOverFrom(ExecContext *old_xc)
+{
+ switchedOut = false;
+ xcSquash = false;
+ trapSquash = false;
+
+ numInsts = 0;
+ numWaitingMemOps = 0;
+ waitingMemOps.clear();
+ waitingInsts = 0;
+ switchedOut = false;
+ dispatchStatus = Running;
+ commitStatus = Running;
+ LSQ.takeOverFrom(old_xc);
+}
+
+template <class Impl>
+void
LWBackEnd<Impl>::updateExeInstStats(DynInstPtr &inst)
{
int thread_number = inst->threadNumber;
@@ -1358,7 +1506,11 @@ template <class Impl>
void
LWBackEnd<Impl>::updateComInstStats(DynInstPtr &inst)
{
- unsigned thread = inst->threadNumber;
+ unsigned tid = inst->threadNumber;
+
+ // keep an instruction count
+ thread->numInst++;
+ thread->numInsts++;
cpu->numInst++;
//
@@ -1366,33 +1518,33 @@ LWBackEnd<Impl>::updateComInstStats(DynInstPtr &inst)
//
#ifdef TARGET_ALPHA
if (inst->isDataPrefetch()) {
- stat_com_swp[thread]++;
+ stat_com_swp[tid]++;
} else {
- stat_com_inst[thread]++;
+ stat_com_inst[tid]++;
}
#else
- stat_com_inst[thread]++;
+ stat_com_inst[tid]++;
#endif
//
// Control Instructions
//
if (inst->isControl())
- stat_com_branches[thread]++;
+ stat_com_branches[tid]++;
//
// Memory references
//
if (inst->isMemRef()) {
- stat_com_refs[thread]++;
+ stat_com_refs[tid]++;
if (inst->isLoad()) {
- stat_com_loads[thread]++;
+ stat_com_loads[tid]++;
}
}
if (inst->isMemBarrier()) {
- stat_com_membars[thread]++;
+ stat_com_membars[tid]++;
}
}
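Note: the reworked checkInterrupts() above defers the trap: fetch is stopped via interruptPending, but the interrupt is only taken once the ROB is empty and the LSQ has no stores left to write back. A compressed sketch of that control flow, using simplified stand-in fields rather than the real back-end state:

    #include <iostream>

    struct BackEnd {
        bool interruptPending = false;   // mirrored into the front end
        int robInsts = 0;                // instructions still in the ROB
        int storesToWB = 0;              // committed stores not yet written back
        bool trapPending = false;

        void checkInterrupts(bool interruptAsserted) {
            if (!interruptAsserted || trapPending)
                return;
            interruptPending = true;     // stop fetching new work
            if (robInsts == 0 && storesToWB == 0) {
                trapPending = true;      // safe to squash and take the trap
                std::cout << "taking interrupt\n";
            } else {
                std::cout << "interrupt must wait for ROB/LSQ to drain\n";
            }
        }
    };

    int main() {
        BackEnd be;
        be.robInsts = 3;
        be.checkInterrupts(true);        // deferred: ROB still has work
        be.robInsts = 0;
        be.checkInterrupts(true);        // now the trap can be generated
    }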
diff --git a/cpu/ozone/lw_lsq.hh b/cpu/ozone/lw_lsq.hh
index eb9886244..042610324 100644
--- a/cpu/ozone/lw_lsq.hh
+++ b/cpu/ozone/lw_lsq.hh
@@ -41,6 +41,7 @@
#include "cpu/inst_seq.hh"
#include "mem/mem_interface.hh"
//#include "mem/page_table.hh"
+#include "sim/debug.hh"
#include "sim/sim_object.hh"
//class PageTable;
@@ -90,7 +91,10 @@ class OzoneLWLSQ {
/** The writeback event for the store. Needed for store
* conditionals.
*/
+ public:
Event *wbEvent;
+ bool miss;
+ private:
/** The pointer to the LSQ unit that issued the store. */
OzoneLWLSQ<Impl> *lsqPtr;
};
@@ -228,6 +232,14 @@ class OzoneLWLSQ {
!storeQueue.back().completed &&
!dcacheInterface->isBlocked(); }
+ void switchOut();
+
+ void takeOverFrom(ExecContext *old_xc = NULL);
+
+ bool isSwitchedOut() { return switchedOut; }
+
+ bool switchedOut;
+
private:
/** Completes the store at the specified index. */
void completeStore(int store_idx);
@@ -560,12 +572,10 @@ OzoneLWLSQ<Impl>::read(MemReqPtr &req, T &data, int load_idx)
sq_it++;
}
-
// If there's no forwarding case, then go access memory
DPRINTF(OzoneLSQ, "Doing functional access for inst PC %#x\n",
inst->readPC());
-
// Setup MemReq pointer
req->cmd = Read;
req->completionEvent = NULL;
@@ -594,8 +604,12 @@ OzoneLWLSQ<Impl>::read(MemReqPtr &req, T &data, int load_idx)
DPRINTF(OzoneLSQ, "D-cache: PC:%#x reading from paddr:%#x "
"vaddr:%#x flags:%i\n",
inst->readPC(), req->paddr, req->vaddr, req->flags);
-
-
+/*
+ Addr debug_addr = ULL(0xfffffc0000be81a8);
+ if (req->vaddr == debug_addr) {
+ debug_break();
+ }
+*/
assert(!req->completionEvent);
req->completionEvent =
new typename BackEnd::LdWritebackEvent(inst, be);
@@ -647,7 +661,15 @@ OzoneLWLSQ<Impl>::write(MemReqPtr &req, T &data, int store_idx)
(*sq_it).req = req;
(*sq_it).size = sizeof(T);
(*sq_it).data = data;
-
+ assert(!req->data);
+ req->data = new uint8_t[64];
+ memcpy(req->data, (uint8_t *)&(*sq_it).data, req->size);
+/*
+ Addr debug_addr = ULL(0xfffffc0000be81a8);
+ if (req->vaddr == debug_addr) {
+ debug_break();
+ }
+*/
// This function only writes the data to the store queue, so no fault
// can happen here.
return NoFault;
diff --git a/cpu/ozone/lw_lsq_impl.hh b/cpu/ozone/lw_lsq_impl.hh
index 7b22d2564..9b7e48f96 100644
--- a/cpu/ozone/lw_lsq_impl.hh
+++ b/cpu/ozone/lw_lsq_impl.hh
@@ -29,6 +29,7 @@
#include "arch/isa_traits.hh"
#include "base/str.hh"
#include "cpu/ozone/lw_lsq.hh"
+#include "cpu/checker/cpu.hh"
template <class Impl>
OzoneLWLSQ<Impl>::StoreCompletionEvent::StoreCompletionEvent(DynInstPtr &_inst,
@@ -39,6 +40,7 @@ OzoneLWLSQ<Impl>::StoreCompletionEvent::StoreCompletionEvent(DynInstPtr &_inst,
inst(_inst),
be(_be),
wbEvent(wb_event),
+ miss(false),
lsqPtr(lsq_ptr)
{
this->setFlags(Event::AutoDelete);
@@ -54,13 +56,21 @@ OzoneLWLSQ<Impl>::StoreCompletionEvent::process()
//lsqPtr->removeMSHR(lsqPtr->storeQueue[storeIdx].inst->seqNum);
// lsqPtr->cpu->wakeCPU();
+ if (lsqPtr->isSwitchedOut()) {
+ if (wbEvent)
+ delete wbEvent;
+
+ return;
+ }
+
if (wbEvent) {
wbEvent->process();
delete wbEvent;
}
lsqPtr->completeStore(inst->sqIdx);
- be->removeDcacheMiss(inst);
+ if (miss)
+ be->removeDcacheMiss(inst);
}
template <class Impl>
@@ -80,8 +90,7 @@ OzoneLWLSQ<Impl>::OzoneLWLSQ()
template<class Impl>
void
OzoneLWLSQ<Impl>::init(Params *params, unsigned maxLQEntries,
- unsigned maxSQEntries, unsigned id)
-
+ unsigned maxSQEntries, unsigned id)
{
DPRINTF(OzoneLSQ, "Creating OzoneLWLSQ%i object.\n",id);
@@ -90,7 +99,7 @@ OzoneLWLSQ<Impl>::init(Params *params, unsigned maxLQEntries,
LQEntries = maxLQEntries;
SQEntries = maxSQEntries;
- for (int i = 0; i < LQEntries * 10; i++) {
+ for (int i = 0; i < LQEntries * 2; i++) {
LQIndices.push(i);
SQIndices.push(i);
}
@@ -196,6 +205,7 @@ template <class Impl>
void
OzoneLWLSQ<Impl>::insertLoad(DynInstPtr &load_inst)
{
+ assert(loads < LQEntries * 2);
assert(!LQIndices.empty());
int load_index = LQIndices.front();
LQIndices.pop();
@@ -503,21 +513,13 @@ OzoneLWLSQ<Impl>::writebackStores()
assert((*sq_it).req);
assert(!(*sq_it).committed);
- MemReqPtr req = (*sq_it).req;
(*sq_it).committed = true;
+ MemReqPtr req = (*sq_it).req;
+
req->cmd = Write;
req->completionEvent = NULL;
req->time = curTick;
- assert(!req->data);
- req->data = new uint8_t[64];
- memcpy(req->data, (uint8_t *)&(*sq_it).data, req->size);
-
- DPRINTF(OzoneLSQ, "D-Cache: Writing back store idx:%i PC:%#x "
- "to Addr:%#x, data:%#x [sn:%lli]\n",
- inst->sqIdx,inst->readPC(),
- req->paddr, *(req->data),
- inst->seqNum);
switch((*sq_it).size) {
case 1:
@@ -535,8 +537,25 @@ OzoneLWLSQ<Impl>::writebackStores()
default:
panic("Unexpected store size!\n");
}
+ if (!(req->flags & LOCKED)) {
+ (*sq_it).inst->setCompleted();
+ if (cpu->checker) {
+ cpu->checker->tick((*sq_it).inst);
+ }
+ }
+
+ DPRINTF(OzoneLSQ, "D-Cache: Writing back store idx:%i PC:%#x "
+ "to Addr:%#x, data:%#x [sn:%lli]\n",
+ inst->sqIdx,inst->readPC(),
+ req->paddr, *(req->data),
+ inst->seqNum);
if (dcacheInterface) {
+ assert(!req->completionEvent);
+ StoreCompletionEvent *store_event = new
+ StoreCompletionEvent(inst, be, NULL, this);
+ req->completionEvent = store_event;
+
MemAccessResult result = dcacheInterface->access(req);
if (isStalled() &&
@@ -551,13 +570,14 @@ OzoneLWLSQ<Impl>::writebackStores()
if (result != MA_HIT && dcacheInterface->doEvents()) {
// Event *wb = NULL;
-
+ store_event->miss = true;
typename BackEnd::LdWritebackEvent *wb = NULL;
if (req->flags & LOCKED) {
// Stx_C does not generate a system port transaction.
// req->result=1;
wb = new typename BackEnd::LdWritebackEvent(inst,
be);
+ store_event->wbEvent = wb;
}
DPRINTF(OzoneLSQ,"D-Cache Write Miss!\n");
@@ -567,9 +587,6 @@ OzoneLWLSQ<Impl>::writebackStores()
// Will stores need their own kind of writeback events?
// Do stores even need writeback events?
- assert(!req->completionEvent);
- req->completionEvent = new
- StoreCompletionEvent(inst, be, wb, this);
be->addDcacheMiss(inst);
lastDcacheStall = curTick;
@@ -597,10 +614,10 @@ OzoneLWLSQ<Impl>::writebackStores()
typename BackEnd::LdWritebackEvent *wb =
new typename BackEnd::LdWritebackEvent(inst,
be);
- wb->schedule(curTick);
+ store_event->wbEvent = wb;
}
sq_it--;
- completeStore(inst->sqIdx);
+// completeStore(inst->sqIdx);
}
} else {
panic("Must HAVE DCACHE!!!!!\n");
@@ -758,31 +775,121 @@ OzoneLWLSQ<Impl>::completeStore(int store_idx)
DPRINTF(OzoneLSQ, "Completing store idx:%i [sn:%lli], storesToWB:%i\n",
inst->sqIdx, inst->seqNum, storesToWB);
- // A bit conservative because a store completion may not free up entries,
- // but hopefully avoids two store completions in one cycle from making
- // the CPU tick twice.
-// cpu->activityThisCycle();
assert(!storeQueue.empty());
SQItHash.erase(sq_hash_it);
SQIndices.push(inst->sqIdx);
storeQueue.erase(sq_it);
--stores;
-/*
- SQIt oldest_store_it = --(storeQueue.end());
- if (sq_it == oldest_store_it) {
- do {
- inst = (*oldest_store_it).inst;
- sq_hash_it = SQItHash.find(inst->sqIdx);
- assert(sq_hash_it != SQItHash.end());
- SQItHash.erase(sq_hash_it);
- SQIndices.push(inst->sqIdx);
- storeQueue.erase(oldest_store_it--);
-
- --stores;
- } while ((*oldest_store_it).completed &&
- oldest_store_it != storeQueue.end());
-
-// be->updateLSQNextCycle = true;
+// assert(!inst->isCompleted());
+ inst->setCompleted();
+ if (cpu->checker) {
+ cpu->checker->tick(inst);
}
-*/
+}
+
+template <class Impl>
+void
+OzoneLWLSQ<Impl>::switchOut()
+{
+ switchedOut = true;
+ SQIt sq_it = --(storeQueue.end());
+ while (storesToWB > 0 &&
+ sq_it != storeQueue.end() &&
+ (*sq_it).inst &&
+ (*sq_it).canWB) {
+
+ DynInstPtr inst = (*sq_it).inst;
+
+ if ((*sq_it).size == 0 && !(*sq_it).completed) {
+ sq_it--;
+// completeStore(inst->sqIdx);
+
+ continue;
+ }
+
+ // Store conditionals don't complete until *after* they have written
+ // back. If it's here and not yet sent to memory, then don't bother
+ // as it's not part of committed state.
+ if (inst->isDataPrefetch() || (*sq_it).committed ||
+ (*sq_it).req->flags & LOCKED) {
+ sq_it--;
+ continue;
+ }
+
+ assert((*sq_it).req);
+ assert(!(*sq_it).committed);
+
+ MemReqPtr req = (*sq_it).req;
+ (*sq_it).committed = true;
+
+ req->cmd = Write;
+ req->completionEvent = NULL;
+ req->time = curTick;
+ assert(!req->data);
+ req->data = new uint8_t[64];
+ memcpy(req->data, (uint8_t *)&(*sq_it).data, req->size);
+
+ DPRINTF(OzoneLSQ, "Switching out : Writing back store idx:%i PC:%#x "
+ "to Addr:%#x, data:%#x directly to memory [sn:%lli]\n",
+ inst->sqIdx,inst->readPC(),
+ req->paddr, *(req->data),
+ inst->seqNum);
+
+ switch((*sq_it).size) {
+ case 1:
+ cpu->write(req, (uint8_t &)(*sq_it).data);
+ break;
+ case 2:
+ cpu->write(req, (uint16_t &)(*sq_it).data);
+ break;
+ case 4:
+ cpu->write(req, (uint32_t &)(*sq_it).data);
+ break;
+ case 8:
+ cpu->write(req, (uint64_t &)(*sq_it).data);
+ break;
+ default:
+ panic("Unexpected store size!\n");
+ }
+ }
+
+ // Clear the queue to free up resources
+ storeQueue.clear();
+ loadQueue.clear();
+ loads = stores = storesToWB = 0;
+}
+
+template <class Impl>
+void
+OzoneLWLSQ<Impl>::takeOverFrom(ExecContext *old_xc)
+{
+ // Clear out any old state. May be redundant if this is the first time
+ // the CPU is being used.
+ stalled = false;
+ isLoadBlocked = false;
+ loadBlockedHandled = false;
+ switchedOut = false;
+
+ // Could do simple checks here to see if indices are on twice
+ while (!LQIndices.empty())
+ LQIndices.pop();
+ while (!SQIndices.empty())
+ SQIndices.pop();
+
+ for (int i = 0; i < LQEntries * 2; i++) {
+ LQIndices.push(i);
+ SQIndices.push(i);
+ }
+
+ // May want to initialize these entries to NULL
+
+// loadHead = loadTail = 0;
+
+// storeHead = storeWBIdx = storeTail = 0;
+
+ usedPorts = 0;
+
+ loadFaultInst = storeFaultInst = memDepViolator = NULL;
+
+ blockedLoadSeqNum = 0;
}
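Note: taken together, the switchOut()/takeOverFrom() paths above drain the old CPU (the LSQ writes committed-but-unwritten stores straight to memory before its queues are cleared), signal the Sampler, and let the incoming CPU start from re-initialised structures. The sketch below captures that handoff with placeholder types; it is not the actual m5 Sampler or LSQ code.

    #include <iostream>
    #include <vector>

    struct Store { unsigned long long addr; unsigned long long data; };

    struct Lsq {
        std::vector<Store> committedStores;   // canWB, not yet written back
        void switchOut(std::vector<Store> &memory) {
            // Committed stores are architectural state, so flush them to
            // memory functionally before the queues are thrown away.
            for (const Store &s : committedStores)
                memory.push_back(s);
            committedStores.clear();
        }
    };

    struct Cpu {
        Lsq lsq;
        bool switchedOut = false;
        void switchOut(std::vector<Store> &memory) {
            lsq.switchOut(memory);              // back end / LSQ drained first
            switchedOut = true;                 // then mark the CPU switched out
            std::cout << "signalSwitched()\n";  // finally notify the sampler
        }
        void takeOverFrom() {
            switchedOut = false;                // fresh, empty structures
        }
    };

    int main() {
        std::vector<Store> memory;
        Cpu oldCpu, newCpu;
        oldCpu.lsq.committedStores.push_back({0x1000, 42});
        oldCpu.switchOut(memory);
        newCpu.takeOverFrom();
        std::cout << "stores flushed to memory: " << memory.size() << "\n";
    }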
diff --git a/cpu/ozone/simple_params.hh b/cpu/ozone/simple_params.hh
index e503654aa..647da1781 100644
--- a/cpu/ozone/simple_params.hh
+++ b/cpu/ozone/simple_params.hh
@@ -51,6 +51,7 @@ class SimpleParams : public BaseCPU::Params
unsigned backEndLatency;
unsigned maxInstBufferSize;
unsigned numPhysicalRegs;
+ unsigned maxOutstandingMemOps;
//
// Fetch
//
diff --git a/python/m5/objects/OzoneCPU.py b/python/m5/objects/OzoneCPU.py
index 8186a44bb..3fca61e28 100644
--- a/python/m5/objects/OzoneCPU.py
+++ b/python/m5/objects/OzoneCPU.py
@@ -9,12 +9,15 @@ class DerivOzoneCPU(BaseCPU):
if not build_env['FULL_SYSTEM']:
mem = Param.FunctionalMemory(NULL, "memory")
+ checker = Param.BaseCPU("Checker CPU")
+
width = Param.Unsigned("Width")
frontEndWidth = Param.Unsigned("Front end width")
backEndWidth = Param.Unsigned("Back end width")
backEndSquashLatency = Param.Unsigned("Back end squash latency")
backEndLatency = Param.Unsigned("Back end latency")
maxInstBufferSize = Param.Unsigned("Maximum instruction buffer size")
+ maxOutstandingMemOps = Param.Unsigned("Maximum number of outstanding memory operations")
decodeToFetchDelay = Param.Unsigned("Decode to fetch delay")
renameToFetchDelay = Param.Unsigned("Rename to fetch delay")
iewToFetchDelay = Param.Unsigned("Issue/Execute/Writeback to fetch "