summary | refs | log | tree | commit | diff
path: root/cpu
diff options
context:
space:
mode:
Diffstat (limited to 'cpu')
-rw-r--r-- cpu/cpu_exec_context.cc 5
-rw-r--r-- cpu/o3/cpu.cc 40
-rw-r--r-- cpu/o3/fetch_impl.hh 3
-rw-r--r-- cpu/o3/inst_queue.hh 4
-rw-r--r-- cpu/o3/inst_queue_impl.hh 7
-rw-r--r-- cpu/o3/lsq_unit.hh 3
-rw-r--r-- cpu/o3/lsq_unit_impl.hh 5
-rw-r--r-- cpu/o3/rename.hh 2
-rw-r--r-- cpu/o3/rename_impl.hh 8
-rw-r--r-- cpu/ozone/cpu_impl.hh 59
-rw-r--r-- cpu/simple/cpu.cc 31
11 files changed, 138 insertions, 29 deletions
diff --git a/cpu/cpu_exec_context.cc b/cpu/cpu_exec_context.cc
index 0dcf149fd..9f151dd6a 100644
--- a/cpu/cpu_exec_context.cc
+++ b/cpu/cpu_exec_context.cc
@@ -183,6 +183,11 @@ CPUExecContext::takeOverFrom(ExecContext *oldContext)
if (quiesceEvent) {
quiesceEvent->xc = proxy;
}
+
+ Kernel::Statistics *stats = oldContext->getKernelStats();
+ if (stats) {
+ kernelStats = stats;
+ }
#endif
storeCondFailures = 0;
diff --git a/cpu/o3/cpu.cc b/cpu/o3/cpu.cc
index 0025d4144..88de6c746 100644
--- a/cpu/o3/cpu.cc
+++ b/cpu/o3/cpu.cc
@@ -599,8 +599,11 @@ FullO3CPU<Impl>::activateContext(int tid, int delay)
// Be sure to signal that there's some activity so the CPU doesn't
// deschedule itself.
activityRec.activity();
+
+#if FULL_SYSTEM
if (thread[tid]->quiesceEvent && thread[tid]->quiesceEvent->scheduled())
thread[tid]->quiesceEvent->deschedule();
+#endif
fetch.wakeFromQuiesce();
@@ -671,6 +674,8 @@ template <class Impl>
void
FullO3CPU<Impl>::switchOut(Sampler *_sampler)
{
+ DPRINTF(FullCPU, "Switching out\n");
+ BaseCPU::switchOut(_sampler);
sampler = _sampler;
switchCount = 0;
fetch.switchOut();
@@ -694,6 +699,41 @@ FullO3CPU<Impl>::signalSwitched()
rename.doSwitchOut();
commit.doSwitchOut();
instList.clear();
+
+#ifndef NDEBUG
+ PhysRegIndex renamed_reg;
+ // First loop through the integer registers.
+ for (int i = 0; i < AlphaISA::NumIntRegs; ++i) {
+ renamed_reg = renameMap[0].lookup(i);
+ assert(renamed_reg == commitRenameMap[0].lookup(i));
+
+ DPRINTF(FullCPU, "FullCPU: Checking if register %i is ready.\n",
+ renamed_reg);
+
+ assert(scoreboard.getReg(renamed_reg));
+ }
+
+ // Then loop through the floating point registers.
+ for (int i = 0; i < AlphaISA::NumFloatRegs; ++i) {
+ renamed_reg = renameMap[0].lookup(i + AlphaISA::FP_Base_DepTag);
+ assert(renamed_reg == commitRenameMap[0].lookup(i + AlphaISA::FP_Base_DepTag));
+
+ DPRINTF(FullCPU, "FullCPU: Checking if register %i is ready.\n",
+ renamed_reg);
+
+ assert(scoreboard.getReg(renamed_reg));
+ }
+
+ for (int i = 0; i < AlphaISA::NumMiscRegs; ++i) {
+ renamed_reg = i + ((Params *)params)->numPhysFloatRegs + ((Params *)params)->numPhysIntRegs;
+
+ DPRINTF(FullCPU, "FullCPU: Checking if register %i is ready.\n",
+ renamed_reg);
+
+ assert(scoreboard.getReg(renamed_reg));
+ }
+#endif
+
while (!removeList.empty()) {
removeList.pop();
}
diff --git a/cpu/o3/fetch_impl.hh b/cpu/o3/fetch_impl.hh
index cc09c4a41..7a3292dbe 100644
--- a/cpu/o3/fetch_impl.hh
+++ b/cpu/o3/fetch_impl.hh
@@ -391,6 +391,7 @@ DefaultFetch<Impl>::takeOverFrom()
wroteToTimeBuffer = false;
_status = Inactive;
switchedOut = false;
+ interruptPending = false;
branchPred.takeOverFrom();
}
@@ -469,7 +470,7 @@ DefaultFetch<Impl>::fetchCacheLine(Addr fetch_PC, Fault &ret_fault, unsigned tid
unsigned flags = 0;
#endif // FULL_SYSTEM
- if (interruptPending && flags == 0) {
+ if (isSwitchedOut() || (interruptPending && flags == 0)) {
// Hold off fetch from getting new instructions while an interrupt
// is pending.
return false;
diff --git a/cpu/o3/inst_queue.hh b/cpu/o3/inst_queue.hh
index 80cd71f0d..e96fbc667 100644
--- a/cpu/o3/inst_queue.hh
+++ b/cpu/o3/inst_queue.hh
@@ -474,11 +474,11 @@ class InstructionQueue
Stats::Scalar<> iqSquashedNonSpecRemoved;
/** Distribution of number of instructions in the queue. */
- Stats::VectorDistribution<> queueResDist;
+// Stats::VectorDistribution<> queueResDist;
/** Distribution of the number of instructions issued. */
Stats::Distribution<> numIssuedDist;
/** Distribution of the cycles it takes to issue an instruction. */
- Stats::VectorDistribution<> issueDelayDist;
+// Stats::VectorDistribution<> issueDelayDist;
/** Number of times an instruction could not be issued because a
* FU was busy.
diff --git a/cpu/o3/inst_queue_impl.hh b/cpu/o3/inst_queue_impl.hh
index 72cb0d708..b6b06ca77 100644
--- a/cpu/o3/inst_queue_impl.hh
+++ b/cpu/o3/inst_queue_impl.hh
@@ -230,7 +230,7 @@ InstructionQueue<Impl>::regStats()
.name(name() + ".iqSquashedNonSpecRemoved")
.desc("Number of squashed non-spec instructions that were removed")
.prereq(iqSquashedNonSpecRemoved);
-
+/*
queueResDist
.init(Num_OpClasses, 0, 99, 2)
.name(name() + ".IQ:residence:")
@@ -240,6 +240,7 @@ InstructionQueue<Impl>::regStats()
for (int i = 0; i < Num_OpClasses; ++i) {
queueResDist.subname(i, opClassStrings[i]);
}
+*/
numIssuedDist
.init(0,totalWidth,1)
.name(name() + ".ISSUE:issued_per_cycle")
@@ -268,7 +269,7 @@ InstructionQueue<Impl>::regStats()
//
// How long did instructions for a particular FU type wait prior to issue
//
-
+/*
issueDelayDist
.init(Num_OpClasses,0,99,2)
.name(name() + ".ISSUE:")
@@ -281,7 +282,7 @@ InstructionQueue<Impl>::regStats()
subname << opClassStrings[i] << "_delay";
issueDelayDist.subname(i, subname.str());
}
-
+*/
issueRate
.name(name() + ".ISSUE:rate")
.desc("Inst issue rate")
diff --git a/cpu/o3/lsq_unit.hh b/cpu/o3/lsq_unit.hh
index fe174a97d..1db6dc02d 100644
--- a/cpu/o3/lsq_unit.hh
+++ b/cpu/o3/lsq_unit.hh
@@ -382,6 +382,9 @@ class LSQUnit {
* ignored due to the instruction already being squashed. */
Stats::Scalar<> lsqIgnoredResponses;
+ /** Total number of memory ordering violations. */
+ Stats::Scalar<> lsqMemOrderViolation;
+
/** Total number of squashed stores. */
Stats::Scalar<> lsqSquashedStores;
diff --git a/cpu/o3/lsq_unit_impl.hh b/cpu/o3/lsq_unit_impl.hh
index 5cc3078f8..7086c381e 100644
--- a/cpu/o3/lsq_unit_impl.hh
+++ b/cpu/o3/lsq_unit_impl.hh
@@ -144,6 +144,10 @@ LSQUnit<Impl>::regStats()
.name(name() + ".ignoredResponses")
.desc("Number of memory responses ignored because the instruction is squashed");
+ lsqMemOrderViolation
+ .name(name() + ".memOrderViolation")
+ .desc("Number of memory ordering violations");
+
lsqSquashedStores
.name(name() + ".squashedStores")
.desc("Number of stores squashed");
@@ -495,6 +499,7 @@ LSQUnit<Impl>::executeStore(DynInstPtr &store_inst)
// A load incorrectly passed this store. Squash and refetch.
// For now return a fault to show that it was unsuccessful.
memDepViolator = loadQueue[load_idx];
+ ++lsqMemOrderViolation;
return genMachineCheckFault();
}
diff --git a/cpu/o3/rename.hh b/cpu/o3/rename.hh
index 4912431ad..5769dbd37 100644
--- a/cpu/o3/rename.hh
+++ b/cpu/o3/rename.hh
@@ -411,6 +411,8 @@ class DefaultRename
/** The maximum skid buffer size. */
unsigned skidBufferMax;
+ PhysRegIndex maxPhysicalRegs;
+
/** Enum to record the source of a structure full stall. Can come from
* either ROB, IQ, LSQ, and it is priortized in that order.
*/
diff --git a/cpu/o3/rename_impl.hh b/cpu/o3/rename_impl.hh
index 93f5b3504..49627e3d4 100644
--- a/cpu/o3/rename_impl.hh
+++ b/cpu/o3/rename_impl.hh
@@ -40,7 +40,8 @@ DefaultRename<Impl>::DefaultRename(Params *params)
commitToRenameDelay(params->commitToRenameDelay),
renameWidth(params->renameWidth),
commitWidth(params->commitWidth),
- numThreads(params->numberOfThreads)
+ numThreads(params->numberOfThreads),
+ maxPhysicalRegs(params->numPhysIntRegs + params->numPhysFloatRegs)
{
_status = Inactive;
@@ -283,6 +284,11 @@ DefaultRename<Impl>::doSwitchOut()
// Put the renamed physical register back on the free list.
freeList->addReg(hb_it->newPhysReg);
+ // Be sure to mark its register as ready if it's a misc register.
+ if (hb_it->newPhysReg >= maxPhysicalRegs) {
+ scoreboard->setReg(hb_it->newPhysReg);
+ }
+
historyBuffer[i].erase(hb_it++);
}
insts[i].clear();
diff --git a/cpu/ozone/cpu_impl.hh b/cpu/ozone/cpu_impl.hh
index 050bdb9a3..1a0de29f5 100644
--- a/cpu/ozone/cpu_impl.hh
+++ b/cpu/ozone/cpu_impl.hh
@@ -184,7 +184,9 @@ OzoneCPU<Impl>::OzoneCPU(Params *p)
globalSeqNum = 1;
+#if FULL_SYSTEM
checkInterrupts = false;
+#endif
lockFlag = 0;
@@ -213,6 +215,7 @@ template <class Impl>
void
OzoneCPU<Impl>::switchOut(Sampler *_sampler)
{
+ BaseCPU::switchOut(_sampler);
sampler = _sampler;
switchCount = 0;
// Front end needs state from back end, so switch out the back end first.
@@ -234,6 +237,16 @@ OzoneCPU<Impl>::signalSwitched()
checker->switchOut(sampler);
_status = SwitchedOut;
+#ifndef NDEBUG
+ // Loop through all registers
+ for (int i = 0; i < AlphaISA::TotalNumRegs; ++i) {
+ assert(thread.renameTable[i] == frontEnd->renameTable[i]);
+
+ assert(thread.renameTable[i] == backEnd->renameTable[i]);
+
+ DPRINTF(OzoneCPU, "Checking if register %i matches.\n", i);
+ }
+#endif
if (tickEvent.scheduled())
tickEvent.squash();
@@ -256,9 +269,16 @@ OzoneCPU<Impl>::takeOverFrom(BaseCPU *oldCPU)
frontEnd->takeOverFrom();
assert(!tickEvent.scheduled());
+#ifndef NDEBUG
+ // Check rename table.
+ for (int i = 0; i < TheISA::TotalNumRegs; ++i) {
+ assert(thread.renameTable[i]->isResultReady());
+ }
+#endif
+
// @todo: Fix hardcoded number
// Clear out any old information in time buffer.
- for (int i = 0; i < 6; ++i) {
+ for (int i = 0; i < 15; ++i) {
comm.advance();
}
@@ -291,8 +311,10 @@ OzoneCPU<Impl>::activateContext(int thread_num, int delay)
scheduleTickEvent(delay);
_status = Running;
thread._status = ExecContext::Active;
+#if FULL_SYSTEM
if (thread.quiesceEvent && thread.quiesceEvent->scheduled())
thread.quiesceEvent->deschedule();
+#endif
frontEnd->wakeFromQuiesce();
}
@@ -369,7 +391,7 @@ template <class Impl>
void
OzoneCPU<Impl>::resetStats()
{
- startNumInst = numInst;
+// startNumInst = numInst;
notIdleFraction = (_status != Idle);
}
@@ -777,7 +799,9 @@ OzoneCPU<Impl>::OzoneXC::halt()
template <class Impl>
void
OzoneCPU<Impl>::OzoneXC::dumpFuncProfile()
-{ }
+{
+ thread->dumpFuncProfile();
+}
#endif
template <class Impl>
@@ -797,6 +821,7 @@ OzoneCPU<Impl>::OzoneXC::takeOverFrom(ExecContext *old_context)
copyArchRegs(old_context);
setCpuId(old_context->readCpuId());
+ thread->inst = old_context->getInst();
#if !FULL_SYSTEM
setFuncExeInst(old_context->readFuncExeInst());
#else
@@ -869,16 +894,14 @@ template <class Impl>
void
OzoneCPU<Impl>::OzoneXC::profileClear()
{
- if (thread->profile)
- thread->profile->clear();
+ thread->profileClear();
}
template <class Impl>
void
OzoneCPU<Impl>::OzoneXC::profileSample()
{
- if (thread->profile)
- thread->profile->sample(thread->profileNode, thread->profilePC);
+ thread->profileSample();
}
#endif
@@ -906,14 +929,20 @@ OzoneCPU<Impl>::OzoneXC::copyArchRegs(ExecContext *xc)
cpu->frontEnd->setPC(thread->PC);
cpu->frontEnd->setNextPC(thread->nextPC);
- for (int i = 0; i < TheISA::TotalNumRegs; ++i) {
- if (i < TheISA::FP_Base_DepTag) {
- thread->renameTable[i]->setIntResult(xc->readIntReg(i));
- } else if (i < (TheISA::FP_Base_DepTag + TheISA::NumFloatRegs)) {
- int fp_idx = i - TheISA::FP_Base_DepTag;
- thread->renameTable[i]->setDoubleResult(
- xc->readFloatRegDouble(fp_idx));
- }
+ // First loop through the integer registers.
+ for (int i = 0; i < TheISA::NumIntRegs; ++i) {
+/* DPRINTF(OzoneCPU, "Copying over register %i, had data %lli, "
+ "now has data %lli.\n",
+ i, thread->renameTable[i]->readIntResult(),
+ xc->readIntReg(i));
+*/
+ thread->renameTable[i]->setIntResult(xc->readIntReg(i));
+ }
+
+ // Then loop through the floating point registers.
+ for (int i = 0; i < TheISA::NumFloatRegs; ++i) {
+ int fp_idx = i + TheISA::FP_Base_DepTag;
+ thread->renameTable[fp_idx]->setIntResult(xc->readFloatRegInt(i));
}
#if !FULL_SYSTEM
diff --git a/cpu/simple/cpu.cc b/cpu/simple/cpu.cc
index 0a4b3c3e4..eb19115b2 100644
--- a/cpu/simple/cpu.cc
+++ b/cpu/simple/cpu.cc
@@ -181,7 +181,9 @@ SimpleCPU::switchOut(Sampler *s)
_status = SwitchedOut;
if (tickEvent.scheduled())
- tickEvent.squash();
+ tickEvent.deschedule();
+
+ assert(!tickEvent.scheduled());
sampler->signalSwitched();
}
@@ -294,7 +296,7 @@ SimpleCPU::regStats()
void
SimpleCPU::resetStats()
{
- startNumInst = numInst;
+// startNumInst = numInst;
notIdleFraction = (_status != Idle);
}
@@ -352,6 +354,7 @@ SimpleCPU::copySrcTranslate(Addr src)
Fault fault = cpuXC->translateDataReadReq(memReq);
if (fault == NoFault) {
+ panic("We can't copy!");
cpuXC->copySrcAddr = src;
cpuXC->copySrcPhysAddr = memReq->paddr + offset;
} else {
@@ -600,6 +603,8 @@ SimpleCPU::dbg_vtophys(Addr addr)
void
SimpleCPU::processCacheCompletion()
{
+ Fault fault;
+
switch (status()) {
case IcacheMissStall:
icacheStallCycles += curTick - lastIcacheStall;
@@ -618,12 +623,17 @@ SimpleCPU::processCacheCompletion()
break;
case DcacheMissSwitch:
if (memReq->cmd.isRead()) {
- curStaticInst->execute(this,traceData);
+ fault = curStaticInst->execute(this,traceData);
if (traceData)
traceData->finalize();
+ } else {
+ fault = NoFault;
}
+ assert(fault == NoFault);
+ assert(!tickEvent.scheduled());
_status = SwitchedOut;
sampler->signalSwitched();
+ return;
case SwitchedOut:
// If this CPU has been switched out due to sampling/warm-up,
// ignore any further status changes (e.g., due to cache
@@ -787,9 +797,10 @@ SimpleCPU::tick()
}
if (cpuXC->profile) {
- bool usermode =
- (cpuXC->readMiscReg(AlphaISA::IPR_DTB_CM) & 0x18) != 0;
- cpuXC->profilePC = usermode ? 1 : cpuXC->readPC();
+// bool usermode =
+// (cpuXC->readMiscReg(AlphaISA::IPR_DTB_CM) & 0x18) != 0;
+// cpuXC->profilePC = usermode ? 1 : cpuXC->readPC();
+ cpuXC->profilePC = cpuXC->readPC();
ProfileNode *node = cpuXC->profile->consume(xcProxy, inst);
if (node)
cpuXC->profileNode = node;
@@ -849,8 +860,10 @@ SimpleCPU::tick()
status() == Idle ||
status() == DcacheMissStall);
- if (status() == Running && !tickEvent.scheduled())
+ if (status() == Running && !tickEvent.scheduled()) {
+ assert(_status != SwitchedOut);
tickEvent.schedule(curTick + cycles(1));
+ }
}
////////////////////////////////////////////////////////////////////////
@@ -863,6 +876,7 @@ BEGIN_DECLARE_SIM_OBJECT_PARAMS(SimpleCPU)
Param<Counter> max_insts_all_threads;
Param<Counter> max_loads_any_thread;
Param<Counter> max_loads_all_threads;
+ Param<Counter> stats_reset_inst;
Param<Tick> progress_interval;
#if FULL_SYSTEM
@@ -897,6 +911,8 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(SimpleCPU)
"terminate when any thread reaches this load count"),
INIT_PARAM(max_loads_all_threads,
"terminate when all threads have reached this load count"),
+ INIT_PARAM(stats_reset_inst,
+ "instruction to reset stats on"),
INIT_PARAM_DFLT(progress_interval, "CPU Progress interval", 0),
#if FULL_SYSTEM
@@ -930,6 +946,7 @@ CREATE_SIM_OBJECT(SimpleCPU)
params->max_insts_all_threads = max_insts_all_threads;
params->max_loads_any_thread = max_loads_any_thread;
params->max_loads_all_threads = max_loads_all_threads;
+ params->stats_reset_inst = stats_reset_inst;
params->deferRegistration = defer_registration;
params->clock = clock;
params->functionTrace = function_trace;