summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Kevin Lim <ktlim@umich.edu> 2006-08-24 17:29:34 -0400
committer Kevin Lim <ktlim@umich.edu> 2006-08-24 17:29:34 -0400
commit 74e8abd37ecd637a607f90e36aed1a3a16eea7da (patch)
tree 34f7eae768f3d5bb0fd3a54db8f709f80fea8f22
parent 5da3f70560cddce24969840dd97f82b03428ba67 (diff)
download gem5-74e8abd37ecd637a607f90e36aed1a3a16eea7da.tar.xz
Switch out fixups for the CPUs.
cpu/cpu_exec_context.cc:
    Be sure to switch over the kernel stats so things don't get messed up. This may lead to weird stats files for sampling runs (detailed stats should be correct, regardless of which kernel stats this is defined on).
cpu/o3/cpu.cc:
    Updates for switching out. Also include a bunch of debug info if needed.
cpu/o3/fetch_impl.hh:
    Switch out properly.
cpu/o3/inst_queue.hh:
cpu/o3/inst_queue_impl.hh:
    Comment out unused stats (they made the stats file huge).
cpu/o3/lsq_unit.hh:
cpu/o3/lsq_unit_impl.hh:
    Add in new stat.
cpu/o3/rename.hh:
    Fix up for switching out.
cpu/o3/rename_impl.hh:
    Fix up for switching out. Be sure to mark any Misc regs as ready if their renamed inst got squashed from being switched out.
cpu/ozone/cpu_impl.hh:
cpu/simple/cpu.cc:
    Switch out fixup.
sim/eventq.hh:
    Make CPU switching more immediate. Also comment out the assertion, as it doesn't apply if we're putting it on an inst-based queue.

--HG--
extra : convert_revision : f40ed40604738993f061e0c628810ff37a920562
-rw-r--r-- cpu/cpu_exec_context.cc 5
-rw-r--r-- cpu/o3/cpu.cc 40
-rw-r--r-- cpu/o3/fetch_impl.hh 3
-rw-r--r-- cpu/o3/inst_queue.hh 4
-rw-r--r-- cpu/o3/inst_queue_impl.hh 7
-rw-r--r-- cpu/o3/lsq_unit.hh 3
-rw-r--r-- cpu/o3/lsq_unit_impl.hh 5
-rw-r--r-- cpu/o3/rename.hh 2
-rw-r--r-- cpu/o3/rename_impl.hh 8
-rw-r--r-- cpu/ozone/cpu_impl.hh 59
-rw-r--r-- cpu/simple/cpu.cc 31
-rw-r--r-- sim/eventq.hh 6
12 files changed, 142 insertions, 31 deletions
diff --git a/cpu/cpu_exec_context.cc b/cpu/cpu_exec_context.cc
index 0dcf149fd..9f151dd6a 100644
--- a/cpu/cpu_exec_context.cc
+++ b/cpu/cpu_exec_context.cc
@@ -183,6 +183,11 @@ CPUExecContext::takeOverFrom(ExecContext *oldContext)
if (quiesceEvent) {
quiesceEvent->xc = proxy;
}
+
+ Kernel::Statistics *stats = oldContext->getKernelStats();
+ if (stats) {
+ kernelStats = stats;
+ }
#endif
storeCondFailures = 0;
diff --git a/cpu/o3/cpu.cc b/cpu/o3/cpu.cc
index 0025d4144..88de6c746 100644
--- a/cpu/o3/cpu.cc
+++ b/cpu/o3/cpu.cc
@@ -599,8 +599,11 @@ FullO3CPU<Impl>::activateContext(int tid, int delay)
// Be sure to signal that there's some activity so the CPU doesn't
// deschedule itself.
activityRec.activity();
+
+#if FULL_SYSTEM
if (thread[tid]->quiesceEvent && thread[tid]->quiesceEvent->scheduled())
thread[tid]->quiesceEvent->deschedule();
+#endif
fetch.wakeFromQuiesce();
@@ -671,6 +674,8 @@ template <class Impl>
void
FullO3CPU<Impl>::switchOut(Sampler *_sampler)
{
+ DPRINTF(FullCPU, "Switching out\n");
+ BaseCPU::switchOut(_sampler);
sampler = _sampler;
switchCount = 0;
fetch.switchOut();
@@ -694,6 +699,41 @@ FullO3CPU<Impl>::signalSwitched()
rename.doSwitchOut();
commit.doSwitchOut();
instList.clear();
+
+#ifndef NDEBUG
+ PhysRegIndex renamed_reg;
+ // First loop through the integer registers.
+ for (int i = 0; i < AlphaISA::NumIntRegs; ++i) {
+ renamed_reg = renameMap[0].lookup(i);
+ assert(renamed_reg == commitRenameMap[0].lookup(i));
+
+ DPRINTF(FullCPU, "FullCPU: Checking if register %i is ready.\n",
+ renamed_reg);
+
+ assert(scoreboard.getReg(renamed_reg));
+ }
+
+ // Then loop through the floating point registers.
+ for (int i = 0; i < AlphaISA::NumFloatRegs; ++i) {
+ renamed_reg = renameMap[0].lookup(i + AlphaISA::FP_Base_DepTag);
+ assert(renamed_reg == commitRenameMap[0].lookup(i + AlphaISA::FP_Base_DepTag));
+
+ DPRINTF(FullCPU, "FullCPU: Checking if register %i is ready.\n",
+ renamed_reg);
+
+ assert(scoreboard.getReg(renamed_reg));
+ }
+
+ for (int i = 0; i < AlphaISA::NumMiscRegs; ++i) {
+ renamed_reg = i + ((Params *)params)->numPhysFloatRegs + ((Params *)params)->numPhysIntRegs;
+
+ DPRINTF(FullCPU, "FullCPU: Checking if register %i is ready.\n",
+ renamed_reg);
+
+ assert(scoreboard.getReg(renamed_reg));
+ }
+#endif
+
while (!removeList.empty()) {
removeList.pop();
}
diff --git a/cpu/o3/fetch_impl.hh b/cpu/o3/fetch_impl.hh
index cc09c4a41..7a3292dbe 100644
--- a/cpu/o3/fetch_impl.hh
+++ b/cpu/o3/fetch_impl.hh
@@ -391,6 +391,7 @@ DefaultFetch<Impl>::takeOverFrom()
wroteToTimeBuffer = false;
_status = Inactive;
switchedOut = false;
+ interruptPending = false;
branchPred.takeOverFrom();
}
@@ -469,7 +470,7 @@ DefaultFetch<Impl>::fetchCacheLine(Addr fetch_PC, Fault &ret_fault, unsigned tid
unsigned flags = 0;
#endif // FULL_SYSTEM
- if (interruptPending && flags == 0) {
+ if (isSwitchedOut() || (interruptPending && flags == 0)) {
// Hold off fetch from getting new instructions while an interrupt
// is pending.
return false;
diff --git a/cpu/o3/inst_queue.hh b/cpu/o3/inst_queue.hh
index 80cd71f0d..e96fbc667 100644
--- a/cpu/o3/inst_queue.hh
+++ b/cpu/o3/inst_queue.hh
@@ -474,11 +474,11 @@ class InstructionQueue
Stats::Scalar<> iqSquashedNonSpecRemoved;
/** Distribution of number of instructions in the queue. */
- Stats::VectorDistribution<> queueResDist;
+// Stats::VectorDistribution<> queueResDist;
/** Distribution of the number of instructions issued. */
Stats::Distribution<> numIssuedDist;
/** Distribution of the cycles it takes to issue an instruction. */
- Stats::VectorDistribution<> issueDelayDist;
+// Stats::VectorDistribution<> issueDelayDist;
/** Number of times an instruction could not be issued because a
* FU was busy.
diff --git a/cpu/o3/inst_queue_impl.hh b/cpu/o3/inst_queue_impl.hh
index 72cb0d708..b6b06ca77 100644
--- a/cpu/o3/inst_queue_impl.hh
+++ b/cpu/o3/inst_queue_impl.hh
@@ -230,7 +230,7 @@ InstructionQueue<Impl>::regStats()
.name(name() + ".iqSquashedNonSpecRemoved")
.desc("Number of squashed non-spec instructions that were removed")
.prereq(iqSquashedNonSpecRemoved);
-
+/*
queueResDist
.init(Num_OpClasses, 0, 99, 2)
.name(name() + ".IQ:residence:")
@@ -240,6 +240,7 @@ InstructionQueue<Impl>::regStats()
for (int i = 0; i < Num_OpClasses; ++i) {
queueResDist.subname(i, opClassStrings[i]);
}
+*/
numIssuedDist
.init(0,totalWidth,1)
.name(name() + ".ISSUE:issued_per_cycle")
@@ -268,7 +269,7 @@ InstructionQueue<Impl>::regStats()
//
// How long did instructions for a particular FU type wait prior to issue
//
-
+/*
issueDelayDist
.init(Num_OpClasses,0,99,2)
.name(name() + ".ISSUE:")
@@ -281,7 +282,7 @@ InstructionQueue<Impl>::regStats()
subname << opClassStrings[i] << "_delay";
issueDelayDist.subname(i, subname.str());
}
-
+*/
issueRate
.name(name() + ".ISSUE:rate")
.desc("Inst issue rate")
diff --git a/cpu/o3/lsq_unit.hh b/cpu/o3/lsq_unit.hh
index fe174a97d..1db6dc02d 100644
--- a/cpu/o3/lsq_unit.hh
+++ b/cpu/o3/lsq_unit.hh
@@ -382,6 +382,9 @@ class LSQUnit {
* ignored due to the instruction already being squashed. */
Stats::Scalar<> lsqIgnoredResponses;
+ /** Total number of memory ordering violations. */
+ Stats::Scalar<> lsqMemOrderViolation;
+
/** Total number of squashed stores. */
Stats::Scalar<> lsqSquashedStores;
diff --git a/cpu/o3/lsq_unit_impl.hh b/cpu/o3/lsq_unit_impl.hh
index 5cc3078f8..7086c381e 100644
--- a/cpu/o3/lsq_unit_impl.hh
+++ b/cpu/o3/lsq_unit_impl.hh
@@ -144,6 +144,10 @@ LSQUnit<Impl>::regStats()
.name(name() + ".ignoredResponses")
.desc("Number of memory responses ignored because the instruction is squashed");
+ lsqMemOrderViolation
+ .name(name() + ".memOrderViolation")
+ .desc("Number of memory ordering violations");
+
lsqSquashedStores
.name(name() + ".squashedStores")
.desc("Number of stores squashed");
@@ -495,6 +499,7 @@ LSQUnit<Impl>::executeStore(DynInstPtr &store_inst)
// A load incorrectly passed this store. Squash and refetch.
// For now return a fault to show that it was unsuccessful.
memDepViolator = loadQueue[load_idx];
+ ++lsqMemOrderViolation;
return genMachineCheckFault();
}
diff --git a/cpu/o3/rename.hh b/cpu/o3/rename.hh
index 4912431ad..5769dbd37 100644
--- a/cpu/o3/rename.hh
+++ b/cpu/o3/rename.hh
@@ -411,6 +411,8 @@ class DefaultRename
/** The maximum skid buffer size. */
unsigned skidBufferMax;
+ PhysRegIndex maxPhysicalRegs;
+
/** Enum to record the source of a structure full stall. Can come from
* either ROB, IQ, LSQ, and it is prioritized in that order.
*/
diff --git a/cpu/o3/rename_impl.hh b/cpu/o3/rename_impl.hh
index 93f5b3504..49627e3d4 100644
--- a/cpu/o3/rename_impl.hh
+++ b/cpu/o3/rename_impl.hh
@@ -40,7 +40,8 @@ DefaultRename<Impl>::DefaultRename(Params *params)
commitToRenameDelay(params->commitToRenameDelay),
renameWidth(params->renameWidth),
commitWidth(params->commitWidth),
- numThreads(params->numberOfThreads)
+ numThreads(params->numberOfThreads),
+ maxPhysicalRegs(params->numPhysIntRegs + params->numPhysFloatRegs)
{
_status = Inactive;
@@ -283,6 +284,11 @@ DefaultRename<Impl>::doSwitchOut()
// Put the renamed physical register back on the free list.
freeList->addReg(hb_it->newPhysReg);
+ // Be sure to mark its register as ready if it's a misc register.
+ if (hb_it->newPhysReg >= maxPhysicalRegs) {
+ scoreboard->setReg(hb_it->newPhysReg);
+ }
+
historyBuffer[i].erase(hb_it++);
}
insts[i].clear();
diff --git a/cpu/ozone/cpu_impl.hh b/cpu/ozone/cpu_impl.hh
index 050bdb9a3..1a0de29f5 100644
--- a/cpu/ozone/cpu_impl.hh
+++ b/cpu/ozone/cpu_impl.hh
@@ -184,7 +184,9 @@ OzoneCPU<Impl>::OzoneCPU(Params *p)
globalSeqNum = 1;
+#if FULL_SYSTEM
checkInterrupts = false;
+#endif
lockFlag = 0;
@@ -213,6 +215,7 @@ template <class Impl>
void
OzoneCPU<Impl>::switchOut(Sampler *_sampler)
{
+ BaseCPU::switchOut(_sampler);
sampler = _sampler;
switchCount = 0;
// Front end needs state from back end, so switch out the back end first.
@@ -234,6 +237,16 @@ OzoneCPU<Impl>::signalSwitched()
checker->switchOut(sampler);
_status = SwitchedOut;
+#ifndef NDEBUG
+ // Loop through all registers
+ for (int i = 0; i < AlphaISA::TotalNumRegs; ++i) {
+ assert(thread.renameTable[i] == frontEnd->renameTable[i]);
+
+ assert(thread.renameTable[i] == backEnd->renameTable[i]);
+
+ DPRINTF(OzoneCPU, "Checking if register %i matches.\n", i);
+ }
+#endif
if (tickEvent.scheduled())
tickEvent.squash();
@@ -256,9 +269,16 @@ OzoneCPU<Impl>::takeOverFrom(BaseCPU *oldCPU)
frontEnd->takeOverFrom();
assert(!tickEvent.scheduled());
+#ifndef NDEBUG
+ // Check rename table.
+ for (int i = 0; i < TheISA::TotalNumRegs; ++i) {
+ assert(thread.renameTable[i]->isResultReady());
+ }
+#endif
+
// @todo: Fix hardcoded number
// Clear out any old information in time buffer.
- for (int i = 0; i < 6; ++i) {
+ for (int i = 0; i < 15; ++i) {
comm.advance();
}
@@ -291,8 +311,10 @@ OzoneCPU<Impl>::activateContext(int thread_num, int delay)
scheduleTickEvent(delay);
_status = Running;
thread._status = ExecContext::Active;
+#if FULL_SYSTEM
if (thread.quiesceEvent && thread.quiesceEvent->scheduled())
thread.quiesceEvent->deschedule();
+#endif
frontEnd->wakeFromQuiesce();
}
@@ -369,7 +391,7 @@ template <class Impl>
void
OzoneCPU<Impl>::resetStats()
{
- startNumInst = numInst;
+// startNumInst = numInst;
notIdleFraction = (_status != Idle);
}
@@ -777,7 +799,9 @@ OzoneCPU<Impl>::OzoneXC::halt()
template <class Impl>
void
OzoneCPU<Impl>::OzoneXC::dumpFuncProfile()
-{ }
+{
+ thread->dumpFuncProfile();
+}
#endif
template <class Impl>
@@ -797,6 +821,7 @@ OzoneCPU<Impl>::OzoneXC::takeOverFrom(ExecContext *old_context)
copyArchRegs(old_context);
setCpuId(old_context->readCpuId());
+ thread->inst = old_context->getInst();
#if !FULL_SYSTEM
setFuncExeInst(old_context->readFuncExeInst());
#else
@@ -869,16 +894,14 @@ template <class Impl>
void
OzoneCPU<Impl>::OzoneXC::profileClear()
{
- if (thread->profile)
- thread->profile->clear();
+ thread->profileClear();
}
template <class Impl>
void
OzoneCPU<Impl>::OzoneXC::profileSample()
{
- if (thread->profile)
- thread->profile->sample(thread->profileNode, thread->profilePC);
+ thread->profileSample();
}
#endif
@@ -906,14 +929,20 @@ OzoneCPU<Impl>::OzoneXC::copyArchRegs(ExecContext *xc)
cpu->frontEnd->setPC(thread->PC);
cpu->frontEnd->setNextPC(thread->nextPC);
- for (int i = 0; i < TheISA::TotalNumRegs; ++i) {
- if (i < TheISA::FP_Base_DepTag) {
- thread->renameTable[i]->setIntResult(xc->readIntReg(i));
- } else if (i < (TheISA::FP_Base_DepTag + TheISA::NumFloatRegs)) {
- int fp_idx = i - TheISA::FP_Base_DepTag;
- thread->renameTable[i]->setDoubleResult(
- xc->readFloatRegDouble(fp_idx));
- }
+ // First loop through the integer registers.
+ for (int i = 0; i < TheISA::NumIntRegs; ++i) {
+/* DPRINTF(OzoneCPU, "Copying over register %i, had data %lli, "
+ "now has data %lli.\n",
+ i, thread->renameTable[i]->readIntResult(),
+ xc->readIntReg(i));
+*/
+ thread->renameTable[i]->setIntResult(xc->readIntReg(i));
+ }
+
+ // Then loop through the floating point registers.
+ for (int i = 0; i < TheISA::NumFloatRegs; ++i) {
+ int fp_idx = i + TheISA::FP_Base_DepTag;
+ thread->renameTable[fp_idx]->setIntResult(xc->readFloatRegInt(i));
}
#if !FULL_SYSTEM
diff --git a/cpu/simple/cpu.cc b/cpu/simple/cpu.cc
index 0a4b3c3e4..eb19115b2 100644
--- a/cpu/simple/cpu.cc
+++ b/cpu/simple/cpu.cc
@@ -181,7 +181,9 @@ SimpleCPU::switchOut(Sampler *s)
_status = SwitchedOut;
if (tickEvent.scheduled())
- tickEvent.squash();
+ tickEvent.deschedule();
+
+ assert(!tickEvent.scheduled());
sampler->signalSwitched();
}
@@ -294,7 +296,7 @@ SimpleCPU::regStats()
void
SimpleCPU::resetStats()
{
- startNumInst = numInst;
+// startNumInst = numInst;
notIdleFraction = (_status != Idle);
}
@@ -352,6 +354,7 @@ SimpleCPU::copySrcTranslate(Addr src)
Fault fault = cpuXC->translateDataReadReq(memReq);
if (fault == NoFault) {
+ panic("We can't copy!");
cpuXC->copySrcAddr = src;
cpuXC->copySrcPhysAddr = memReq->paddr + offset;
} else {
@@ -600,6 +603,8 @@ SimpleCPU::dbg_vtophys(Addr addr)
void
SimpleCPU::processCacheCompletion()
{
+ Fault fault;
+
switch (status()) {
case IcacheMissStall:
icacheStallCycles += curTick - lastIcacheStall;
@@ -618,12 +623,17 @@ SimpleCPU::processCacheCompletion()
break;
case DcacheMissSwitch:
if (memReq->cmd.isRead()) {
- curStaticInst->execute(this,traceData);
+ fault = curStaticInst->execute(this,traceData);
if (traceData)
traceData->finalize();
+ } else {
+ fault = NoFault;
}
+ assert(fault == NoFault);
+ assert(!tickEvent.scheduled());
_status = SwitchedOut;
sampler->signalSwitched();
+ return;
case SwitchedOut:
// If this CPU has been switched out due to sampling/warm-up,
// ignore any further status changes (e.g., due to cache
@@ -787,9 +797,10 @@ SimpleCPU::tick()
}
if (cpuXC->profile) {
- bool usermode =
- (cpuXC->readMiscReg(AlphaISA::IPR_DTB_CM) & 0x18) != 0;
- cpuXC->profilePC = usermode ? 1 : cpuXC->readPC();
+// bool usermode =
+// (cpuXC->readMiscReg(AlphaISA::IPR_DTB_CM) & 0x18) != 0;
+// cpuXC->profilePC = usermode ? 1 : cpuXC->readPC();
+ cpuXC->profilePC = cpuXC->readPC();
ProfileNode *node = cpuXC->profile->consume(xcProxy, inst);
if (node)
cpuXC->profileNode = node;
@@ -849,8 +860,10 @@ SimpleCPU::tick()
status() == Idle ||
status() == DcacheMissStall);
- if (status() == Running && !tickEvent.scheduled())
+ if (status() == Running && !tickEvent.scheduled()) {
+ assert(_status != SwitchedOut);
tickEvent.schedule(curTick + cycles(1));
+ }
}
////////////////////////////////////////////////////////////////////////
@@ -863,6 +876,7 @@ BEGIN_DECLARE_SIM_OBJECT_PARAMS(SimpleCPU)
Param<Counter> max_insts_all_threads;
Param<Counter> max_loads_any_thread;
Param<Counter> max_loads_all_threads;
+ Param<Counter> stats_reset_inst;
Param<Tick> progress_interval;
#if FULL_SYSTEM
@@ -897,6 +911,8 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(SimpleCPU)
"terminate when any thread reaches this load count"),
INIT_PARAM(max_loads_all_threads,
"terminate when all threads have reached this load count"),
+ INIT_PARAM(stats_reset_inst,
+ "instruction to reset stats on"),
INIT_PARAM_DFLT(progress_interval, "CPU Progress interval", 0),
#if FULL_SYSTEM
@@ -930,6 +946,7 @@ CREATE_SIM_OBJECT(SimpleCPU)
params->max_insts_all_threads = max_insts_all_threads;
params->max_loads_any_thread = max_loads_any_thread;
params->max_loads_all_threads = max_loads_all_threads;
+ params->stats_reset_inst = stats_reset_inst;
params->deferRegistration = defer_registration;
params->clock = clock;
params->functionTrace = function_trace;
diff --git a/sim/eventq.hh b/sim/eventq.hh
index 5fc73bb53..b9a0abc12 100644
--- a/sim/eventq.hh
+++ b/sim/eventq.hh
@@ -43,6 +43,7 @@
#include "sim/host.hh" // for Tick
#include "base/fast_alloc.hh"
+#include "base/misc.hh"
#include "base/trace.hh"
#include "sim/serialize.hh"
@@ -131,7 +132,7 @@ class Event : public Serializable, public FastAlloc
/// same cycle (after unscheduling the old CPU's tick event).
/// The switch needs to come before any tick events to make
/// sure we don't tick both CPUs in the same cycle.
- CPU_Switch_Pri = 31,
+ CPU_Switch_Pri = -31,
/// Serialization needs to occur before tick events also, so
/// that a serialize/unserialize is identical to an on-line
@@ -344,7 +345,8 @@ inline void
Event::schedule(Tick t)
{
assert(!scheduled());
- assert(t >= curTick);
+// if (t < curTick)
+// warn("t is less than curTick, ensure you don't want cycles");
setFlags(Scheduled);
#if TRACING_ON