author     Kevin Lim <ktlim@umich.edu>    2006-05-04 11:36:20 -0400
committer  Kevin Lim <ktlim@umich.edu>    2006-05-04 11:36:20 -0400
commit     f3358e5f7b6452f14a6df5106129ef0cb2ed8b65 (patch)
tree       284685f873ef56b9c9ae95131129c51193d3185f
parent     4601230d35de7bbda5906d04a28e2387f0e5177b (diff)
download   gem5-f3358e5f7b6452f14a6df5106129ef0cb2ed8b65.tar.xz
O3 CPU now handles being used with the sampler.
cpu/o3/2bit_local_pred.cc:
cpu/o3/2bit_local_pred.hh:
cpu/o3/bpred_unit.hh:
cpu/o3/bpred_unit_impl.hh:
cpu/o3/btb.cc:
cpu/o3/btb.hh:
cpu/o3/commit.hh:
cpu/o3/commit_impl.hh:
cpu/o3/cpu.cc:
cpu/o3/cpu.hh:
cpu/o3/decode.hh:
cpu/o3/decode_impl.hh:
cpu/o3/fetch.hh:
cpu/o3/fetch_impl.hh:
cpu/o3/fu_pool.cc:
cpu/o3/fu_pool.hh:
cpu/o3/iew.hh:
cpu/o3/iew_impl.hh:
cpu/o3/inst_queue.hh:
cpu/o3/inst_queue_impl.hh:
cpu/o3/lsq.hh:
cpu/o3/lsq_impl.hh:
cpu/o3/lsq_unit.hh:
cpu/o3/lsq_unit_impl.hh:
cpu/o3/mem_dep_unit.hh:
cpu/o3/mem_dep_unit_impl.hh:
cpu/o3/ras.cc:
cpu/o3/ras.hh:
cpu/o3/rename.hh:
cpu/o3/rename_impl.hh:
cpu/o3/rob.hh:
cpu/o3/rob_impl.hh:
cpu/o3/sat_counter.cc:
cpu/o3/sat_counter.hh:
cpu/o3/thread_state.hh:
    Handle switching out and taking over. Needs to be able to reset all state.
cpu/o3/alpha_cpu_impl.hh:
    Handle taking over from another XC.

--HG--
extra : convert_revision : b936e826f0f8a18319bfa940ff35097b4192b449
-rw-r--r--  cpu/o3/2bit_local_pred.cc    |   8
-rw-r--r--  cpu/o3/2bit_local_pred.hh    |   2
-rw-r--r--  cpu/o3/alpha_cpu_impl.hh     |  20
-rw-r--r--  cpu/o3/bpred_unit.hh         |   4
-rw-r--r--  cpu/o3/bpred_unit_impl.hh    |  21
-rw-r--r--  cpu/o3/btb.cc                |   8
-rw-r--r--  cpu/o3/btb.hh                |   2
-rw-r--r--  cpu/o3/commit.hh             |   6
-rw-r--r--  cpu/o3/commit_impl.hh        |  38
-rw-r--r--  cpu/o3/cpu.cc                |  76
-rw-r--r--  cpu/o3/cpu.hh                |   9
-rw-r--r--  cpu/o3/decode.hh             |   5
-rw-r--r--  cpu/o3/decode_impl.hh        |  50
-rw-r--r--  cpu/o3/fetch.hh              |  13
-rw-r--r--  cpu/o3/fetch_impl.hh         |  70
-rw-r--r--  cpu/o3/fu_pool.cc            |  14
-rw-r--r--  cpu/o3/fu_pool.hh            |   3
-rw-r--r--  cpu/o3/iew.hh                |   8
-rw-r--r--  cpu/o3/iew_impl.hh           |  56
-rw-r--r--  cpu/o3/inst_queue.hh         |  14
-rw-r--r--  cpu/o3/inst_queue_impl.hh    | 160
-rw-r--r--  cpu/o3/lsq.hh                |  12
-rw-r--r--  cpu/o3/lsq_impl.hh           |  19
-rw-r--r--  cpu/o3/lsq_unit.hh           |  49
-rw-r--r--  cpu/o3/lsq_unit_impl.hh      |  90
-rw-r--r--  cpu/o3/mem_dep_unit.hh       |   4
-rw-r--r--  cpu/o3/mem_dep_unit_impl.hh  |  20
-rw-r--r--  cpu/o3/ras.cc                |   9
-rw-r--r--  cpu/o3/ras.hh                |   2
-rw-r--r--  cpu/o3/rename.hh             |   5
-rw-r--r--  cpu/o3/rename_impl.hh        |  67
-rw-r--r--  cpu/o3/rob.hh                |   4
-rw-r--r--  cpu/o3/rob_impl.hh           |  25
-rw-r--r--  cpu/o3/sat_counter.cc        |  24
-rw-r--r--  cpu/o3/sat_counter.hh        |  19
-rw-r--r--  cpu/o3/thread_state.hh       |   2
36 files changed, 786 insertions, 152 deletions
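
Orientation before the diff itself: the patch gives every O3 pipeline stage a switchOut()/takeOverFrom() pair. switchOut() discards in-flight state and, at the CPU level, notifies the sampler via signalSwitched(); takeOverFrom() resets each stage and copies architectural state from the old execution context. The sketch below only illustrates that protocol and is not gem5 code; the names SamplerStub, Stage, FetchLike, and SwitchableCPU are invented for the example.

// Illustrative sketch of the switch-out / take-over protocol, assuming the
// simplified stand-in classes named above; only the call shape is taken
// from the diff that follows.
#include <iostream>
#include <vector>

struct SamplerStub {
    // FullO3CPU<Impl>::switchOut() in the patch ends by calling
    // sampler->signalSwitched(); this stub just logs it.
    void signalSwitched() { std::cout << "sampler: switch complete\n"; }
};

struct Stage {
    virtual void switchOut() = 0;      // discard in-flight, speculative state
    virtual void takeOverFrom() = 0;   // reset to a clean, runnable state
    virtual ~Stage() = default;
};

struct FetchLike : Stage {
    bool switchedOut = false;
    void switchOut() override { switchedOut = true; }
    void takeOverFrom() override {
        // analogous to DefaultFetch resetting its PCs, stalls, and status
        switchedOut = false;
    }
};

struct SwitchableCPU {
    std::vector<Stage*> stages;

    void switchOut(SamplerStub *sampler) {
        for (Stage *s : stages)
            s->switchOut();            // every stage clears its queues/state
        sampler->signalSwitched();     // then the sampler is told to proceed
    }

    void takeOverFrom(const SwitchableCPU & /*oldCPU*/) {
        for (Stage *s : stages)
            s->takeOverFrom();         // reinitialize each stage...
        // ...then copy architectural state (regs, PC, status) from the old
        // CPU's execution contexts, as AlphaXC::takeOverFrom() does.
    }
};

int main() {
    FetchLike oldFetch, newFetch;
    SamplerStub sampler;
    SwitchableCPU oldCPU{{&oldFetch}};
    SwitchableCPU newCPU{{&newFetch}};

    oldCPU.switchOut(&sampler);        // old CPU drains and signals
    newCPU.takeOverFrom(oldCPU);       // new CPU resets and resumes
    return 0;
}
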
diff --git a/cpu/o3/2bit_local_pred.cc b/cpu/o3/2bit_local_pred.cc
index 458fbd663..eab98531d 100644
--- a/cpu/o3/2bit_local_pred.cc
+++ b/cpu/o3/2bit_local_pred.cc
@@ -67,6 +67,14 @@ DefaultBP::DefaultBP(unsigned _localPredictorSize,
instShiftAmt);
}
+void
+DefaultBP::reset()
+{
+ for (int i = 0; i < localPredictorSets; ++i) {
+ localCtrs[i].reset();
+ }
+}
+
bool
DefaultBP::lookup(Addr &branch_addr)
{
diff --git a/cpu/o3/2bit_local_pred.hh b/cpu/o3/2bit_local_pred.hh
index 38d3f4842..0dfe53819 100644
--- a/cpu/o3/2bit_local_pred.hh
+++ b/cpu/o3/2bit_local_pred.hh
@@ -62,6 +62,8 @@ class DefaultBP
*/
void update(Addr &branch_addr, bool taken);
+ void reset();
+
private:
/**
diff --git a/cpu/o3/alpha_cpu_impl.hh b/cpu/o3/alpha_cpu_impl.hh
index 86f7d9f28..7a2d5d2b9 100644
--- a/cpu/o3/alpha_cpu_impl.hh
+++ b/cpu/o3/alpha_cpu_impl.hh
@@ -151,6 +151,26 @@ template <class Impl>
void
AlphaFullCPU<Impl>::AlphaXC::takeOverFrom(ExecContext *old_context)
{
+ // some things should already be set up
+ assert(getMemPtr() == old_context->getMemPtr());
+#if FULL_SYSTEM
+ assert(getSystemPtr() == old_context->getSystemPtr());
+#else
+ assert(getProcessPtr() == old_context->getProcessPtr());
+#endif
+
+ // copy over functional state
+ setStatus(old_context->status());
+ copyArchRegs(old_context);
+ setCpuId(old_context->readCpuId());
+#if !FULL_SYSTEM
+ thread->funcExeInst = old_context->readFuncExeInst();
+#endif
+
+ old_context->setStatus(ExecContext::Unallocated);
+
+ thread->inSyscall = false;
+ thread->trapPending = false;
}
template <class Impl>
diff --git a/cpu/o3/bpred_unit.hh b/cpu/o3/bpred_unit.hh
index 67c300989..ee7ffc183 100644
--- a/cpu/o3/bpred_unit.hh
+++ b/cpu/o3/bpred_unit.hh
@@ -67,6 +67,10 @@ class TwobitBPredUnit
*/
void regStats();
+ void switchOut();
+
+ void takeOverFrom();
+
/**
* Predicts whether or not the instruction is a taken branch, and the
* target of the branch if it is taken.
diff --git a/cpu/o3/bpred_unit_impl.hh b/cpu/o3/bpred_unit_impl.hh
index f79b67b6c..872c0c62e 100644
--- a/cpu/o3/bpred_unit_impl.hh
+++ b/cpu/o3/bpred_unit_impl.hh
@@ -95,6 +95,26 @@ TwobitBPredUnit<Impl>::regStats()
}
template <class Impl>
+void
+TwobitBPredUnit<Impl>::switchOut()
+{
+ for (int i = 0; i < Impl::MaxThreads; ++i) {
+ predHist[i].clear();
+ }
+}
+
+template <class Impl>
+void
+TwobitBPredUnit<Impl>::takeOverFrom()
+{
+ for (int i = 0; i < Impl::MaxThreads; ++i)
+ RAS[i].reset();
+
+ BP.reset();
+ BTB.reset();
+}
+
+template <class Impl>
bool
TwobitBPredUnit<Impl>::predict(DynInstPtr &inst, Addr &PC, unsigned tid)
{
@@ -297,5 +317,6 @@ TwobitBPredUnit<Impl>::squash(const InstSeqNum &squashed_sn,
BP.update(pred_hist.front().PC, actually_taken);
BTB.update(pred_hist.front().PC, corr_target, tid);
+ pred_hist.pop_front();
}
}
diff --git a/cpu/o3/btb.cc b/cpu/o3/btb.cc
index e084142d7..e5f69043a 100644
--- a/cpu/o3/btb.cc
+++ b/cpu/o3/btb.cc
@@ -58,6 +58,14 @@ DefaultBTB::DefaultBTB(unsigned _numEntries,
tagShiftAmt = instShiftAmt + floorLog2(numEntries);
}
+void
+DefaultBTB::reset()
+{
+ for (int i = 0; i < numEntries; ++i) {
+ btb[i].valid = false;
+ }
+}
+
inline
unsigned
DefaultBTB::getIndex(const Addr &inst_PC)
diff --git a/cpu/o3/btb.hh b/cpu/o3/btb.hh
index aaa9945f7..b9ff42573 100644
--- a/cpu/o3/btb.hh
+++ b/cpu/o3/btb.hh
@@ -65,6 +65,8 @@ class DefaultBTB
DefaultBTB(unsigned numEntries, unsigned tagBits,
unsigned instShiftAmt);
+ void reset();
+
/** Looks up an address in the BTB. Must call valid() first on the address.
* @param inst_PC The address of the branch to look up.
* @param tid The thread id.
diff --git a/cpu/o3/commit.hh b/cpu/o3/commit.hh
index f374b8fb7..028bd5295 100644
--- a/cpu/o3/commit.hh
+++ b/cpu/o3/commit.hh
@@ -175,6 +175,10 @@ class DefaultCommit
/** Initializes stage by sending back the number of free entries. */
void initStage();
+ void switchOut();
+
+ void takeOverFrom();
+
/** Ticks the commit stage, which tries to commit instructions. */
void tick();
@@ -351,6 +355,8 @@ class DefaultCommit
/** Number of Active Threads */
unsigned numThreads;
+ bool switchedOut;
+
Tick trapLatency;
Tick fetchTrapLatency;
diff --git a/cpu/o3/commit_impl.hh b/cpu/o3/commit_impl.hh
index 157e688c7..7834460e2 100644
--- a/cpu/o3/commit_impl.hh
+++ b/cpu/o3/commit_impl.hh
@@ -54,6 +54,7 @@ template <class Impl>
void
DefaultCommit<Impl>::TrapEvent::process()
{
+ // This will get reset if it was switched out.
commit->trapSquash[tid] = true;
}
@@ -75,7 +76,8 @@ DefaultCommit<Impl>::DefaultCommit(Params *params)
renameWidth(params->renameWidth),
iewWidth(params->executeWidth),
commitWidth(params->commitWidth),
- numThreads(params->numberOfThreads)
+ numThreads(params->numberOfThreads),
+ switchedOut(false)
{
_status = Active;
_nextStatus = Inactive;
@@ -254,6 +256,9 @@ DefaultCommit<Impl>::setCPU(FullCPU *cpu_ptr)
// Commit must broadcast the number of free entries it has at the start of
// the simulation, so it starts as active.
cpu->activateStage(FullCPU::CommitIdx);
+
+ trapLatency = cpu->cycles(6);
+ fetchTrapLatency = cpu->cycles(12);
}
template <class Impl>
@@ -362,6 +367,29 @@ DefaultCommit<Impl>::initStage()
template <class Impl>
void
+DefaultCommit<Impl>::switchOut()
+{
+ rob->switchOut();
+}
+
+template <class Impl>
+void
+DefaultCommit<Impl>::takeOverFrom()
+{
+ _status = Active;
+ _nextStatus = Inactive;
+ for (int i=0; i < numThreads; i++) {
+ commitStatus[i] = Idle;
+ changedROBNumEntries[i] = false;
+ trapSquash[i] = false;
+ xcSquash[i] = false;
+ }
+ squashCounter = 0;
+ rob->takeOverFrom();
+}
+
+template <class Impl>
+void
DefaultCommit<Impl>::updateStatus()
{
if (commitStatus[0] == TrapPending ||
@@ -719,8 +747,9 @@ DefaultCommit<Impl>::commit()
while (threads != (*activeThreads).end()) {
unsigned tid = *threads++;
- if (fromFetch->fetchFault) {
+ if (fromFetch->fetchFault && commitStatus[0] != TrapPending) {
// Record the fault. Wait until it's empty in the ROB. Then handle the trap.
+ // Ignore it if there's already a trap pending as fetch will be redirected.
fetchFault = fromFetch->fetchFault;
fetchFaultSN = fromFetch->fetchFaultSN;
fetchFaultTick = curTick + fetchTrapLatency;
@@ -975,6 +1004,7 @@ DefaultCommit<Impl>::commitInsts()
}
PC[tid] = nextPC[tid];
+ nextPC[tid] = nextPC[tid] + sizeof(TheISA::MachInst);
#if FULL_SYSTEM
int count = 0;
Addr oldpc;
@@ -1002,6 +1032,10 @@ DefaultCommit<Impl>::commitInsts()
DPRINTF(CommitRate, "%i\n", num_committed);
numCommittedDist.sample(num_committed);
+
+ if (num_committed == commitWidth) {
+ commit_eligible[0]++;
+ }
}
template <class Impl>
diff --git a/cpu/o3/cpu.cc b/cpu/o3/cpu.cc
index ac8c4236e..fc8372026 100644
--- a/cpu/o3/cpu.cc
+++ b/cpu/o3/cpu.cc
@@ -124,6 +124,7 @@ FullO3CPU<Impl>::FullO3CPU(Params *params)
mem(params->mem),
#else
// pTable(params->pTable),
+ mem(params->workload[0]->getMemory()),
#endif // FULL_SYSTEM
icacheInterface(params->icacheInterface),
@@ -176,9 +177,9 @@ FullO3CPU<Impl>::FullO3CPU(Params *params)
numThreads = number_of_threads;
#if !FULL_SYSTEM
- int activeThreads = params->workload.size();
+ int active_threads = params->workload.size();
#else
- int activeThreads = 1;
+ int active_threads = 1;
#endif
assert(params->numPhysIntRegs >= numThreads * TheISA::NumIntRegs);
@@ -192,7 +193,7 @@ FullO3CPU<Impl>::FullO3CPU(Params *params)
PhysRegIndex freg_idx = params->numPhysIntRegs; //Index to 1 after int regs
for (int tid=0; tid < numThreads; tid++) {
- bool bindRegs = (tid <= activeThreads - 1);
+ bool bindRegs = (tid <= active_threads - 1);
commitRenameMap[tid].init(TheISA::NumIntRegs,
params->numPhysIntRegs,
@@ -357,7 +358,7 @@ FullO3CPU<Impl>::tick()
}
if (activityCount && !tickEvent.scheduled()) {
- tickEvent.schedule(curTick + 1);
+ tickEvent.schedule(curTick + cycles(1));
}
#if !FULL_SYSTEM
@@ -370,8 +371,8 @@ template <class Impl>
void
FullO3CPU<Impl>::init()
{
- if (deferRegistration) {
- return;
+ if (!deferRegistration) {
+ registerExecContexts();
}
// Set inSyscall so that the CPU doesn't squash when initially
@@ -379,7 +380,6 @@ FullO3CPU<Impl>::init()
for (int i = 0; i < number_of_threads; ++i)
thread[i]->inSyscall = true;
- registerExecContexts();
// Need to do a copy of the xc->regs into the CPU's regfile so
// that it can start properly.
@@ -388,7 +388,7 @@ FullO3CPU<Impl>::init()
// Need to do a copy of the xc->regs into the CPU's regfile so
// that it can start properly.
#if FULL_SYSTEM
- ExecContext *src_xc = system->execContexts[tid];
+ ExecContext *src_xc = execContexts[tid];
#else
ExecContext *src_xc = thread[tid]->getXCProxy();
#endif
@@ -584,7 +584,7 @@ FullO3CPU<Impl>::activateContext(int tid, int delay)
activeThreads.push_back(tid);
}
- assert(_status == Idle);
+ assert(_status == Idle || _status == SwitchedOut);
scheduleTickEvent(delay);
@@ -658,21 +658,64 @@ FullO3CPU<Impl>::haltContext(int tid)
template <class Impl>
void
-FullO3CPU<Impl>::switchOut()
+FullO3CPU<Impl>::switchOut(Sampler *sampler)
{
- panic("FullO3CPU does not have a switch out function.\n");
+// panic("FullO3CPU does not have a switch out function.\n");
+ fetch.switchOut();
+ decode.switchOut();
+ rename.switchOut();
+ iew.switchOut();
+ commit.switchOut();
+ if (tickEvent.scheduled())
+ tickEvent.squash();
+ sampler->signalSwitched();
+ _status = SwitchedOut;
}
template <class Impl>
void
FullO3CPU<Impl>::takeOverFrom(BaseCPU *oldCPU)
{
+ for (int i = 0; i < 6; ++i) {
+ timeBuffer.advance();
+ fetchQueue.advance();
+ decodeQueue.advance();
+ renameQueue.advance();
+ iewQueue.advance();
+ activityBuffer.advance();
+ }
+
+ activityCount = 0;
+ bzero(&stageActive, sizeof(stageActive));
+
BaseCPU::takeOverFrom(oldCPU);
+ fetch.takeOverFrom();
+ decode.takeOverFrom();
+ rename.takeOverFrom();
+ iew.takeOverFrom();
+ commit.takeOverFrom();
+
assert(!tickEvent.scheduled());
+ // @todo: Figure out how to properly select the tid to put onto the active threads list.
+ int tid = 0;
+
+ list<unsigned>::iterator isActive = find(
+ activeThreads.begin(), activeThreads.end(), tid);
+
+ if (isActive == activeThreads.end()) {
+ //May Need to Re-code this if the delay variable is the
+ //delay needed for thread to activate
+ DPRINTF(FullCPU, "Adding Thread %i to active threads list\n",
+ tid);
+
+ activeThreads.push_back(tid);
+ }
+
// Set all status's to active, schedule the
// CPU's tick event.
+ // @todo: Fix up statuses so this is handled properly
for (int i = 0; i < execContexts.size(); ++i) {
ExecContext *xc = execContexts[i];
if (xc->status() == ExecContext::Active && _status != Running) {
@@ -680,6 +723,8 @@ FullO3CPU<Impl>::takeOverFrom(BaseCPU *oldCPU)
tickEvent.schedule(curTick);
}
}
+ if (!tickEvent.scheduled())
+ tickEvent.schedule(curTick);
}
template <class Impl>
@@ -758,7 +803,8 @@ template <class Impl>
float
FullO3CPU<Impl>::readArchFloatRegSingle(int reg_idx, unsigned tid)
{
- PhysRegIndex phys_reg = commitRenameMap[tid].lookup(reg_idx);
+ int idx = reg_idx + TheISA::FP_Base_DepTag;
+ PhysRegIndex phys_reg = commitRenameMap[tid].lookup(idx);
return regFile.readFloatRegSingle(phys_reg);
}
@@ -767,7 +813,8 @@ template <class Impl>
double
FullO3CPU<Impl>::readArchFloatRegDouble(int reg_idx, unsigned tid)
{
- PhysRegIndex phys_reg = commitRenameMap[tid].lookup(reg_idx);
+ int idx = reg_idx + TheISA::FP_Base_DepTag;
+ PhysRegIndex phys_reg = commitRenameMap[tid].lookup(idx);
return regFile.readFloatRegDouble(phys_reg);
}
@@ -776,7 +823,8 @@ template <class Impl>
uint64_t
FullO3CPU<Impl>::readArchFloatRegInt(int reg_idx, unsigned tid)
{
- PhysRegIndex phys_reg = commitRenameMap[tid].lookup(reg_idx);
+ int idx = reg_idx + TheISA::FP_Base_DepTag;
+ PhysRegIndex phys_reg = commitRenameMap[tid].lookup(idx);
return regFile.readFloatRegInt(phys_reg);
}
diff --git a/cpu/o3/cpu.hh b/cpu/o3/cpu.hh
index 91eaf9d6f..621ddf541 100644
--- a/cpu/o3/cpu.hh
+++ b/cpu/o3/cpu.hh
@@ -82,7 +82,8 @@ class FullO3CPU : public BaseFullCPU
Running,
Idle,
Halted,
- Blocked
+ Blocked,
+ SwitchedOut
};
/** Overall CPU status. */
@@ -112,9 +113,9 @@ class FullO3CPU : public BaseFullCPU
void scheduleTickEvent(int delay)
{
if (tickEvent.squashed())
- tickEvent.reschedule(curTick + delay);
+ tickEvent.reschedule(curTick + cycles(delay));
else if (!tickEvent.scheduled())
- tickEvent.schedule(curTick + delay);
+ tickEvent.schedule(curTick + cycles(delay));
}
/** Unschedule tick event, regardless of its current state. */
@@ -196,7 +197,7 @@ class FullO3CPU : public BaseFullCPU
/** Switches out this CPU.
* @todo: Implement this.
*/
- void switchOut();
+ void switchOut(Sampler *sampler);
/** Takes over from another CPU.
* @todo: Implement this.
diff --git a/cpu/o3/decode.hh b/cpu/o3/decode.hh
index 279ff556e..3f3f68247 100644
--- a/cpu/o3/decode.hh
+++ b/cpu/o3/decode.hh
@@ -107,6 +107,9 @@ class DefaultDecode
/** Sets pointer to list of active threads. */
void setActiveThreads(std::list<unsigned> *at_ptr);
+ void switchOut();
+
+ void takeOverFrom();
/** Ticks decode, processing all input signals and decoding as many
* instructions as possible.
*/
@@ -272,6 +275,8 @@ class DefaultDecode
Stats::Scalar<> decodeUnblockCycles;
/** Stat for total number of squashing cycles. */
Stats::Scalar<> decodeSquashCycles;
+ /** Stat for number of times a branch is resolved at decode. */
+ Stats::Scalar<> decodeBranchResolved;
/** Stat for number of times a branch mispredict is detected. */
Stats::Scalar<> decodeBranchMispred;
/** Stat for number of times decode detected a non-control instruction
diff --git a/cpu/o3/decode_impl.hh b/cpu/o3/decode_impl.hh
index f1aea27b4..caa97067b 100644
--- a/cpu/o3/decode_impl.hh
+++ b/cpu/o3/decode_impl.hh
@@ -66,40 +66,44 @@ void
DefaultDecode<Impl>::regStats()
{
decodeIdleCycles
- .name(name() + ".decodeIdleCycles")
+ .name(name() + ".DECODE:IdleCycles")
.desc("Number of cycles decode is idle")
.prereq(decodeIdleCycles);
decodeBlockedCycles
- .name(name() + ".decodeBlockedCycles")
+ .name(name() + ".DECODE:BlockedCycles")
.desc("Number of cycles decode is blocked")
.prereq(decodeBlockedCycles);
decodeRunCycles
- .name(name() + ".decodeRunCycles")
+ .name(name() + ".DECODE:RunCycles")
.desc("Number of cycles decode is running")
.prereq(decodeRunCycles);
decodeUnblockCycles
- .name(name() + ".decodeUnblockCycles")
+ .name(name() + ".DECODE:UnblockCycles")
.desc("Number of cycles decode is unblocking")
.prereq(decodeUnblockCycles);
decodeSquashCycles
- .name(name() + ".decodeSquashCycles")
+ .name(name() + ".DECODE:SquashCycles")
.desc("Number of cycles decode is squashing")
.prereq(decodeSquashCycles);
+ decodeBranchResolved
+ .name(name() + ".DECODE:BranchResolved")
+ .desc("Number of times decode resolved a branch")
+ .prereq(decodeBranchResolved);
decodeBranchMispred
- .name(name() + ".decodeBranchMispred")
+ .name(name() + ".DECODE:BranchMispred")
.desc("Number of times decode detected a branch misprediction")
.prereq(decodeBranchMispred);
decodeControlMispred
- .name(name() + ".decodeControlMispred")
+ .name(name() + ".DECODE:ControlMispred")
.desc("Number of times decode detected an instruction incorrectly"
" predicted as a control")
.prereq(decodeControlMispred);
decodeDecodedInsts
- .name(name() + ".decodeDecodedInsts")
+ .name(name() + ".DECODE:DecodedInsts")
.desc("Number of instructions handled by decode")
.prereq(decodeDecodedInsts);
decodeSquashedInsts
- .name(name() + ".decodeSquashedInsts")
+ .name(name() + ".DECODE:SquashedInsts")
.desc("Number of squashed instructions handled by decode")
.prereq(decodeSquashedInsts);
}
@@ -158,6 +162,33 @@ DefaultDecode<Impl>::setActiveThreads(list<unsigned> *at_ptr)
activeThreads = at_ptr;
}
+template <class Impl>
+void
+DefaultDecode<Impl>::switchOut()
+{
+}
+
+template <class Impl>
+void
+DefaultDecode<Impl>::takeOverFrom()
+{
+ _status = Inactive;
+
+ for (int i = 0; i < numThreads; ++i) {
+ decodeStatus[i] = Idle;
+
+ stalls[i].rename = false;
+ stalls[i].iew = false;
+ stalls[i].commit = false;
+ while (!insts[i].empty())
+ insts[i].pop();
+ while (!skidBuffer[i].empty())
+ skidBuffer[i].pop();
+ branchCount[i] = 0;
+ }
+ wroteToTimeBuffer = false;
+}
+
template<class Impl>
bool
DefaultDecode<Impl>::checkStall(unsigned tid) const
@@ -680,6 +711,7 @@ DefaultDecode<Impl>::decodeInsts(unsigned tid)
// Go ahead and compute any PC-relative branches.
if (inst->isDirectCtrl() && inst->isUncondCtrl()) {
+ ++decodeBranchResolved;
inst->setNextPC(inst->branchTarget());
if (inst->mispredicted()) {
diff --git a/cpu/o3/fetch.hh b/cpu/o3/fetch.hh
index f0b15cb86..6074831c6 100644
--- a/cpu/o3/fetch.hh
+++ b/cpu/o3/fetch.hh
@@ -35,6 +35,8 @@
#include "mem/mem_interface.hh"
#include "sim/eventq.hh"
+class Sampler;
+
/**
* DefaultFetch class handles both single threaded and SMT fetch. Its width is
* specified by the parameters; each cycle it tries to fetch that many
@@ -81,6 +83,7 @@ class DefaultFetch
Fetching,
TrapPending,
QuiescePending,
+ SwitchOut,
IcacheMissStall,
IcacheMissComplete
};
@@ -160,6 +163,12 @@ class DefaultFetch
/** Processes cache completion event. */
void processCacheCompletion(MemReqPtr &req);
+ void switchOut();
+
+ void takeOverFrom();
+
+ bool isSwitchedOut() { return switchedOut; }
+
void wakeFromQuiesce();
private:
@@ -360,6 +369,8 @@ class DefaultFetch
bool interruptPending;
+ bool switchedOut;
+
#if !FULL_SYSTEM
/** Page table pointer. */
// PageTable *pTable;
@@ -382,6 +393,8 @@ class DefaultFetch
*/
Stats::Scalar<> fetchIdleCycles;
Stats::Scalar<> fetchBlockedCycles;
+
+ Stats::Scalar<> fetchMiscStallCycles;
/** Stat for total number of fetched cache lines. */
Stats::Scalar<> fetchedCacheLines;
diff --git a/cpu/o3/fetch_impl.hh b/cpu/o3/fetch_impl.hh
index 563a767df..92f923c65 100644
--- a/cpu/o3/fetch_impl.hh
+++ b/cpu/o3/fetch_impl.hh
@@ -169,53 +169,59 @@ void
DefaultFetch<Impl>::regStats()
{
icacheStallCycles
- .name(name() + ".icacheStallCycles")
+ .name(name() + ".FETCH:icacheStallCycles")
.desc("Number of cycles fetch is stalled on an Icache miss")
.prereq(icacheStallCycles);
fetchedInsts
- .name(name() + ".fetchedInsts")
+ .name(name() + ".FETCH:Insts")
.desc("Number of instructions fetch has processed")
.prereq(fetchedInsts);
fetchedBranches
- .name(name() + ".fetchedBranches")
+ .name(name() + ".FETCH:Branches")
.desc("Number of branches that fetch encountered")
.prereq(fetchedBranches);
predictedBranches
- .name(name() + ".predictedBranches")
+ .name(name() + ".FETCH:predictedBranches")
.desc("Number of branches that fetch has predicted taken")
.prereq(predictedBranches);
fetchCycles
- .name(name() + ".fetchCycles")
+ .name(name() + ".FETCH:Cycles")
.desc("Number of cycles fetch has run and was not squashing or"
" blocked")
.prereq(fetchCycles);
fetchSquashCycles
- .name(name() + ".fetchSquashCycles")
+ .name(name() + ".FETCH:SquashCycles")
.desc("Number of cycles fetch has spent squashing")
.prereq(fetchSquashCycles);
fetchIdleCycles
- .name(name() + ".fetchIdleCycles")
+ .name(name() + ".FETCH:IdleCycles")
.desc("Number of cycles fetch was idle")
.prereq(fetchIdleCycles);
fetchBlockedCycles
- .name(name() + ".fetchBlockedCycles")
+ .name(name() + ".FETCH:BlockedCycles")
.desc("Number of cycles fetch has spent blocked")
.prereq(fetchBlockedCycles);
fetchedCacheLines
- .name(name() + ".fetchedCacheLines")
+ .name(name() + ".FETCH:CacheLines")
.desc("Number of cache lines fetched")
.prereq(fetchedCacheLines);
+ fetchMiscStallCycles
+ .name(name() + ".FETCH:MiscStallCycles")
+ .desc("Number of cycles fetch has spent waiting on interrupts, or "
+ "bad addresses, or out of MSHRs")
+ .prereq(fetchMiscStallCycles);
+
fetchIcacheSquashes
- .name(name() + ".fetchIcacheSquashes")
+ .name(name() + ".FETCH:IcacheSquashes")
.desc("Number of outstanding Icache misses that were squashed")
.prereq(fetchIcacheSquashes);
@@ -223,24 +229,24 @@ DefaultFetch<Impl>::regStats()
.init(/* base value */ 0,
/* last value */ fetchWidth,
/* bucket size */ 1)
- .name(name() + ".rateDist")
+ .name(name() + ".FETCH:rateDist")
.desc("Number of instructions fetched each cycle (Total)")
.flags(Stats::pdf);
idleRate
- .name(name() + ".idleRate")
+ .name(name() + ".FETCH:idleRate")
.desc("Percent of cycles fetch was idle")
.prereq(idleRate);
idleRate = fetchIdleCycles * 100 / cpu->numCycles;
branchRate
- .name(name() + ".branchRate")
+ .name(name() + ".FETCH:branchRate")
.desc("Number of branch fetches per cycle")
.flags(Stats::total);
branchRate = predictedBranches / cpu->numCycles;
fetchRate
- .name(name() + ".rate")
+ .name(name() + ".FETCH:rate")
.desc("Number of inst fetches per cycle")
.flags(Stats::total);
fetchRate = fetchedInsts / cpu->numCycles;
@@ -332,7 +338,8 @@ DefaultFetch<Impl>::processCacheCompletion(MemReqPtr &req)
// Can keep track of how many cache accesses go unused due to
// misspeculation here.
if (fetchStatus[tid] != IcacheMissStall ||
- req != memReq[tid]) {
+ req != memReq[tid] ||
+ isSwitchedOut()) {
++fetchIcacheSquashes;
return;
}
@@ -362,6 +369,35 @@ DefaultFetch<Impl>::processCacheCompletion(MemReqPtr &req)
template <class Impl>
void
+DefaultFetch<Impl>::switchOut()
+{
+ switchedOut = true;
+ branchPred.switchOut();
+}
+
+template <class Impl>
+void
+DefaultFetch<Impl>::takeOverFrom()
+{
+ // Reset all state
+ for (int i = 0; i < Impl::MaxThreads; ++i) {
+ stalls[i].decode = 0;
+ stalls[i].rename = 0;
+ stalls[i].iew = 0;
+ stalls[i].commit = 0;
+ PC[i] = cpu->readPC(i);
+ nextPC[i] = cpu->readNextPC(i);
+ fetchStatus[i] = Running;
+ }
+ numInst = 0;
+ wroteToTimeBuffer = false;
+ _status = Inactive;
+ switchedOut = false;
+ branchPred.takeOverFrom();
+}
+
+template <class Impl>
+void
DefaultFetch<Impl>::wakeFromQuiesce()
{
DPRINTF(Fetch, "Waking up from quiesce\n");
@@ -902,8 +938,10 @@ DefaultFetch<Impl>::fetch(bool &status_change)
tid, fetch_PC);
bool fetch_success = fetchCacheLine(fetch_PC, fault, tid);
- if (!fetch_success)
+ if (!fetch_success) {
+ ++fetchMiscStallCycles;
return;
+ }
} else {
if (fetchStatus[tid] == Idle) {
++fetchIdleCycles;
diff --git a/cpu/o3/fu_pool.cc b/cpu/o3/fu_pool.cc
index 9b6ac15d9..cb7a15061 100644
--- a/cpu/o3/fu_pool.cc
+++ b/cpu/o3/fu_pool.cc
@@ -242,6 +242,20 @@ FUPool::dump()
}
}
+void
+FUPool::switchOut()
+{
+}
+
+void
+FUPool::takeOverFrom()
+{
+ for (int i = 0; i < numFU; i++) {
+ unitBusy[i] = false;
+ }
+ unitsToBeFreed.clear();
+}
+
//
////////////////////////////////////////////////////////////////////////////
diff --git a/cpu/o3/fu_pool.hh b/cpu/o3/fu_pool.hh
index d7b7acadb..7df5ad5f3 100644
--- a/cpu/o3/fu_pool.hh
+++ b/cpu/o3/fu_pool.hh
@@ -154,6 +154,9 @@ class FUPool : public SimObject
unsigned getIssueLatency(OpClass capability) {
return maxIssueLatencies[capability];
}
+
+ void switchOut();
+ void takeOverFrom();
};
#endif // __CPU_O3_FU_POOL_HH__
diff --git a/cpu/o3/iew.hh b/cpu/o3/iew.hh
index 58cd68b21..ae0ba6a21 100644
--- a/cpu/o3/iew.hh
+++ b/cpu/o3/iew.hh
@@ -157,6 +157,12 @@ class DefaultIEW
/** Sets pointer to the scoreboard. */
void setScoreboard(Scoreboard *sb_ptr);
+ void switchOut();
+
+ void takeOverFrom();
+
+ bool isSwitchedOut() { return switchedOut; }
+
/** Sets page table pointer within LSQ. */
// void setPageTable(PageTable *pt_ptr);
@@ -420,6 +426,8 @@ class DefaultIEW
/** Maximum size of the skid buffer. */
unsigned skidBufferMax;
+ bool switchedOut;
+
/** Stat for total number of idle cycles. */
Stats::Scalar<> iewIdleCycles;
/** Stat for total number of squashing cycles. */
diff --git a/cpu/o3/iew_impl.hh b/cpu/o3/iew_impl.hh
index 2ae2e1361..42d83ee72 100644
--- a/cpu/o3/iew_impl.hh
+++ b/cpu/o3/iew_impl.hh
@@ -55,13 +55,13 @@ DefaultIEW<Impl>::LdWritebackEvent::process()
//iewStage->ldstQueue.removeMSHR(inst->threadNumber,inst->seqNum);
- iewStage->wakeCPU();
-
- if (inst->isSquashed()) {
+ if (inst->isSquashed() || iewStage->isSwitchedOut()) {
inst = NULL;
return;
}
+ iewStage->wakeCPU();
+
if (!inst->isExecuted()) {
inst->setExecuted();
@@ -101,7 +101,8 @@ DefaultIEW<Impl>::DefaultIEW(Params *params)
issueReadWidth(params->issueWidth),
issueWidth(params->issueWidth),
executeWidth(params->executeWidth),
- numThreads(params->numberOfThreads)
+ numThreads(params->numberOfThreads),
+ switchedOut(false)
{
DPRINTF(IEW, "executeIntWidth: %i.\n", params->executeIntWidth);
_status = Active;
@@ -436,6 +437,53 @@ DefaultIEW<Impl>::setPageTable(PageTable *pt_ptr)
}
#endif
+template <class Impl>
+void
+DefaultIEW<Impl>::switchOut()
+{
+ switchedOut = true;
+ instQueue.switchOut();
+ ldstQueue.switchOut();
+ fuPool->switchOut();
+
+ for (int i = 0; i < numThreads; i++) {
+ while (!insts[i].empty())
+ insts[i].pop();
+ while (!skidBuffer[i].empty())
+ skidBuffer[i].pop();
+ }
+}
+
+template <class Impl>
+void
+DefaultIEW<Impl>::takeOverFrom()
+{
+ _status = Active;
+ exeStatus = Running;
+ wbStatus = Idle;
+ switchedOut = false;
+
+ instQueue.takeOverFrom();
+ ldstQueue.takeOverFrom();
+ fuPool->takeOverFrom();
+
+ initStage();
+ cpu->activityThisCycle();
+
+ for (int i=0; i < numThreads; i++) {
+ dispatchStatus[i] = Running;
+ stalls[i].commit = false;
+ fetchRedirect[i] = false;
+ }
+
+ updateLSQNextCycle = false;
+
+ // @todo: Fix hardcoded number
+ for (int i = 0; i < 6; ++i) {
+ issueToExecQueue.advance();
+ }
+}
+
template<class Impl>
void
DefaultIEW<Impl>::squash(unsigned tid)
diff --git a/cpu/o3/inst_queue.hh b/cpu/o3/inst_queue.hh
index 06d9937f2..982294b4f 100644
--- a/cpu/o3/inst_queue.hh
+++ b/cpu/o3/inst_queue.hh
@@ -112,6 +112,10 @@ class InstructionQueue
/** Registers statistics. */
void regStats();
+ void resetState();
+
+ void resetDependencyGraph();
+
/** Sets CPU pointer. */
void setCPU(FullCPU *_cpu) { cpu = _cpu; }
@@ -127,6 +131,12 @@ class InstructionQueue
/** Sets the global time buffer. */
void setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr);
+ void switchOut();
+
+ void takeOverFrom();
+
+ bool isSwitchedOut() { return switchedOut; }
+
/** Number of entries needed for given amount of threads. */
int entryAmount(int num_threads);
@@ -385,6 +395,8 @@ class InstructionQueue
*/
unsigned commitToIEWDelay;
+ bool switchedOut;
+
//////////////////////////////////
// Variables needed for squashing
//////////////////////////////////
@@ -507,7 +519,7 @@ class InstructionQueue
Stats::Scalar<> iqSquashedNonSpecRemoved;
Stats::VectorDistribution<> queue_res_dist;
- Stats::Vector<> n_issued_dist;
+ Stats::Distribution<> n_issued_dist;
Stats::VectorDistribution<> issue_delay_dist;
Stats::Vector<> stat_fu_busy;
diff --git a/cpu/o3/inst_queue_impl.hh b/cpu/o3/inst_queue_impl.hh
index 804bc2472..0d9cc09f3 100644
--- a/cpu/o3/inst_queue_impl.hh
+++ b/cpu/o3/inst_queue_impl.hh
@@ -82,15 +82,9 @@ InstructionQueue<Impl>::InstructionQueue(Params *params)
{
assert(fuPool);
- numThreads = params->numberOfThreads;
+ switchedOut = false;
- //Initialize thread IQ counts
- for (int i = 0; i <numThreads; i++) {
- count[i] = 0;
- }
-
- // Initialize the number of free IQ entries.
- freeEntries = numEntries;
+ numThreads = params->numberOfThreads;
// Set the number of physical registers as the number of int + float
numPhysRegs = numPhysIntRegs + numPhysFloatRegs;
@@ -101,37 +95,24 @@ InstructionQueue<Impl>::InstructionQueue(Params *params)
//dependency graph.
dependGraph = new DependencyEntry[numPhysRegs];
- // Resize the register scoreboard.
- regScoreboard.resize(numPhysRegs);
-
- //Initialize Mem Dependence Units
- for (int i = 0; i < numThreads; i++) {
- memDepUnit[i].init(params,i);
- memDepUnit[i].setIQ(this);
- }
-
// Initialize all the head pointers to point to NULL, and all the
// entries as unready.
- // Note that in actuality, the registers corresponding to the logical
- // registers start off as ready. However this doesn't matter for the
- // IQ as the instruction should have been correctly told if those
- // registers are ready in rename. Thus it can all be initialized as
- // unready.
for (int i = 0; i < numPhysRegs; ++i) {
dependGraph[i].next = NULL;
dependGraph[i].inst = NULL;
- regScoreboard[i] = false;
}
- for (int i = 0; i < numThreads; ++i) {
- squashedSeqNum[i] = 0;
- }
+ // Resize the register scoreboard.
+ regScoreboard.resize(numPhysRegs);
- for (int i = 0; i < Num_OpClasses; ++i) {
- queueOnList[i] = false;
- readyIt[i] = listOrder.end();
+ //Initialize Mem Dependence Units
+ for (int i = 0; i < numThreads; i++) {
+ memDepUnit[i].init(params,i);
+ memDepUnit[i].setIQ(this);
}
+ resetState();
+
string policy = params->smtIQPolicy;
//Convert string to lowercase
@@ -184,30 +165,7 @@ InstructionQueue<Impl>::InstructionQueue(Params *params)
template <class Impl>
InstructionQueue<Impl>::~InstructionQueue()
{
- // Clear the dependency graph
- DependencyEntry *curr;
- DependencyEntry *prev;
-
- for (int i = 0; i < numPhysRegs; ++i) {
- curr = dependGraph[i].next;
-
- while (curr) {
- DependencyEntry::mem_alloc_counter--;
-
- prev = curr;
- curr = prev->next;
- prev->inst = NULL;
-
- delete prev;
- }
-
- if (dependGraph[i].inst) {
- dependGraph[i].inst = NULL;
- }
-
- dependGraph[i].next = NULL;
- }
-
+ resetDependencyGraph();
assert(DependencyEntry::mem_alloc_counter == 0);
delete [] dependGraph;
@@ -307,10 +265,10 @@ InstructionQueue<Impl>::regStats()
queue_res_dist.subname(i, opClassStrings[i]);
}
n_issued_dist
- .init(totalWidth + 1)
+ .init(0,totalWidth,1)
.name(name() + ".ISSUE:issued_per_cycle")
.desc("Number of insts issued each cycle")
- .flags(total | pdf | dist)
+ .flags(pdf)
;
/*
dist_unissued
@@ -402,6 +360,71 @@ InstructionQueue<Impl>::regStats()
template <class Impl>
void
+InstructionQueue<Impl>::resetState()
+{
+ //Initialize thread IQ counts
+ for (int i = 0; i <numThreads; i++) {
+ count[i] = 0;
+ instList[i].clear();
+ }
+
+ // Initialize the number of free IQ entries.
+ freeEntries = numEntries;
+
+ // Note that in actuality, the registers corresponding to the logical
+ // registers start off as ready. However this doesn't matter for the
+ // IQ as the instruction should have been correctly told if those
+ // registers are ready in rename. Thus it can all be initialized as
+ // unready.
+ for (int i = 0; i < numPhysRegs; ++i) {
+ regScoreboard[i] = false;
+ }
+
+ for (int i = 0; i < numThreads; ++i) {
+ squashedSeqNum[i] = 0;
+ }
+
+ for (int i = 0; i < Num_OpClasses; ++i) {
+ while (!readyInsts[i].empty())
+ readyInsts[i].pop();
+ queueOnList[i] = false;
+ readyIt[i] = listOrder.end();
+ }
+ nonSpecInsts.clear();
+ listOrder.clear();
+}
+
+template <class Impl>
+void
+InstructionQueue<Impl>::resetDependencyGraph()
+{
+ // Clear the dependency graph
+ DependencyEntry *curr;
+ DependencyEntry *prev;
+
+ for (int i = 0; i < numPhysRegs; ++i) {
+ curr = dependGraph[i].next;
+
+ while (curr) {
+ DependencyEntry::mem_alloc_counter--;
+
+ prev = curr;
+ curr = prev->next;
+ prev->inst = NULL;
+
+ delete prev;
+ }
+
+ if (dependGraph[i].inst) {
+ dependGraph[i].inst = NULL;
+ }
+
+ dependGraph[i].next = NULL;
+ }
+}
+
+template <class Impl>
+void
InstructionQueue<Impl>::setActiveThreads(list<unsigned> *at_ptr)
{
DPRINTF(IQ, "Setting active threads list pointer.\n");
@@ -427,6 +450,25 @@ InstructionQueue<Impl>::setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr)
}
template <class Impl>
+void
+InstructionQueue<Impl>::switchOut()
+{
+ resetState();
+ resetDependencyGraph();
+ switchedOut = true;
+ for (int i = 0; i < numThreads; ++i) {
+ memDepUnit[i].switchOut();
+ }
+}
+
+template <class Impl>
+void
+InstructionQueue<Impl>::takeOverFrom()
+{
+ switchedOut = false;
+}
+
+template <class Impl>
int
InstructionQueue<Impl>::entryAmount(int num_threads)
{
@@ -685,6 +727,10 @@ InstructionQueue<Impl>::processFUCompletion(DynInstPtr &inst, int fu_idx)
{
// The CPU could have been sleeping until this op completed (*extremely*
// long latency op). Wake it if it was. This may be overkill.
+ if (isSwitchedOut()) {
+ return;
+ }
+
iewStage->wakeCPU();
fuPool->freeUnit(fu_idx);
@@ -816,7 +862,7 @@ InstructionQueue<Impl>::scheduleReadyInsts()
FUCompletion *execution = new FUCompletion(issuing_inst,
idx, this);
- execution->schedule(curTick + issue_latency - 1);
+ execution->schedule(curTick + cpu->cycles(issue_latency - 1));
} else {
i2e_info->insts[exec_queue_slot++] = issuing_inst;
i2e_info->size++;
@@ -862,6 +908,8 @@ InstructionQueue<Impl>::scheduleReadyInsts()
}
}
+ n_issued_dist.sample(total_issued);
+
if (total_issued) {
cpu->activityThisCycle();
} else {
diff --git a/cpu/o3/lsq.hh b/cpu/o3/lsq.hh
index c59b5f13b..d5f893e57 100644
--- a/cpu/o3/lsq.hh
+++ b/cpu/o3/lsq.hh
@@ -71,6 +71,9 @@ class LSQ {
/** Sets the page table pointer. */
// void setPageTable(PageTable *pt_ptr);
+ void switchOut();
+ void takeOverFrom();
+
/** Number of entries needed for the given amount of threads.*/
int entryAmount(int num_threads);
void removeEntries(unsigned tid);
@@ -271,15 +274,6 @@ class LSQ {
/** Max SQ Size - Used to Enforce Sharing Policies. */
unsigned maxSQEntries;
- /** Global Load Count. */
- int loads;
-
- /** Global Store Count */
- int stores;
-
- /** Global Store To WB Count */
- int storesToWB;
-
/** Number of Threads. */
unsigned numThreads;
};
diff --git a/cpu/o3/lsq_impl.hh b/cpu/o3/lsq_impl.hh
index 523517869..c43c19619 100644
--- a/cpu/o3/lsq_impl.hh
+++ b/cpu/o3/lsq_impl.hh
@@ -33,7 +33,6 @@ using namespace std;
template <class Impl>
LSQ<Impl>::LSQ(Params *params)
: LQEntries(params->LQEntries), SQEntries(params->SQEntries),
- loads(0), stores(0), storesToWB(0),
numThreads(params->numberOfThreads)
{
DPRINTF(LSQ, "Creating LSQ object.\n");
@@ -144,6 +143,24 @@ LSQ<Impl>::setPageTable(PageTable *pt_ptr)
#endif
template <class Impl>
+void
+LSQ<Impl>::switchOut()
+{
+ for (int tid = 0; tid < numThreads; tid++) {
+ thread[tid].switchOut();
+ }
+}
+
+template <class Impl>
+void
+LSQ<Impl>::takeOverFrom()
+{
+ for (int tid = 0; tid < numThreads; tid++) {
+ thread[tid].takeOverFrom();
+ }
+}
+
+template <class Impl>
int
LSQ<Impl>::entryAmount(int num_threads)
{
diff --git a/cpu/o3/lsq_unit.hh b/cpu/o3/lsq_unit.hh
index ba8b1d2e2..d17efe96a 100644
--- a/cpu/o3/lsq_unit.hh
+++ b/cpu/o3/lsq_unit.hh
@@ -38,6 +38,7 @@
#include "cpu/inst_seq.hh"
#include "mem/mem_interface.hh"
//#include "mem/page_table.hh"
+#include "sim/debug.hh"
#include "sim/sim_object.hh"
#include "arch/faults.hh"
@@ -110,6 +111,12 @@ class LSQUnit {
/** Sets the page table pointer. */
// void setPageTable(PageTable *pt_ptr);
+ void switchOut();
+
+ void takeOverFrom();
+
+ bool isSwitchedOut() { return switchedOut; }
+
/** Ticks the LSQ unit, which in this case only resets the number of
* used cache ports.
* @todo: Move the number of used ports up to the LSQ level so it can
@@ -278,20 +285,20 @@ class LSQUnit {
/** Whether or not the store is completed. */
bool completed;
};
-
+/*
enum Status {
Running,
Idle,
DcacheMissStall,
DcacheMissSwitch
};
-
+*/
private:
/** The LSQUnit thread id. */
unsigned lsqID;
/** The status of the LSQ unit. */
- Status _status;
+// Status _status;
/** The store queue. */
std::vector<SQEntry> storeQueue;
@@ -335,6 +342,8 @@ class LSQUnit {
/** The number of used cache ports in this cycle. */
int usedPorts;
+ bool switchedOut;
+
//list<InstSeqNum> mshrSeqNums;
//Stats::Scalar<> dcacheStallCycles;
@@ -373,7 +382,25 @@ class LSQUnit {
// Will also need how many read/write ports the Dcache has. Or keep track
// of that in stage that is one level up, and only call executeLoad/Store
// the appropriate number of times.
+/*
+ // total number of loads forwaded from LSQ stores
+ Stats::Vector<> lsq_forw_loads;
+
+ // total number of loads ignored due to invalid addresses
+ Stats::Vector<> inv_addr_loads;
+
+ // total number of software prefetches ignored due to invalid addresses
+ Stats::Vector<> inv_addr_swpfs;
+
+ // total non-speculative bogus addresses seen (debug var)
+ Counter sim_invalid_addrs;
+ Stats::Vector<> fu_busy; //cumulative fu busy
+ // ready loads blocked due to memory disambiguation
+ Stats::Vector<> lsq_blocked_loads;
+
+ Stats::Scalar<> lsqInversion;
+*/
public:
/** Executes the load at the given index. */
template <class T>
@@ -590,7 +617,12 @@ LSQUnit<Impl>::read(MemReqPtr &req, T &data, int load_idx)
}
DPRINTF(LSQUnit, "Doing timing access for inst PC %#x\n",
loadQueue[load_idx]->readPC());
-
+/*
+ Addr debug_addr = ULL(0xfffffc0000be81a8);
+ if (req->vaddr == debug_addr) {
+ debug_break();
+ }
+*/
assert(!req->completionEvent);
req->completionEvent =
new typename IEW::LdWritebackEvent(loadQueue[load_idx], iewStage);
@@ -608,7 +640,7 @@ LSQUnit<Impl>::read(MemReqPtr &req, T &data, int load_idx)
lastDcacheStall = curTick;
- _status = DcacheMissStall;
+// _status = DcacheMissStall;
} else {
DPRINTF(Activity, "Activity: ld accessing mem hit [sn:%lli]\n",
@@ -694,7 +726,12 @@ LSQUnit<Impl>::write(MemReqPtr &req, T &data, int store_idx)
storeQueue[store_idx].req = req;
storeQueue[store_idx].size = sizeof(T);
storeQueue[store_idx].data = data;
-
+/*
+ Addr debug_addr = ULL(0xfffffc0000be81a8);
+ if (req->vaddr == debug_addr) {
+ debug_break();
+ }
+*/
// This function only writes the data to the store queue, so no fault
// can happen here.
return NoFault;
diff --git a/cpu/o3/lsq_unit_impl.hh b/cpu/o3/lsq_unit_impl.hh
index d9a118b0e..c5ce34c70 100644
--- a/cpu/o3/lsq_unit_impl.hh
+++ b/cpu/o3/lsq_unit_impl.hh
@@ -50,6 +50,9 @@ LSQUnit<Impl>::StoreCompletionEvent::process()
//lsqPtr->removeMSHR(lsqPtr->storeQueue[storeIdx].inst->seqNum);
+ if (lsqPtr->isSwitchedOut())
+ return;
+
lsqPtr->cpu->wakeCPU();
if (wbEvent)
wbEvent->process();
@@ -78,6 +81,8 @@ LSQUnit<Impl>::init(Params *params, unsigned maxLQEntries,
{
DPRINTF(LSQUnit, "Creating LSQUnit%i object.\n",id);
+ switchedOut = false;
+
lsqID = id;
LQEntries = maxLQEntries;
@@ -140,6 +145,89 @@ LSQUnit<Impl>::setPageTable(PageTable *pt_ptr)
template<class Impl>
void
+LSQUnit<Impl>::switchOut()
+{
+ switchedOut = true;
+ for (int i = 0; i < loadQueue.size(); ++i)
+ loadQueue[i] = NULL;
+
+ while (storesToWB > 0 &&
+ storeWBIdx != storeTail &&
+ storeQueue[storeWBIdx].inst &&
+ storeQueue[storeWBIdx].canWB) {
+
+ if (storeQueue[storeWBIdx].size == 0 ||
+ storeQueue[storeWBIdx].inst->isDataPrefetch() ||
+ storeQueue[storeWBIdx].committed ||
+ storeQueue[storeWBIdx].req->flags & LOCKED) {
+ incrStIdx(storeWBIdx);
+
+ continue;
+ }
+
+ assert(storeQueue[storeWBIdx].req);
+ assert(!storeQueue[storeWBIdx].committed);
+
+ MemReqPtr req = storeQueue[storeWBIdx].req;
+ storeQueue[storeWBIdx].committed = true;
+
+ req->cmd = Write;
+ req->completionEvent = NULL;
+ req->time = curTick;
+ assert(!req->data);
+ req->data = new uint8_t[64];
+ memcpy(req->data, (uint8_t *)&storeQueue[storeWBIdx].data, req->size);
+
+ DPRINTF(LSQUnit, "D-Cache: Writing back store idx:%i PC:%#x "
+ "to Addr:%#x, data:%#x [sn:%lli]\n",
+ storeWBIdx,storeQueue[storeWBIdx].inst->readPC(),
+ req->paddr, *(req->data),
+ storeQueue[storeWBIdx].inst->seqNum);
+
+ switch(storeQueue[storeWBIdx].size) {
+ case 1:
+ cpu->write(req, (uint8_t &)storeQueue[storeWBIdx].data);
+ break;
+ case 2:
+ cpu->write(req, (uint16_t &)storeQueue[storeWBIdx].data);
+ break;
+ case 4:
+ cpu->write(req, (uint32_t &)storeQueue[storeWBIdx].data);
+ break;
+ case 8:
+ cpu->write(req, (uint64_t &)storeQueue[storeWBIdx].data);
+ break;
+ default:
+ panic("Unexpected store size!\n");
+ }
+ incrStIdx(storeWBIdx);
+ }
+}
+
+template<class Impl>
+void
+LSQUnit<Impl>::takeOverFrom()
+{
+ switchedOut = false;
+ loads = stores = storesToWB = 0;
+
+ loadHead = loadTail = 0;
+
+ storeHead = storeWBIdx = storeTail = 0;
+
+ usedPorts = 0;
+
+ loadFaultInst = storeFaultInst = memDepViolator = NULL;
+
+ blockedLoadSeqNum = 0;
+
+ stalled = false;
+ isLoadBlocked = false;
+ loadBlockedHandled = false;
+}
+
+template<class Impl>
+void
LSQUnit<Impl>::resizeLQ(unsigned size)
{
assert( size >= LQEntries);
@@ -647,7 +735,7 @@ LSQUnit<Impl>::writebackStores()
lastDcacheStall = curTick;
- _status = DcacheMissStall;
+// _status = DcacheMissStall;
//mshrSeqNums.push_back(storeQueue[storeWBIdx].inst->seqNum);
diff --git a/cpu/o3/mem_dep_unit.hh b/cpu/o3/mem_dep_unit.hh
index 32ce9f768..141e0fdc4 100644
--- a/cpu/o3/mem_dep_unit.hh
+++ b/cpu/o3/mem_dep_unit.hh
@@ -84,6 +84,10 @@ class MemDepUnit {
/** Registers statistics. */
void regStats();
+ void switchOut();
+
+ void takeOverFrom();
+
/** Sets the pointer to the IQ. */
void setIQ(InstructionQueue<Impl> *iq_ptr);
diff --git a/cpu/o3/mem_dep_unit_impl.hh b/cpu/o3/mem_dep_unit_impl.hh
index 771a0505e..05a33685d 100644
--- a/cpu/o3/mem_dep_unit_impl.hh
+++ b/cpu/o3/mem_dep_unit_impl.hh
@@ -103,6 +103,26 @@ MemDepUnit<MemDepPred, Impl>::regStats()
template <class MemDepPred, class Impl>
void
+MemDepUnit<MemDepPred, Impl>::switchOut()
+{
+ for (int i = 0; i < Impl::MaxThreads; ++i) {
+ instList[i].clear();
+ }
+ instsToReplay.clear();
+ memDepHash.clear();
+}
+
+template <class MemDepPred, class Impl>
+void
+MemDepUnit<MemDepPred, Impl>::takeOverFrom()
+{
+ loadBarrier = storeBarrier = false;
+ loadBarrierSN = storeBarrierSN = 0;
+ depPred.clear();
+}
+
+template <class MemDepPred, class Impl>
+void
MemDepUnit<MemDepPred, Impl>::setIQ(InstructionQueue<Impl> *iq_ptr)
{
iqPtr = iq_ptr;
diff --git a/cpu/o3/ras.cc b/cpu/o3/ras.cc
index 5e7ef38ae..0b3ea4918 100644
--- a/cpu/o3/ras.cc
+++ b/cpu/o3/ras.cc
@@ -42,6 +42,15 @@ ReturnAddrStack::init(unsigned _numEntries)
}
void
+ReturnAddrStack::reset()
+{
+ usedEntries = 0;
+ tos = 0;
+ for (int i = 0; i < numEntries; ++i)
+ addrStack[i] = 0;
+}
+
+void
ReturnAddrStack::push(const Addr &return_addr)
{
incrTos();
diff --git a/cpu/o3/ras.hh b/cpu/o3/ras.hh
index 5aa4fc05f..27e7c2df4 100644
--- a/cpu/o3/ras.hh
+++ b/cpu/o3/ras.hh
@@ -47,6 +47,8 @@ class ReturnAddrStack
*/
void init(unsigned numEntries);
+ void reset();
+
/** Returns the top address on the RAS. */
Addr top()
{ return addrStack[tos]; }
diff --git a/cpu/o3/rename.hh b/cpu/o3/rename.hh
index c6f8f97aa..4c5c46356 100644
--- a/cpu/o3/rename.hh
+++ b/cpu/o3/rename.hh
@@ -153,6 +153,10 @@ class DefaultRename
/** Sets pointer to the scoreboard. */
void setScoreboard(Scoreboard *_scoreboard);
+ void switchOut();
+
+ void takeOverFrom();
+
/** Squashes all instructions in a thread. */
void squash(unsigned tid);
@@ -448,6 +452,7 @@ class DefaultRename
Stats::Scalar<> renameUndoneMaps;
Stats::Scalar<> renamedSerializing;
Stats::Scalar<> renamedTempSerializing;
+ Stats::Scalar<> renameSkidInsts;
};
#endif // __CPU_O3_RENAME_HH__
diff --git a/cpu/o3/rename_impl.hh b/cpu/o3/rename_impl.hh
index e29211921..d41058deb 100644
--- a/cpu/o3/rename_impl.hh
+++ b/cpu/o3/rename_impl.hh
@@ -151,6 +151,11 @@ DefaultRename<Impl>::regStats()
.desc("count of temporary serializing insts renamed")
.flags(Stats::total)
;
+ renameSkidInsts
+ .name(name() + ".RENAME:skidInsts")
+ .desc("count of insts added to the skid buffer")
+ .flags(Stats::total)
+ ;
}
template <class Impl>
@@ -213,8 +218,8 @@ DefaultRename<Impl>::initStage()
// Clear these pointers so they are not accidentally used in
// non-initialization code.
- iew_ptr = NULL;
- commit_ptr = NULL;
+// iew_ptr = NULL;
+// commit_ptr = NULL;
}
template<class Impl>
@@ -255,6 +260,55 @@ DefaultRename<Impl>::setScoreboard(Scoreboard *_scoreboard)
template <class Impl>
void
+DefaultRename<Impl>::switchOut()
+{
+ for (int i = 0; i < numThreads; i++) {
+ typename list<RenameHistory>::iterator hb_it = historyBuffer[i].begin();
+
+ while (!historyBuffer[i].empty()) {
+ assert(hb_it != historyBuffer[i].end());
+
+ DPRINTF(Rename, "[tid:%u]: Removing history entry with sequence "
+ "number %i.\n", i, (*hb_it).instSeqNum);
+
+ // Tell the rename map to set the architected register to the
+ // previous physical register that it was renamed to.
+ renameMap[i]->setEntry(hb_it->archReg, hb_it->prevPhysReg);
+
+ // Put the renamed physical register back on the free list.
+ freeList->addReg(hb_it->newPhysReg);
+
+ historyBuffer[i].erase(hb_it++);
+ }
+ insts[i].clear();
+ skidBuffer[i].clear();
+ }
+}
+
+template <class Impl>
+void
+DefaultRename<Impl>::takeOverFrom()
+{
+ _status = Inactive;
+ initStage();
+
+ for (int i=0; i< numThreads; i++) {
+ renameStatus[i] = Idle;
+
+ stalls[i].iew = false;
+ stalls[i].commit = false;
+ serializeInst[i] = NULL;
+
+ instsInProgress[i] = 0;
+
+ emptyROB[i] = true;
+
+ serializeOnNextInst[i] = false;
+ }
+}
+
+template <class Impl>
+void
DefaultRename<Impl>::squash(unsigned tid)
{
DPRINTF(Rename, "[tid:%u]: Squashing instructions.\n",tid);
@@ -393,7 +447,7 @@ DefaultRename<Impl>::rename(bool &status_change, unsigned tid)
} else if (renameStatus[tid] == Unblocking) {
renameInsts(tid);
- ++renameUnblockCycles;
+// ++renameUnblockCycles;
if (validInsts()) {
// Add the current inputs to the skid buffer so they can be
@@ -564,6 +618,8 @@ DefaultRename<Impl>::renameInsts(unsigned tid)
} else if (inst->isSerializeAfter() && !inst->isSerializeHandled()) {
DPRINTF(Rename, "Serialize after instruction encountered.\n");
+ renamedSerializing++;
+
inst->setSerializeHandled();
serializeAfter(insts_to_rename, tid);
@@ -594,13 +650,12 @@ DefaultRename<Impl>::renameInsts(unsigned tid)
// Increment which instruction we're on.
++toIEWIndex;
- ++renameRenamedInsts;
-
// Decrement how many instructions are available.
--insts_available;
}
instsInProgress[tid] += renamed_insts;
+ renameRenamedInsts += renamed_insts;
// If we wrote to the time buffer, record this.
if (toIEWIndex) {
@@ -635,6 +690,8 @@ DefaultRename<Impl>::skidInsert(unsigned tid)
DPRINTF(Rename, "[tid:%u]: Inserting [sn:%lli] PC:%#x into Rename "
"skidBuffer\n", tid, inst->seqNum, inst->readPC());
+ ++renameSkidInsts;
+
skidBuffer[tid].push_back(inst);
}
diff --git a/cpu/o3/rob.hh b/cpu/o3/rob.hh
index 48199915f..0748850ea 100644
--- a/cpu/o3/rob.hh
+++ b/cpu/o3/rob.hh
@@ -97,6 +97,10 @@ class ROB
*/
void setActiveThreads(std::list<unsigned>* at_ptr);
+ void switchOut();
+
+ void takeOverFrom();
+
/** Function to insert an instruction into the ROB. Note that whatever
* calls this function must ensure that there is enough space within the
* ROB for the new instruction.
diff --git a/cpu/o3/rob_impl.hh b/cpu/o3/rob_impl.hh
index 96d907cda..02a4bfbee 100644
--- a/cpu/o3/rob_impl.hh
+++ b/cpu/o3/rob_impl.hh
@@ -121,6 +121,31 @@ ROB<Impl>::setActiveThreads(list<unsigned> *at_ptr)
activeThreads = at_ptr;
}
+template <class Impl>
+void
+ROB<Impl>::switchOut()
+{
+ for (int tid = 0; tid < numThreads; tid++) {
+ instList[tid].clear();
+ }
+}
+
+template <class Impl>
+void
+ROB<Impl>::takeOverFrom()
+{
+ for (int tid=0; tid < numThreads; tid++) {
+ doneSquashing[tid] = true;
+ threadEntries[tid] = 0;
+ squashIt[tid] = instList[tid].end();
+ }
+ numInstsInROB = 0;
+
+ // Initialize the "universal" ROB head & tail point to invalid
+ // pointers
+ head = instList[0].end();
+ tail = instList[0].end();
+}
template <class Impl>
void
diff --git a/cpu/o3/sat_counter.cc b/cpu/o3/sat_counter.cc
index a6e131483..b481b4ad2 100644
--- a/cpu/o3/sat_counter.cc
+++ b/cpu/o3/sat_counter.cc
@@ -30,17 +30,17 @@
#include "cpu/o3/sat_counter.hh"
SatCounter::SatCounter()
- : maxVal(0), counter(0)
+ : initialVal(0), counter(0)
{
}
SatCounter::SatCounter(unsigned bits)
- : maxVal((1 << bits) - 1), counter(0)
+ : initialVal(0), maxVal((1 << bits) - 1), counter(0)
{
}
-SatCounter::SatCounter(unsigned bits, unsigned initial_val)
- : maxVal((1 << bits) - 1), counter(initial_val)
+SatCounter::SatCounter(unsigned bits, uint8_t initial_val)
+ : initialVal(initialVal), maxVal((1 << bits) - 1), counter(initial_val)
{
// Check to make sure initial value doesn't exceed the max counter value.
if (initial_val > maxVal) {
@@ -53,19 +53,3 @@ SatCounter::setBits(unsigned bits)
{
maxVal = (1 << bits) - 1;
}
-
-void
-SatCounter::increment()
-{
- if (counter < maxVal) {
- ++counter;
- }
-}
-
-void
-SatCounter::decrement()
-{
- if (counter > 0) {
- --counter;
- }
-}
diff --git a/cpu/o3/sat_counter.hh b/cpu/o3/sat_counter.hh
index 952f1f86d..1d20a8a8f 100644
--- a/cpu/o3/sat_counter.hh
+++ b/cpu/o3/sat_counter.hh
@@ -57,22 +57,34 @@ class SatCounter
* @param bits How many bits the counter will have.
* @param initial_val Starting value for each counter.
*/
- SatCounter(unsigned bits, unsigned initial_val);
+ SatCounter(unsigned bits, uint8_t initial_val);
/**
* Sets the number of bits.
*/
void setBits(unsigned bits);
+ void reset() { counter = initialVal; }
+
/**
* Increments the counter's current value.
*/
- void increment();
+ void increment()
+ {
+ if (counter < maxVal) {
+ ++counter;
+ }
+ }
/**
* Decrements the counter's current value.
*/
- void decrement();
+ void decrement()
+ {
+ if (counter > 0) {
+ --counter;
+ }
+ }
/**
* Read the counter's value.
@@ -81,6 +93,7 @@ class SatCounter
{ return counter; }
private:
+ uint8_t initialVal;
uint8_t maxVal;
uint8_t counter;
};
diff --git a/cpu/o3/thread_state.hh b/cpu/o3/thread_state.hh
index 846f44176..17719bdeb 100644
--- a/cpu/o3/thread_state.hh
+++ b/cpu/o3/thread_state.hh
@@ -60,7 +60,7 @@ struct O3ThreadState : public ThreadState {
{ }
#else
O3ThreadState(FullCPU *_cpu, int _thread_num, Process *_process, int _asid)
- : ThreadState(-1, _thread_num, NULL, _process, _asid),
+ : ThreadState(-1, _thread_num, _process->getMemory(), _process, _asid),
cpu(_cpu), inSyscall(0), trapPending(0)
{ }