path: root/src/cpu/o3
Diffstat (limited to 'src/cpu/o3')
-rw-r--r--  src/cpu/o3/alpha/cpu_builder.cc      6
-rw-r--r--  src/cpu/o3/checker_builder.cc        8
-rw-r--r--  src/cpu/o3/commit_impl.hh           19
-rw-r--r--  src/cpu/o3/cpu.cc                    7
-rw-r--r--  src/cpu/o3/fetch_impl.hh             9
-rw-r--r--  src/cpu/o3/iew.hh                    8
-rw-r--r--  src/cpu/o3/iew_impl.hh              29
-rw-r--r--  src/cpu/o3/inst_queue.hh             4
-rw-r--r--  src/cpu/o3/inst_queue_impl.hh       28
-rw-r--r--  src/cpu/o3/lsq_unit.hh               4
-rw-r--r--  src/cpu/o3/lsq_unit_impl.hh         15
-rw-r--r--  src/cpu/o3/mem_dep_unit_impl.hh      3
-rw-r--r--  src/cpu/o3/rename.hh                 2
-rw-r--r--  src/cpu/o3/rename_impl.hh           13
-rwxr-xr-x  src/cpu/o3/thread_context_impl.hh   10
-rw-r--r--  src/cpu/o3/thread_state.hh          29
-rw-r--r--  src/cpu/o3/tournament_pred.cc       10
-rw-r--r--  src/cpu/o3/tournament_pred.hh        3
18 files changed, 172 insertions, 35 deletions
diff --git a/src/cpu/o3/alpha/cpu_builder.cc b/src/cpu/o3/alpha/cpu_builder.cc
index 5e767655d..ff123a6f7 100644
--- a/src/cpu/o3/alpha/cpu_builder.cc
+++ b/src/cpu/o3/alpha/cpu_builder.cc
@@ -56,6 +56,7 @@ SimObjectParam<System *> system;
Param<int> cpu_id;
SimObjectParam<AlphaITB *> itb;
SimObjectParam<AlphaDTB *> dtb;
+Param<Tick> profile;
#else
SimObjectVectorParam<Process *> workload;
#endif // FULL_SYSTEM
@@ -68,6 +69,7 @@ Param<Counter> max_insts_any_thread;
Param<Counter> max_insts_all_threads;
Param<Counter> max_loads_any_thread;
Param<Counter> max_loads_all_threads;
+Param<Tick> progress_interval;
Param<unsigned> cachePorts;
@@ -162,6 +164,7 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(DerivO3CPU)
INIT_PARAM(cpu_id, "processor ID"),
INIT_PARAM(itb, "Instruction translation buffer"),
INIT_PARAM(dtb, "Data translation buffer"),
+ INIT_PARAM(profile, ""),
#else
INIT_PARAM(workload, "Processes to run"),
#endif // FULL_SYSTEM
@@ -184,6 +187,7 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(DerivO3CPU)
"Terminate when all threads have reached this load"
"count",
0),
+ INIT_PARAM_DFLT(progress_interval, "Progress interval", 0),
INIT_PARAM_DFLT(cachePorts, "Cache Ports", 200),
@@ -305,6 +309,7 @@ CREATE_SIM_OBJECT(DerivO3CPU)
params->cpu_id = cpu_id;
params->itb = itb;
params->dtb = dtb;
+ params->profile = profile;
#else
params->workload = workload;
#endif // FULL_SYSTEM
@@ -317,6 +322,7 @@ CREATE_SIM_OBJECT(DerivO3CPU)
params->max_insts_all_threads = max_insts_all_threads;
params->max_loads_any_thread = max_loads_any_thread;
params->max_loads_all_threads = max_loads_all_threads;
+ params->progress_interval = progress_interval;
//
// Caches
diff --git a/src/cpu/o3/checker_builder.cc b/src/cpu/o3/checker_builder.cc
index 782d963b0..02c817499 100644
--- a/src/cpu/o3/checker_builder.cc
+++ b/src/cpu/o3/checker_builder.cc
@@ -64,6 +64,7 @@ BEGIN_DECLARE_SIM_OBJECT_PARAMS(O3Checker)
Param<Counter> max_insts_all_threads;
Param<Counter> max_loads_any_thread;
Param<Counter> max_loads_all_threads;
+ Param<Tick> progress_interval;
#if FULL_SYSTEM
SimObjectParam<AlphaITB *> itb;
@@ -78,6 +79,7 @@ BEGIN_DECLARE_SIM_OBJECT_PARAMS(O3Checker)
Param<bool> defer_registration;
Param<bool> exitOnError;
+ Param<bool> updateOnError;
Param<bool> warnOnlyOnLoadError;
Param<bool> function_trace;
Param<Tick> function_trace_start;
@@ -94,6 +96,7 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(O3Checker)
"terminate when any thread reaches this load count"),
INIT_PARAM(max_loads_all_threads,
"terminate when all threads have reached this load count"),
+ INIT_PARAM_DFLT(progress_interval, "CPU Progress Interval", 0),
#if FULL_SYSTEM
INIT_PARAM(itb, "Instruction TLB"),
@@ -109,6 +112,7 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(O3Checker)
INIT_PARAM(defer_registration, "defer system registration (for sampling)"),
INIT_PARAM(exitOnError, "exit on error"),
+ INIT_PARAM(updateOnError, "Update the checker with the main CPU's state on error"),
INIT_PARAM_DFLT(warnOnlyOnLoadError, "warn, but don't exit, if a load "
"result errors", false),
INIT_PARAM(function_trace, "Enable function trace"),
@@ -127,6 +131,7 @@ CREATE_SIM_OBJECT(O3Checker)
params->max_loads_any_thread = 0;
params->max_loads_all_threads = 0;
params->exitOnError = exitOnError;
+ params->updateOnError = updateOnError;
params->warnOnlyOnLoadError = warnOnlyOnLoadError;
params->deferRegistration = defer_registration;
params->functionTrace = function_trace;
@@ -139,6 +144,9 @@ CREATE_SIM_OBJECT(O3Checker)
temp = max_insts_all_threads;
temp = max_loads_any_thread;
temp = max_loads_all_threads;
+ Tick temp2 = progress_interval;
+ params->progress_interval = 0;
+ temp2++;
#if FULL_SYSTEM
params->itb = itb;
diff --git a/src/cpu/o3/commit_impl.hh b/src/cpu/o3/commit_impl.hh
index 34f487e2c..c80e4d8c1 100644
--- a/src/cpu/o3/commit_impl.hh
+++ b/src/cpu/o3/commit_impl.hh
@@ -1083,12 +1083,26 @@ DefaultCommit<Impl>::commitHead(DynInstPtr &head_inst, unsigned inst_num)
// Generate trap squash event.
generateTrapEvent(tid);
-
+// warn("%lli fault (%d) handled @ PC %08p", curTick, inst_fault->name(), head_inst->readPC());
return false;
}
updateComInstStats(head_inst);
+#if FULL_SYSTEM
+ if (thread[tid]->profile) {
+// bool usermode =
+// (cpu->readMiscReg(AlphaISA::IPR_DTB_CM, tid) & 0x18) != 0;
+// thread[tid]->profilePC = usermode ? 1 : head_inst->readPC();
+ thread[tid]->profilePC = head_inst->readPC();
+ ProfileNode *node = thread[tid]->profile->consume(thread[tid]->getTC(),
+ head_inst->staticInst);
+
+ if (node)
+ thread[tid]->profileNode = node;
+ }
+#endif
+
if (head_inst->traceData) {
head_inst->traceData->setFetchSeq(head_inst->seqNum);
head_inst->traceData->setCPSeq(thread[tid]->numInst);
@@ -1102,6 +1116,9 @@ DefaultCommit<Impl>::commitHead(DynInstPtr &head_inst, unsigned inst_num)
head_inst->renamedDestRegIdx(i));
}
+ if (head_inst->isCopy())
+ panic("Should not commit any copy instructions!");
+
// Finally clear the head ROB entry.
rob->retireHead(tid);
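
The FULL_SYSTEM block added to commitHead() above drives kernel function profiling from the commit stage. Below is a minimal standalone sketch of that pattern, not the gem5 code: the FunctionProfile/ProfileNode stand-ins and the recordCommit name are simplified assumptions. Each committed instruction records its PC, asks the per-thread profile for a node, and only replaces profileNode when consume() returns one, so the pointer never goes stale.

// Minimal sketch, assuming simplified stand-ins for gem5's profiling types.
#include <cstdint>
#include <map>

struct ProfileNode { std::uint64_t count = 0; };

// Stand-in for FunctionProfile: one node per PC.
struct FunctionProfile {
    std::map<std::uint64_t, ProfileNode> nodes;
    ProfileNode *consume(std::uint64_t pc) {
        return pc ? &nodes[pc] : nullptr;   // nullptr means "keep previous node"
    }
};

struct ThreadState {
    FunctionProfile *profile;
    ProfileNode *profileNode;
    std::uint64_t profilePC;
};

// Mirrors the shape of the #if FULL_SYSTEM block in commitHead().
void recordCommit(ThreadState &ts, std::uint64_t inst_pc) {
    if (!ts.profile)
        return;
    ts.profilePC = inst_pc;
    if (ProfileNode *node = ts.profile->consume(inst_pc))
        ts.profileNode = node;
}

int main() {
    static ProfileNode dummyNode;           // same trick as the O3ThreadState ctor
    FunctionProfile fp;
    ThreadState ts{&fp, &dummyNode, 3};
    recordCommit(ts, 0xfffffc0000301234ULL);
    recordCommit(ts, 0);                    // keeps the previously consumed node
    return ts.profileNode == &dummyNode;    // 0: node was updated as expected
}

The dummy-node seeding in main() mirrors the O3ThreadState constructor further down in this diff, which fills profileNode before the first commit.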
diff --git a/src/cpu/o3/cpu.cc b/src/cpu/o3/cpu.cc
index 19ab7f4c5..7386dfadd 100644
--- a/src/cpu/o3/cpu.cc
+++ b/src/cpu/o3/cpu.cc
@@ -33,6 +33,7 @@
#include "config/use_checker.hh"
#if FULL_SYSTEM
+#include "cpu/quiesce_event.hh"
#include "sim/system.hh"
#else
#include "sim/process.hh"
@@ -793,6 +794,7 @@ template <class Impl>
unsigned int
FullO3CPU<Impl>::drain(Event *drain_event)
{
+ DPRINTF(O3CPU, "Switching out\n");
drainCount = 0;
fetch.drain();
decode.drain();
@@ -849,6 +851,8 @@ FullO3CPU<Impl>::signalDrained()
changeState(SimObject::Drained);
+ BaseCPU::switchOut();
+
if (drainEvent) {
drainEvent->process();
drainEvent = NULL;
@@ -863,6 +867,7 @@ FullO3CPU<Impl>::switchOut()
{
fetch.switchOut();
rename.switchOut();
+ iew.switchOut();
commit.switchOut();
instList.clear();
while (!removeList.empty()) {
@@ -874,6 +879,8 @@ FullO3CPU<Impl>::switchOut()
if (checker)
checker->switchOut();
#endif
+ if (tickEvent.scheduled())
+ tickEvent.squash();
}
template <class Impl>
diff --git a/src/cpu/o3/fetch_impl.hh b/src/cpu/o3/fetch_impl.hh
index 80e429ae9..3c47c39fa 100644
--- a/src/cpu/o3/fetch_impl.hh
+++ b/src/cpu/o3/fetch_impl.hh
@@ -442,6 +442,7 @@ DefaultFetch<Impl>::takeOverFrom()
wroteToTimeBuffer = false;
_status = Inactive;
switchedOut = false;
+ interruptPending = false;
branchPred.takeOverFrom();
}
@@ -563,7 +564,7 @@ DefaultFetch<Impl>::fetchCacheLine(Addr fetch_PC, Fault &ret_fault, unsigned tid
unsigned flags = 0;
#endif // FULL_SYSTEM
- if (cacheBlocked || (interruptPending && flags == 0)) {
+ if (cacheBlocked || isSwitchedOut() || (interruptPending && flags == 0)) {
// Hold off fetch from getting new instructions when:
// Cache is blocked, or
// while an interrupt is pending and we're not in PAL mode, or
@@ -1152,8 +1153,8 @@ DefaultFetch<Impl>::fetch(bool &status_change)
fetch_PC = next_PC;
if (instruction->isQuiesce()) {
- warn("cycle %lli: Quiesce instruction encountered, halting fetch!",
- curTick);
+// warn("%lli: Quiesce instruction encountered, halting fetch!",
+// curTick);
fetchStatus[tid] = QuiescePending;
++numInst;
status_change = true;
@@ -1268,7 +1269,7 @@ DefaultFetch<Impl>::fetch(bool &status_change)
fetchStatus[tid] = TrapPending;
status_change = true;
- warn("cycle %lli: fault (%s) detected @ PC %08p", curTick, fault->name(), PC[tid]);
+// warn("%lli fault (%d) detected @ PC %08p", curTick, fault, PC[tid]);
#else // !FULL_SYSTEM
warn("cycle %lli: fault (%s) detected @ PC %08p", curTick, fault->name(), PC[tid]);
#endif // FULL_SYSTEM
diff --git a/src/cpu/o3/iew.hh b/src/cpu/o3/iew.hh
index 76fa008ee..a400c9fa8 100644
--- a/src/cpu/o3/iew.hh
+++ b/src/cpu/o3/iew.hh
@@ -216,6 +216,7 @@ class DefaultIEW
if (++wbOutstanding == wbMax)
ableToIssue = false;
DPRINTF(IEW, "wbOutstanding: %i\n", wbOutstanding);
+ assert(wbOutstanding <= wbMax);
#ifdef DEBUG
wbList.insert(sn);
#endif
@@ -226,6 +227,7 @@ class DefaultIEW
if (wbOutstanding-- == wbMax)
ableToIssue = true;
DPRINTF(IEW, "wbOutstanding: %i\n", wbOutstanding);
+ assert(wbOutstanding >= 0);
#ifdef DEBUG
assert(wbList.find(sn) != wbList.end());
wbList.erase(sn);
@@ -450,7 +452,9 @@ class DefaultIEW
unsigned wbCycle;
/** Number of instructions in flight that will writeback. */
- unsigned wbOutstanding;
+
+ /** Number of instructions in flight that will writeback. */
+ int wbOutstanding;
/** Writeback width. */
unsigned wbWidth;
@@ -507,6 +511,8 @@ class DefaultIEW
Stats::Scalar<> iewExecutedInsts;
/** Stat for total number of executed load instructions. */
Stats::Vector<> iewExecLoadInsts;
+ /** Stat for total number of executed store instructions. */
+// Stats::Scalar<> iewExecStoreInsts;
/** Stat for total number of squashed instructions skipped at execute. */
Stats::Scalar<> iewExecSquashedInsts;
/** Number of executed software prefetches. */
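
The asserts added around incrWb()/decrWb() above, together with wbOutstanding changing from unsigned to int, bound the writeback-bandwidth counter on both sides. A minimal standalone sketch of that gating logic follows; it is not the gem5 class, and the WritebackGate name plus the width/depth numbers are assumptions for illustration.

// Minimal sketch, assuming a small gate class in place of DefaultIEW.
#include <cassert>

struct WritebackGate {
    int wbOutstanding = 0;
    const int wbMax;
    bool ableToIssue = true;

    explicit WritebackGate(int width, int depth) : wbMax(width * depth) {}

    void incrWb() {
        if (++wbOutstanding == wbMax)
            ableToIssue = false;
        assert(wbOutstanding <= wbMax);      // mirrors the added upper-bound assert
    }

    void decrWb() {
        if (wbOutstanding-- == wbMax)
            ableToIssue = true;
        assert(wbOutstanding >= 0);          // meaningful now that the type is signed
    }
};

int main() {
    WritebackGate gate(2, 3);                // hypothetical width and depth
    for (int i = 0; i < 6; ++i) gate.incrWb();
    bool blocked = !gate.ableToIssue;        // all writeback slots in use
    gate.decrWb();
    return blocked && gate.ableToIssue ? 0 : 1;
}

With a signed counter the lower-bound assert can actually fire; as an unsigned it would wrap on an extra decrement and the check would always pass.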
diff --git a/src/cpu/o3/iew_impl.hh b/src/cpu/o3/iew_impl.hh
index e9b24a6d4..b2baae296 100644
--- a/src/cpu/o3/iew_impl.hh
+++ b/src/cpu/o3/iew_impl.hh
@@ -162,17 +162,17 @@ DefaultIEW<Impl>::regStats()
branchMispredicts = predictedTakenIncorrect + predictedNotTakenIncorrect;
iewExecutedInsts
- .name(name() + ".EXEC:insts")
+ .name(name() + ".iewExecutedInsts")
.desc("Number of executed instructions");
iewExecLoadInsts
.init(cpu->number_of_threads)
- .name(name() + ".EXEC:loads")
+ .name(name() + ".iewExecLoadInsts")
.desc("Number of load instructions executed")
.flags(total);
iewExecSquashedInsts
- .name(name() + ".EXEC:squashedInsts")
+ .name(name() + ".iewExecSquashedInsts")
.desc("Number of squashed instructions skipped in execute");
iewExecutedSwp
@@ -372,6 +372,8 @@ DefaultIEW<Impl>::switchOut()
{
// Clear any state.
switchedOut = true;
+ assert(insts[0].empty());
+ assert(skidBuffer[0].empty());
instQueue.switchOut();
ldstQueue.switchOut();
@@ -410,7 +412,6 @@ DefaultIEW<Impl>::takeOverFrom()
updateLSQNextCycle = false;
- // @todo: Fix hardcoded number
for (int i = 0; i < issueToExecQueue.getSize(); ++i) {
issueToExecQueue.advance();
}
@@ -611,9 +612,11 @@ DefaultIEW<Impl>::instToCommit(DynInstPtr &inst)
wbNumInst = 0;
}
- assert((wbCycle * wbWidth + wbNumInst) < wbMax);
+ assert((wbCycle * wbWidth + wbNumInst) <= wbMax);
}
+ DPRINTF(IEW, "Current wb cycle: %i, width: %i, numInst: %i\nwbActual:%i\n",
+ wbCycle, wbWidth, wbNumInst, wbCycle * wbWidth + wbNumInst);
// Add finished instruction to queue to commit.
(*iewQueue)[wbCycle].insts[wbNumInst] = inst;
(*iewQueue)[wbCycle].size++;
@@ -1273,13 +1276,23 @@ DefaultIEW<Impl>::executeInsts()
// event adds the instruction to the queue to commit
fault = ldstQueue.executeLoad(inst);
} else if (inst->isStore()) {
- ldstQueue.executeStore(inst);
+ fault = ldstQueue.executeStore(inst);
// If the store had a fault then it may not have a mem req
- if (inst->req && !(inst->req->getFlags() & LOCKED)) {
+ if (!inst->isStoreConditional() && fault == NoFault) {
inst->setExecuted();
instToCommit(inst);
+ } else if (fault != NoFault) {
+ // If the instruction faulted, then we need to send it along to commit
+ // without the instruction completing.
+
+ // Send this instruction to commit, also make sure iew stage
+ // realizes there is activity.
+ inst->setExecuted();
+
+ instToCommit(inst);
+ activityThisCycle();
}
// Store conditionals will mark themselves as
@@ -1404,7 +1417,7 @@ DefaultIEW<Impl>::writebackInsts()
// E.g. Uncached loads have not actually executed when they
// are first sent to commit. Instead commit must tell the LSQ
// when it's ready to execute the uncached load.
- if (!inst->isSquashed() && inst->isExecuted()) {
+ if (!inst->isSquashed() && inst->isExecuted() && inst->getFault() == NoFault) {
int dependents = instQueue.wakeDependents(inst);
for (int i = 0; i < inst->numDestRegs(); i++) {

diff --git a/src/cpu/o3/inst_queue.hh b/src/cpu/o3/inst_queue.hh
index d745faf7b..3dd4dc658 100644
--- a/src/cpu/o3/inst_queue.hh
+++ b/src/cpu/o3/inst_queue.hh
@@ -479,13 +479,13 @@ class InstructionQueue
/** Distribution of number of instructions in the queue.
* @todo: Need to create struct to track the entry time for each
* instruction. */
- Stats::VectorDistribution<> queueResDist;
+// Stats::VectorDistribution<> queueResDist;
/** Distribution of the number of instructions issued. */
Stats::Distribution<> numIssuedDist;
/** Distribution of the cycles it takes to issue an instruction.
* @todo: Need to create struct to track the ready time for each
* instruction. */
- Stats::VectorDistribution<> issueDelayDist;
+// Stats::VectorDistribution<> issueDelayDist;
/** Number of times an instruction could not be issued because a
* FU was busy.
diff --git a/src/cpu/o3/inst_queue_impl.hh b/src/cpu/o3/inst_queue_impl.hh
index 47634f645..6edb528a9 100644
--- a/src/cpu/o3/inst_queue_impl.hh
+++ b/src/cpu/o3/inst_queue_impl.hh
@@ -230,7 +230,7 @@ InstructionQueue<Impl>::regStats()
.name(name() + ".iqSquashedNonSpecRemoved")
.desc("Number of squashed non-spec instructions that were removed")
.prereq(iqSquashedNonSpecRemoved);
-
+/*
queueResDist
.init(Num_OpClasses, 0, 99, 2)
.name(name() + ".IQ:residence:")
@@ -240,6 +240,7 @@ InstructionQueue<Impl>::regStats()
for (int i = 0; i < Num_OpClasses; ++i) {
queueResDist.subname(i, opClassStrings[i]);
}
+*/
numIssuedDist
.init(0,totalWidth,1)
.name(name() + ".ISSUE:issued_per_cycle")
@@ -268,7 +269,7 @@ InstructionQueue<Impl>::regStats()
//
// How long did instructions for a particular FU type wait prior to issue
//
-
+/*
issueDelayDist
.init(Num_OpClasses,0,99,2)
.name(name() + ".ISSUE:")
@@ -281,7 +282,7 @@ InstructionQueue<Impl>::regStats()
subname << opClassStrings[i] << "_delay";
issueDelayDist.subname(i, subname.str());
}
-
+*/
issueRate
.name(name() + ".ISSUE:rate")
.desc("Inst issue rate")
@@ -385,8 +386,16 @@ template <class Impl>
void
InstructionQueue<Impl>::switchOut()
{
+/*
+ if (!instList[0].empty() || (numEntries != freeEntries) ||
+ !readyInsts[0].empty() || !nonSpecInsts.empty() || !listOrder.empty()) {
+ dumpInsts();
+// assert(0);
+ }
+*/
resetState();
dependGraph.reset();
+ instsToExecute.clear();
switchedOut = true;
for (int i = 0; i < numThreads; ++i) {
memDepUnit[i].switchOut();
@@ -642,9 +651,12 @@ template <class Impl>
void
InstructionQueue<Impl>::processFUCompletion(DynInstPtr &inst, int fu_idx)
{
+ DPRINTF(IQ, "Processing FU completion [sn:%lli]\n", inst->seqNum);
// The CPU could have been sleeping until this op completed (*extremely*
// long latency op). Wake it if it was. This may be overkill.
if (isSwitchedOut()) {
+ DPRINTF(IQ, "FU completion not processed, IQ is switched out [sn:%lli]\n",
+ inst->seqNum);
return;
}
@@ -1036,6 +1048,10 @@ InstructionQueue<Impl>::doSquash(unsigned tid)
(squashed_inst->isMemRef() &&
!squashed_inst->memOpDone)) {
+ DPRINTF(IQ, "[tid:%i]: Instruction [sn:%lli] PC %#x "
+ "squashed.\n",
+ tid, squashed_inst->seqNum, squashed_inst->readPC());
+
// Remove the instruction from the dependency list.
if (!squashed_inst->isNonSpeculative() &&
!squashed_inst->isStoreConditional() &&
@@ -1066,7 +1082,7 @@ InstructionQueue<Impl>::doSquash(unsigned tid)
++iqSquashedOperandsExamined;
}
- } else {
+ } else if (!squashed_inst->isStoreConditional() || !squashed_inst->isCompleted()) {
NonSpecMapIt ns_inst_it =
nonSpecInsts.find(squashed_inst->seqNum);
assert(ns_inst_it != nonSpecInsts.end());
@@ -1093,10 +1109,6 @@ InstructionQueue<Impl>::doSquash(unsigned tid)
count[squashed_inst->threadNumber]--;
++freeEntries;
-
- DPRINTF(IQ, "[tid:%i]: Instruction [sn:%lli] PC %#x "
- "squashed.\n",
- tid, squashed_inst->seqNum, squashed_inst->readPC());
}
instList[tid].erase(squash_it--);
diff --git a/src/cpu/o3/lsq_unit.hh b/src/cpu/o3/lsq_unit.hh
index 1358a3699..90d1a3d53 100644
--- a/src/cpu/o3/lsq_unit.hh
+++ b/src/cpu/o3/lsq_unit.hh
@@ -407,7 +407,6 @@ class LSQUnit {
// Will also need how many read/write ports the Dcache has. Or keep track
// of that in stage that is one level up, and only call executeLoad/Store
// the appropriate number of times.
-
/** Total number of loads forwarded from LSQ stores. */
Stats::Scalar<> lsqForwLoads;
@@ -421,6 +420,9 @@ class LSQUnit {
* ignored due to the instruction already being squashed. */
Stats::Scalar<> lsqIgnoredResponses;
+ /** Total number of memory ordering violations. */
+ Stats::Scalar<> lsqMemOrderViolation;
+
/** Total number of squashed stores. */
Stats::Scalar<> lsqSquashedStores;
diff --git a/src/cpu/o3/lsq_unit_impl.hh b/src/cpu/o3/lsq_unit_impl.hh
index fa716c712..98bea74fb 100644
--- a/src/cpu/o3/lsq_unit_impl.hh
+++ b/src/cpu/o3/lsq_unit_impl.hh
@@ -180,6 +180,10 @@ LSQUnit<Impl>::regStats()
.name(name() + ".ignoredResponses")
.desc("Number of memory responses ignored because the instruction is squashed");
+ lsqMemOrderViolation
+ .name(name() + ".memOrderViolation")
+ .desc("Number of memory ordering violations");
+
lsqSquashedStores
.name(name() + ".squashedStores")
.desc("Number of stores squashed");
@@ -220,8 +224,10 @@ void
LSQUnit<Impl>::switchOut()
{
switchedOut = true;
- for (int i = 0; i < loadQueue.size(); ++i)
+ for (int i = 0; i < loadQueue.size(); ++i) {
+ assert(!loadQueue[i]);
loadQueue[i] = NULL;
+ }
assert(storesToWB == 0);
}
@@ -408,6 +414,11 @@ LSQUnit<Impl>::executeLoad(DynInstPtr &inst)
if (load_fault != NoFault) {
// Send this instruction to commit, also make sure iew stage
// realizes there is activity.
+ // Mark it as executed unless it is an uncached load that
+ // needs to hit the head of commit.
+ if (!(inst->req->getFlags() & UNCACHEABLE) || inst->isAtCommit()) {
+ inst->setExecuted();
+ }
iewStage->instToCommit(inst);
iewStage->activityThisCycle();
}
@@ -467,6 +478,7 @@ LSQUnit<Impl>::executeStore(DynInstPtr &store_inst)
// A load incorrectly passed this store. Squash and refetch.
// For now return a fault to show that it was unsuccessful.
memDepViolator = loadQueue[load_idx];
+ ++lsqMemOrderViolation;
return genMachineCheckFault();
}
@@ -820,6 +832,7 @@ LSQUnit<Impl>::completeStore(int store_idx)
// A bit conservative because a store completion may not free up entries,
// but hopefully avoids two store completions in one cycle from making
// the CPU tick twice.
+ cpu->wakeCPU();
cpu->activityThisCycle();
if (store_idx == storeHead) {
diff --git a/src/cpu/o3/mem_dep_unit_impl.hh b/src/cpu/o3/mem_dep_unit_impl.hh
index 16f67a4e0..c649ca385 100644
--- a/src/cpu/o3/mem_dep_unit_impl.hh
+++ b/src/cpu/o3/mem_dep_unit_impl.hh
@@ -109,6 +109,9 @@ template <class MemDepPred, class Impl>
void
MemDepUnit<MemDepPred, Impl>::switchOut()
{
+ assert(instList[0].empty());
+ assert(instsToReplay.empty());
+ assert(memDepHash.empty());
// Clear any state.
for (int i = 0; i < Impl::MaxThreads; ++i) {
instList[i].clear();
diff --git a/src/cpu/o3/rename.hh b/src/cpu/o3/rename.hh
index ba26a01dd..177b9cb87 100644
--- a/src/cpu/o3/rename.hh
+++ b/src/cpu/o3/rename.hh
@@ -417,6 +417,8 @@ class DefaultRename
/** The maximum skid buffer size. */
unsigned skidBufferMax;
+ PhysRegIndex maxPhysicalRegs;
+
/** Enum to record the source of a structure full stall. Can come from
* either ROB, IQ, LSQ, and it is priortized in that order.
*/
diff --git a/src/cpu/o3/rename_impl.hh b/src/cpu/o3/rename_impl.hh
index 782c0fe5f..248d7deb6 100644
--- a/src/cpu/o3/rename_impl.hh
+++ b/src/cpu/o3/rename_impl.hh
@@ -41,7 +41,8 @@ DefaultRename<Impl>::DefaultRename(Params *params)
commitToRenameDelay(params->commitToRenameDelay),
renameWidth(params->renameWidth),
commitWidth(params->commitWidth),
- numThreads(params->numberOfThreads)
+ numThreads(params->numberOfThreads),
+ maxPhysicalRegs(params->numPhysIntRegs + params->numPhysFloatRegs)
{
_status = Inactive;
@@ -286,6 +287,11 @@ DefaultRename<Impl>::switchOut()
// Put the renamed physical register back on the free list.
freeList->addReg(hb_it->newPhysReg);
+ // Be sure to mark its register as ready if it's a misc register.
+ if (hb_it->newPhysReg >= maxPhysicalRegs) {
+ scoreboard->setReg(hb_it->newPhysReg);
+ }
+
historyBuffer[i].erase(hb_it++);
}
insts[i].clear();
@@ -889,6 +895,11 @@ DefaultRename<Impl>::doSquash(const InstSeqNum &squashed_seq_num, unsigned tid)
// Put the renamed physical register back on the free list.
freeList->addReg(hb_it->newPhysReg);
+ // Be sure to mark its register as ready if it's a misc register.
+ if (hb_it->newPhysReg >= maxPhysicalRegs) {
+ scoreboard->setReg(hb_it->newPhysReg);
+ }
+
historyBuffer[tid].erase(hb_it++);
++renameUndoneMaps;
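
Both hunks above add the same step to the rename-undo walk: after the renamed register is returned to the free list, an index at or above maxPhysicalRegs (a misc register) gets its scoreboard bit set ready again, because those indices never come back through the free list. A minimal standalone sketch of that check follows; it is not the gem5 code, and the register counts and the HistoryEntry/Scoreboard shapes are assumptions.

// Minimal sketch, assuming simplified rename-history and scoreboard types.
#include <cstddef>
#include <list>
#include <vector>

using PhysRegIndex = short;

struct HistoryEntry { PhysRegIndex newPhysReg; PhysRegIndex prevPhysReg; };

struct Scoreboard {
    std::vector<bool> ready;
    explicit Scoreboard(std::size_t n) : ready(n, false) {}
    void setReg(PhysRegIndex r) { ready[r] = true; }
};

int main() {
    const PhysRegIndex numPhysIntRegs = 128, numPhysFloatRegs = 128;  // assumed sizes
    const PhysRegIndex maxPhysicalRegs = numPhysIntRegs + numPhysFloatRegs;

    Scoreboard scoreboard(maxPhysicalRegs + 16);      // 16 assumed misc registers
    std::list<HistoryEntry> historyBuffer;
    historyBuffer.push_back({PhysRegIndex(maxPhysicalRegs + 2), 7});

    // Squash / switch-out walk: the real code also calls freeList->addReg();
    // the extra step in the diff is the misc-register scoreboard update.
    for (auto it = historyBuffer.begin(); it != historyBuffer.end();) {
        if (it->newPhysReg >= maxPhysicalRegs)
            scoreboard.setReg(it->newPhysReg);
        it = historyBuffer.erase(it);
    }
    return scoreboard.ready[maxPhysicalRegs + 2] ? 0 : 1;   // 0: marked ready
}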
diff --git a/src/cpu/o3/thread_context_impl.hh b/src/cpu/o3/thread_context_impl.hh
index a4546e669..25e1db21c 100755
--- a/src/cpu/o3/thread_context_impl.hh
+++ b/src/cpu/o3/thread_context_impl.hh
@@ -54,7 +54,7 @@ template <class Impl>
void
O3ThreadContext<Impl>::dumpFuncProfile()
{
- // Currently not supported
+ thread->dumpFuncProfile();
}
#endif
@@ -239,12 +239,16 @@ O3ThreadContext<Impl>::readLastSuspend()
template <class Impl>
void
O3ThreadContext<Impl>::profileClear()
-{}
+{
+ thread->profileClear();
+}
template <class Impl>
void
O3ThreadContext<Impl>::profileSample()
-{}
+{
+ thread->profileSample();
+}
#endif
template <class Impl>
diff --git a/src/cpu/o3/thread_state.hh b/src/cpu/o3/thread_state.hh
index b6f2e14c0..5fe7bb94d 100644
--- a/src/cpu/o3/thread_state.hh
+++ b/src/cpu/o3/thread_state.hh
@@ -31,8 +31,11 @@
#ifndef __CPU_O3_THREAD_STATE_HH__
#define __CPU_O3_THREAD_STATE_HH__
+#include "base/callback.hh"
+#include "base/output.hh"
#include "cpu/thread_context.hh"
#include "cpu/thread_state.hh"
+#include "sim/sim_exit.hh"
class Event;
class Process;
@@ -75,8 +78,22 @@ struct O3ThreadState : public ThreadState {
#if FULL_SYSTEM
O3ThreadState(O3CPU *_cpu, int _thread_num)
: ThreadState(-1, _thread_num),
- inSyscall(0), trapPending(0)
- { }
+ cpu(_cpu), inSyscall(0), trapPending(0)
+ {
+ if (cpu->params->profile) {
+ profile = new FunctionProfile(cpu->params->system->kernelSymtab);
+ Callback *cb =
+ new MakeCallback<O3ThreadState,
+ &O3ThreadState::dumpFuncProfile>(this);
+ registerExitCallback(cb);
+ }
+
+ // let's fill with a dummy node for now so we don't get a segfault
+ // on the first cycle when there's no node available.
+ static ProfileNode dummyNode;
+ profileNode = &dummyNode;
+ profilePC = 3;
+ }
#else
O3ThreadState(O3CPU *_cpu, int _thread_num, Process *_process, int _asid,
MemObject *mem)
@@ -95,6 +112,14 @@ struct O3ThreadState : public ThreadState {
/** Handles the syscall. */
void syscall(int64_t callnum) { process->syscall(callnum, tc); }
#endif
+
+#if FULL_SYSTEM
+ void dumpFuncProfile()
+ {
+ std::ostream *os = simout.create(csprintf("profile.%s.dat", cpu->name()));
+ profile->dump(tc, *os);
+ }
+#endif
};
#endif // __CPU_O3_THREAD_STATE_HH__
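
The new O3ThreadState constructor and dumpFuncProfile() above wire per-thread function profiling to simulator exit: when profiling is enabled the thread allocates a FunctionProfile and registers a callback that writes profile.<cpu>.dat when the simulation ends. A minimal standalone sketch of that callback pattern follows; it is not the gem5 code, and the registerExitCallback stand-in and the file naming are simplified assumptions.

// Minimal sketch, assuming a plain callback list in place of gem5's Callback machinery.
#include <fstream>
#include <functional>
#include <ostream>
#include <string>
#include <vector>

static std::vector<std::function<void()>> exitCallbacks;
static void registerExitCallback(std::function<void()> cb) {
    exitCallbacks.push_back(std::move(cb));
}

struct FunctionProfile {
    unsigned long samples = 0;
    void dump(std::ostream &os) const { os << "samples " << samples << "\n"; }
};

struct ThreadState {
    std::string cpuName;
    FunctionProfile *profile = nullptr;

    ThreadState(std::string name, bool profiling) : cpuName(std::move(name)) {
        if (profiling) {
            profile = new FunctionProfile;
            registerExitCallback([this] { dumpFuncProfile(); });
        }
    }

    void dumpFuncProfile() {
        std::ofstream os("profile." + cpuName + ".dat");   // like simout.create(...)
        profile->dump(os);
    }
};

int main() {
    ThreadState ts("system.cpu0", true);
    ts.profile->samples = 42;
    for (auto &cb : exitCallbacks) cb();   // the simulator would run these at exit
}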
diff --git a/src/cpu/o3/tournament_pred.cc b/src/cpu/o3/tournament_pred.cc
index 7cf78dcb1..ffb941c77 100644
--- a/src/cpu/o3/tournament_pred.cc
+++ b/src/cpu/o3/tournament_pred.cc
@@ -62,6 +62,8 @@ TournamentBP::TournamentBP(unsigned _localPredictorSize,
for (int i = 0; i < localPredictorSize; ++i)
localCtrs[i].setBits(localCtrBits);
+ localPredictorMask = floorPow2(localPredictorSize) - 1;
+
if (!isPowerOf2(localHistoryTableSize)) {
fatal("Invalid local history table size!\n");
}
@@ -158,7 +160,7 @@ TournamentBP::lookup(Addr &branch_addr, void * &bp_history)
//Lookup in the local predictor to get its branch prediction
local_history_idx = calcLocHistIdx(branch_addr);
local_predictor_idx = localHistoryTable[local_history_idx]
- & localHistoryMask;
+ & localPredictorMask;
local_prediction = localCtrs[local_predictor_idx].read() > threshold;
//Lookup in the global predictor to get its branch prediction
@@ -176,7 +178,8 @@ TournamentBP::lookup(Addr &branch_addr, void * &bp_history)
bp_history = (void *)history;
assert(globalHistory < globalPredictorSize &&
- local_history_idx < localPredictorSize);
+ local_history_idx < localHistoryTableSize &&
+ local_predictor_idx < localPredictorSize);
// Commented code is for doing speculative update of counters and
// all histories.
@@ -234,7 +237,7 @@ TournamentBP::update(Addr &branch_addr, bool taken, void *bp_history)
// Get the local predictor's current prediction
local_history_idx = calcLocHistIdx(branch_addr);
local_predictor_hist = localHistoryTable[local_history_idx];
- local_predictor_idx = local_predictor_hist & localHistoryMask;
+ local_predictor_idx = local_predictor_hist & localPredictorMask;
// Update the choice predictor to tell it which one was correct if
// there was a prediction.
@@ -256,6 +259,7 @@ TournamentBP::update(Addr &branch_addr, bool taken, void *bp_history)
}
assert(globalHistory < globalPredictorSize &&
+ local_history_idx < localHistoryTableSize &&
local_predictor_idx < localPredictorSize);
// Update the counters and local history with the proper
diff --git a/src/cpu/o3/tournament_pred.hh b/src/cpu/o3/tournament_pred.hh
index 66b4aaae2..472944910 100644
--- a/src/cpu/o3/tournament_pred.hh
+++ b/src/cpu/o3/tournament_pred.hh
@@ -159,6 +159,9 @@ class TournamentBP
/** Size of the local predictor. */
unsigned localPredictorSize;
+ /** Mask to get the proper index bits into the predictor. */
+ unsigned localPredictorMask;
+
/** Number of bits of the local predictor's counters. */
unsigned localCtrBits;
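
The localPredictorMask changes in tournament_pred.cc and tournament_pred.hh above fix the index used for the local counter array: the local history table and the local predictor can have different sizes, so the history value must be masked by the predictor size, not by the history-table mask. A minimal standalone sketch of the difference follows; it is not the gem5 code, and the table sizes, shift amount, and the floorPow2Mask helper are assumptions for illustration.

// Minimal sketch, assuming illustrative sizes where the history table is
// larger than the counter array, which is exactly when the old masking breaks.
#include <cassert>
#include <cstdint>
#include <vector>

static unsigned floorPow2Mask(unsigned size) {
    unsigned p = 1;
    while (p * 2 <= size) p *= 2;
    return p - 1;                       // same effect as floorPow2(size) - 1
}

int main() {
    const unsigned localHistoryTableSize = 2048;   // assumed
    const unsigned localPredictorSize    = 1024;   // assumed

    std::vector<std::uint16_t> localHistoryTable(localHistoryTableSize, 0x7ff);
    std::vector<std::uint8_t>  localCtrs(localPredictorSize, 0);

    unsigned localHistoryMask   = localHistoryTableSize - 1;
    unsigned localPredictorMask = floorPow2Mask(localPredictorSize);

    std::uint64_t branch_addr = 0xfffffc0000301234ULL;
    unsigned local_history_idx = (branch_addr >> 2) & localHistoryMask;

    // The old code masked with localHistoryMask here, so a history value of
    // 0x7ff would index past the end of localCtrs; the predictor mask cannot.
    unsigned local_predictor_idx =
        localHistoryTable[local_history_idx] & localPredictorMask;

    assert(local_predictor_idx < localPredictorSize);
    return localCtrs[local_predictor_idx];
}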