summaryrefslogtreecommitdiff
path: root/src/cpu/o3
diff options
context:
space:
mode:
authorKevin Lim <ktlim@umich.edu>2006-09-30 23:43:23 -0400
committerKevin Lim <ktlim@umich.edu>2006-09-30 23:43:23 -0400
commit4ed184eadefb16627f2807cb3dc7886bb1b920d1 (patch)
tree7704b61afbb10876c86329665fe7d35865ba611b /src/cpu/o3
parent51425382ca079290411e033acc8cf14dde36c82b (diff)
parent30b719fd768115bb26e848a02096350c11c1b0bd (diff)
downloadgem5-4ed184eadefb16627f2807cb3dc7886bb1b920d1.tar.xz
Merge ktlim@zamp:./local/clean/o3-merge/m5
into zamp.eecs.umich.edu:/z/ktlim2/clean/o3-merge/newmem configs/boot/micro_memlat.rcS: configs/boot/micro_tlblat.rcS: src/arch/alpha/ev5.cc: src/arch/alpha/isa/decoder.isa: src/arch/alpha/isa_traits.hh: src/cpu/base.cc: src/cpu/base.hh: src/cpu/base_dyn_inst.hh: src/cpu/checker/cpu.hh: src/cpu/checker/cpu_impl.hh: src/cpu/o3/alpha/cpu_impl.hh: src/cpu/o3/alpha/params.hh: src/cpu/o3/checker_builder.cc: src/cpu/o3/commit_impl.hh: src/cpu/o3/cpu.cc: src/cpu/o3/decode_impl.hh: src/cpu/o3/fetch_impl.hh: src/cpu/o3/iew.hh: src/cpu/o3/iew_impl.hh: src/cpu/o3/inst_queue.hh: src/cpu/o3/lsq.hh: src/cpu/o3/lsq_impl.hh: src/cpu/o3/lsq_unit.hh: src/cpu/o3/lsq_unit_impl.hh: src/cpu/o3/regfile.hh: src/cpu/o3/rename_impl.hh: src/cpu/o3/thread_state.hh: src/cpu/ozone/checker_builder.cc: src/cpu/ozone/cpu.hh: src/cpu/ozone/cpu_impl.hh: src/cpu/ozone/front_end.hh: src/cpu/ozone/front_end_impl.hh: src/cpu/ozone/lw_back_end.hh: src/cpu/ozone/lw_back_end_impl.hh: src/cpu/ozone/lw_lsq.hh: src/cpu/ozone/lw_lsq_impl.hh: src/cpu/ozone/thread_state.hh: src/cpu/simple/base.cc: src/cpu/simple_thread.cc: src/cpu/simple_thread.hh: src/cpu/thread_state.hh: src/dev/ide_disk.cc: src/python/m5/objects/O3CPU.py: src/python/m5/objects/Root.py: src/python/m5/objects/System.py: src/sim/pseudo_inst.cc: src/sim/pseudo_inst.hh: src/sim/system.hh: util/m5/m5.c: Hand merge. --HG-- rename : arch/alpha/ev5.cc => src/arch/alpha/ev5.cc rename : arch/alpha/freebsd/system.cc => src/arch/alpha/freebsd/system.cc rename : arch/alpha/isa/decoder.isa => src/arch/alpha/isa/decoder.isa rename : arch/alpha/isa/mem.isa => src/arch/alpha/isa/mem.isa rename : arch/alpha/isa_traits.hh => src/arch/alpha/isa_traits.hh rename : arch/alpha/linux/system.cc => src/arch/alpha/linux/system.cc rename : arch/alpha/system.cc => src/arch/alpha/system.cc rename : arch/alpha/tru64/system.cc => src/arch/alpha/tru64/system.cc rename : cpu/base.cc => src/cpu/base.cc rename : cpu/base.hh => src/cpu/base.hh rename : cpu/base_dyn_inst.hh => src/cpu/base_dyn_inst.hh rename : cpu/checker/cpu.hh => src/cpu/checker/cpu.hh rename : cpu/checker/cpu.cc => src/cpu/checker/cpu_impl.hh rename : cpu/o3/alpha_cpu_builder.cc => src/cpu/o3/alpha/cpu_builder.cc rename : cpu/checker/o3_cpu_builder.cc => src/cpu/o3/checker_builder.cc rename : cpu/o3/commit_impl.hh => src/cpu/o3/commit_impl.hh rename : cpu/o3/cpu.cc => src/cpu/o3/cpu.cc rename : cpu/o3/fetch_impl.hh => src/cpu/o3/fetch_impl.hh rename : cpu/o3/iew.hh => src/cpu/o3/iew.hh rename : cpu/o3/iew_impl.hh => src/cpu/o3/iew_impl.hh rename : cpu/o3/inst_queue.hh => src/cpu/o3/inst_queue.hh rename : cpu/o3/inst_queue_impl.hh => src/cpu/o3/inst_queue_impl.hh rename : cpu/o3/lsq_impl.hh => src/cpu/o3/lsq_impl.hh rename : cpu/o3/lsq_unit.hh => src/cpu/o3/lsq_unit.hh rename : cpu/o3/lsq_unit_impl.hh => src/cpu/o3/lsq_unit_impl.hh rename : cpu/o3/mem_dep_unit_impl.hh => src/cpu/o3/mem_dep_unit_impl.hh rename : cpu/o3/rename.hh => src/cpu/o3/rename.hh rename : cpu/o3/rename_impl.hh => src/cpu/o3/rename_impl.hh rename : cpu/o3/thread_state.hh => src/cpu/o3/thread_state.hh rename : cpu/o3/tournament_pred.cc => src/cpu/o3/tournament_pred.cc rename : cpu/o3/tournament_pred.hh => src/cpu/o3/tournament_pred.hh rename : cpu/checker/cpu_builder.cc => src/cpu/ozone/checker_builder.cc rename : cpu/ozone/cpu.hh => src/cpu/ozone/cpu.hh rename : cpu/ozone/cpu_builder.cc => src/cpu/ozone/cpu_builder.cc rename : cpu/ozone/cpu_impl.hh => src/cpu/ozone/cpu_impl.hh rename : cpu/ozone/front_end.hh => src/cpu/ozone/front_end.hh rename : cpu/ozone/front_end_impl.hh => src/cpu/ozone/front_end_impl.hh rename : cpu/ozone/inorder_back_end_impl.hh => src/cpu/ozone/inorder_back_end_impl.hh rename : cpu/ozone/inst_queue_impl.hh => src/cpu/ozone/inst_queue_impl.hh rename : cpu/ozone/lw_back_end.hh => src/cpu/ozone/lw_back_end.hh rename : cpu/ozone/lw_back_end_impl.hh => src/cpu/ozone/lw_back_end_impl.hh rename : cpu/ozone/lw_lsq.hh => src/cpu/ozone/lw_lsq.hh rename : cpu/ozone/lw_lsq_impl.hh => src/cpu/ozone/lw_lsq_impl.hh rename : cpu/ozone/simple_params.hh => src/cpu/ozone/simple_params.hh rename : cpu/ozone/thread_state.hh => src/cpu/ozone/thread_state.hh rename : cpu/simple/cpu.cc => src/cpu/simple/base.cc rename : cpu/cpu_exec_context.cc => src/cpu/simple_thread.cc rename : cpu/thread_state.hh => src/cpu/thread_state.hh rename : dev/ide_disk.hh => src/dev/ide_disk.hh rename : python/m5/objects/BaseCPU.py => src/python/m5/objects/BaseCPU.py rename : python/m5/objects/AlphaFullCPU.py => src/python/m5/objects/O3CPU.py rename : python/m5/objects/OzoneCPU.py => src/python/m5/objects/OzoneCPU.py rename : python/m5/objects/Root.py => src/python/m5/objects/Root.py rename : python/m5/objects/System.py => src/python/m5/objects/System.py rename : sim/eventq.hh => src/sim/eventq.hh rename : sim/pseudo_inst.cc => src/sim/pseudo_inst.cc rename : sim/pseudo_inst.hh => src/sim/pseudo_inst.hh rename : sim/serialize.cc => src/sim/serialize.cc rename : sim/stat_control.cc => src/sim/stat_control.cc rename : sim/stat_control.hh => src/sim/stat_control.hh rename : sim/system.hh => src/sim/system.hh extra : convert_revision : 135d90e43f6cea89f9460ba4e23f4b0b85886e7d
Diffstat (limited to 'src/cpu/o3')
-rw-r--r--src/cpu/o3/alpha/cpu_builder.cc11
-rw-r--r--src/cpu/o3/checker_builder.cc13
-rw-r--r--src/cpu/o3/commit_impl.hh19
-rw-r--r--src/cpu/o3/cpu.cc43
-rw-r--r--src/cpu/o3/fetch_impl.hh9
-rw-r--r--src/cpu/o3/iew.hh8
-rw-r--r--src/cpu/o3/iew_impl.hh45
-rw-r--r--src/cpu/o3/inst_queue.hh4
-rw-r--r--src/cpu/o3/inst_queue_impl.hh28
-rw-r--r--src/cpu/o3/lsq_impl.hh10
-rw-r--r--src/cpu/o3/lsq_unit.hh11
-rw-r--r--src/cpu/o3/lsq_unit_impl.hh14
-rw-r--r--src/cpu/o3/mem_dep_unit_impl.hh3
-rw-r--r--src/cpu/o3/rename.hh2
-rw-r--r--src/cpu/o3/rename_impl.hh13
-rw-r--r--src/cpu/o3/thread_state.hh29
-rw-r--r--src/cpu/o3/tournament_pred.cc10
-rw-r--r--src/cpu/o3/tournament_pred.hh3
18 files changed, 233 insertions, 42 deletions
diff --git a/src/cpu/o3/alpha/cpu_builder.cc b/src/cpu/o3/alpha/cpu_builder.cc
index 5e767655d..fbf1f342c 100644
--- a/src/cpu/o3/alpha/cpu_builder.cc
+++ b/src/cpu/o3/alpha/cpu_builder.cc
@@ -56,6 +56,7 @@ SimObjectParam<System *> system;
Param<int> cpu_id;
SimObjectParam<AlphaITB *> itb;
SimObjectParam<AlphaDTB *> dtb;
+Param<Tick> profile;
#else
SimObjectVectorParam<Process *> workload;
#endif // FULL_SYSTEM
@@ -68,6 +69,8 @@ Param<Counter> max_insts_any_thread;
Param<Counter> max_insts_all_threads;
Param<Counter> max_loads_any_thread;
Param<Counter> max_loads_all_threads;
+Param<Counter> stats_reset_inst;
+Param<Tick> progress_interval;
Param<unsigned> cachePorts;
@@ -162,6 +165,7 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(DerivO3CPU)
INIT_PARAM(cpu_id, "processor ID"),
INIT_PARAM(itb, "Instruction translation buffer"),
INIT_PARAM(dtb, "Data translation buffer"),
+ INIT_PARAM(profile, ""),
#else
INIT_PARAM(workload, "Processes to run"),
#endif // FULL_SYSTEM
@@ -184,6 +188,10 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(DerivO3CPU)
"Terminate when all threads have reached this load"
"count",
0),
+ INIT_PARAM_DFLT(stats_reset_inst,
+ "blah",
+ 0),
+ INIT_PARAM_DFLT(progress_interval, "Progress interval", 0),
INIT_PARAM_DFLT(cachePorts, "Cache Ports", 200),
@@ -305,6 +313,7 @@ CREATE_SIM_OBJECT(DerivO3CPU)
params->cpu_id = cpu_id;
params->itb = itb;
params->dtb = dtb;
+ params->profile = profile;
#else
params->workload = workload;
#endif // FULL_SYSTEM
@@ -317,6 +326,8 @@ CREATE_SIM_OBJECT(DerivO3CPU)
params->max_insts_all_threads = max_insts_all_threads;
params->max_loads_any_thread = max_loads_any_thread;
params->max_loads_all_threads = max_loads_all_threads;
+ params->stats_reset_inst = stats_reset_inst;
+ params->progress_interval = progress_interval;
//
// Caches
diff --git a/src/cpu/o3/checker_builder.cc b/src/cpu/o3/checker_builder.cc
index 782d963b0..ad83ec57a 100644
--- a/src/cpu/o3/checker_builder.cc
+++ b/src/cpu/o3/checker_builder.cc
@@ -64,6 +64,8 @@ BEGIN_DECLARE_SIM_OBJECT_PARAMS(O3Checker)
Param<Counter> max_insts_all_threads;
Param<Counter> max_loads_any_thread;
Param<Counter> max_loads_all_threads;
+ Param<Counter> stats_reset_inst;
+ Param<Tick> progress_interval;
#if FULL_SYSTEM
SimObjectParam<AlphaITB *> itb;
@@ -78,6 +80,7 @@ BEGIN_DECLARE_SIM_OBJECT_PARAMS(O3Checker)
Param<bool> defer_registration;
Param<bool> exitOnError;
+ Param<bool> updateOnError;
Param<bool> warnOnlyOnLoadError;
Param<bool> function_trace;
Param<Tick> function_trace_start;
@@ -94,6 +97,9 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(O3Checker)
"terminate when any thread reaches this load count"),
INIT_PARAM(max_loads_all_threads,
"terminate when all threads have reached this load count"),
+ INIT_PARAM(stats_reset_inst,
+ "blah"),
+ INIT_PARAM_DFLT(progress_interval, "CPU Progress Interval", 0),
#if FULL_SYSTEM
INIT_PARAM(itb, "Instruction TLB"),
@@ -109,6 +115,7 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(O3Checker)
INIT_PARAM(defer_registration, "defer system registration (for sampling)"),
INIT_PARAM(exitOnError, "exit on error"),
+ INIT_PARAM(updateOnError, "Update the checker with the main CPU's state on error"),
INIT_PARAM_DFLT(warnOnlyOnLoadError, "warn, but don't exit, if a load "
"result errors", false),
INIT_PARAM(function_trace, "Enable function trace"),
@@ -126,7 +133,9 @@ CREATE_SIM_OBJECT(O3Checker)
params->max_insts_all_threads = 0;
params->max_loads_any_thread = 0;
params->max_loads_all_threads = 0;
+ params->stats_reset_inst = 0;
params->exitOnError = exitOnError;
+ params->updateOnError = updateOnError;
params->warnOnlyOnLoadError = warnOnlyOnLoadError;
params->deferRegistration = defer_registration;
params->functionTrace = function_trace;
@@ -139,6 +148,10 @@ CREATE_SIM_OBJECT(O3Checker)
temp = max_insts_all_threads;
temp = max_loads_any_thread;
temp = max_loads_all_threads;
+ temp = stats_reset_inst;
+ Tick temp2 = progress_interval;
+ params->progress_interval = 0;
+ temp2++;
#if FULL_SYSTEM
params->itb = itb;
diff --git a/src/cpu/o3/commit_impl.hh b/src/cpu/o3/commit_impl.hh
index 34f487e2c..6ae01ae67 100644
--- a/src/cpu/o3/commit_impl.hh
+++ b/src/cpu/o3/commit_impl.hh
@@ -1083,12 +1083,26 @@ DefaultCommit<Impl>::commitHead(DynInstPtr &head_inst, unsigned inst_num)
// Generate trap squash event.
generateTrapEvent(tid);
-
+// warn("%lli fault (%d) handled @ PC %08p", curTick, inst_fault->name(), head_inst->readPC());
return false;
}
updateComInstStats(head_inst);
+#if FULL_SYSTEM
+ if (thread[tid]->profile) {
+// bool usermode =
+// (cpu->readMiscReg(AlphaISA::IPR_DTB_CM, tid) & 0x18) != 0;
+// thread[tid]->profilePC = usermode ? 1 : head_inst->readPC();
+ thread[tid]->profilePC = head_inst->readPC();
+ ProfileNode *node = thread[tid]->profile->consume(thread[tid]->getXCProxy(),
+ head_inst->staticInst);
+
+ if (node)
+ thread[tid]->profileNode = node;
+ }
+#endif
+
if (head_inst->traceData) {
head_inst->traceData->setFetchSeq(head_inst->seqNum);
head_inst->traceData->setCPSeq(thread[tid]->numInst);
@@ -1102,6 +1116,9 @@ DefaultCommit<Impl>::commitHead(DynInstPtr &head_inst, unsigned inst_num)
head_inst->renamedDestRegIdx(i));
}
+ if (head_inst->isCopy())
+ panic("Should not commit any copy instructions!");
+
// Finally clear the head ROB entry.
rob->retireHead(tid);
diff --git a/src/cpu/o3/cpu.cc b/src/cpu/o3/cpu.cc
index 19ab7f4c5..4279df6f7 100644
--- a/src/cpu/o3/cpu.cc
+++ b/src/cpu/o3/cpu.cc
@@ -33,6 +33,7 @@
#include "config/use_checker.hh"
#if FULL_SYSTEM
+#include "cpu/quiesce_event.hh"
#include "sim/system.hh"
#else
#include "sim/process.hh"
@@ -793,6 +794,8 @@ template <class Impl>
unsigned int
FullO3CPU<Impl>::drain(Event *drain_event)
{
+ DPRINTF(O3CPU, "Switching out\n");
+ BaseCPU::switchOut(_sampler);
drainCount = 0;
fetch.drain();
decode.drain();
@@ -863,6 +866,7 @@ FullO3CPU<Impl>::switchOut()
{
fetch.switchOut();
rename.switchOut();
+ iew.switchOut();
commit.switchOut();
instList.clear();
while (!removeList.empty()) {
@@ -931,6 +935,45 @@ FullO3CPU<Impl>::takeOverFrom(BaseCPU *oldCPU)
}
template <class Impl>
+void
+FullO3CPU<Impl>::serialize(std::ostream &os)
+{
+ BaseCPU::serialize(os);
+ nameOut(os, csprintf("%s.tickEvent", name()));
+ tickEvent.serialize(os);
+
+ // Use SimpleThread's ability to checkpoint to make it easier to
+ // write out the registers. Also make this static so it doesn't
+ // get instantiated multiple times (causes a panic in statistics).
+ static CPUExecContext temp;
+
+ for (int i = 0; i < thread.size(); i++) {
+ nameOut(os, csprintf("%s.xc.%i", name(), i));
+ temp.copyXC(thread[i]->getXCProxy());
+ temp.serialize(os);
+ }
+}
+
+template <class Impl>
+void
+FullO3CPU<Impl>::unserialize(Checkpoint *cp, const std::string &section)
+{
+ BaseCPU::unserialize(cp, section);
+ tickEvent.unserialize(cp, csprintf("%s.tickEvent", section));
+
+ // Use SimpleThread's ability to checkpoint to make it easier to
+ // read in the registers. Also make this static so it doesn't
+ // get instantiated multiple times (causes a panic in statistics).
+ static CPUExecContext temp;
+
+ for (int i = 0; i < thread.size(); i++) {
+ temp.copyXC(thread[i]->getXCProxy());
+ temp.unserialize(cp, csprintf("%s.xc.%i", section, i));
+ thread[i]->getXCProxy()->copyArchRegs(temp.getProxy());
+ }
+}
+
+template <class Impl>
uint64_t
FullO3CPU<Impl>::readIntReg(int reg_idx)
{
diff --git a/src/cpu/o3/fetch_impl.hh b/src/cpu/o3/fetch_impl.hh
index 1e080181c..2d447bfe5 100644
--- a/src/cpu/o3/fetch_impl.hh
+++ b/src/cpu/o3/fetch_impl.hh
@@ -442,6 +442,7 @@ DefaultFetch<Impl>::takeOverFrom()
wroteToTimeBuffer = false;
_status = Inactive;
switchedOut = false;
+ interruptPending = false;
branchPred.takeOverFrom();
}
@@ -563,7 +564,7 @@ DefaultFetch<Impl>::fetchCacheLine(Addr fetch_PC, Fault &ret_fault, unsigned tid
unsigned flags = 0;
#endif // FULL_SYSTEM
- if (cacheBlocked || (interruptPending && flags == 0)) {
+ if (cacheBlocked || isSwitchedOut() || (interruptPending && flags == 0)) {
// Hold off fetch from getting new instructions when:
// Cache is blocked, or
// while an interrupt is pending and we're not in PAL mode, or
@@ -1152,8 +1153,8 @@ DefaultFetch<Impl>::fetch(bool &status_change)
fetch_PC = next_PC;
if (instruction->isQuiesce()) {
- warn("cycle %lli: Quiesce instruction encountered, halting fetch!",
- curTick);
+// warn("%lli: Quiesce instruction encountered, halting fetch!",
+// curTick);
fetchStatus[tid] = QuiescePending;
++numInst;
status_change = true;
@@ -1268,7 +1269,7 @@ DefaultFetch<Impl>::fetch(bool &status_change)
fetchStatus[tid] = TrapPending;
status_change = true;
- warn("cycle %lli: fault (%s) detected @ PC %08p", curTick, fault->name(), PC[tid]);
+// warn("%lli fault (%d) detected @ PC %08p", curTick, fault, PC[tid]);
#else // !FULL_SYSTEM
warn("cycle %lli: fault (%s) detected @ PC %08p", curTick, fault->name(), PC[tid]);
#endif // FULL_SYSTEM
diff --git a/src/cpu/o3/iew.hh b/src/cpu/o3/iew.hh
index 76fa008ee..a400c9fa8 100644
--- a/src/cpu/o3/iew.hh
+++ b/src/cpu/o3/iew.hh
@@ -216,6 +216,7 @@ class DefaultIEW
if (++wbOutstanding == wbMax)
ableToIssue = false;
DPRINTF(IEW, "wbOutstanding: %i\n", wbOutstanding);
+ assert(wbOutstanding <= wbMax);
#ifdef DEBUG
wbList.insert(sn);
#endif
@@ -226,6 +227,7 @@ class DefaultIEW
if (wbOutstanding-- == wbMax)
ableToIssue = true;
DPRINTF(IEW, "wbOutstanding: %i\n", wbOutstanding);
+ assert(wbOutstanding >= 0);
#ifdef DEBUG
assert(wbList.find(sn) != wbList.end());
wbList.erase(sn);
@@ -450,7 +452,9 @@ class DefaultIEW
unsigned wbCycle;
/** Number of instructions in flight that will writeback. */
- unsigned wbOutstanding;
+
+ /** Number of instructions in flight that will writeback. */
+ int wbOutstanding;
/** Writeback width. */
unsigned wbWidth;
@@ -507,6 +511,8 @@ class DefaultIEW
Stats::Scalar<> iewExecutedInsts;
/** Stat for total number of executed load instructions. */
Stats::Vector<> iewExecLoadInsts;
+ /** Stat for total number of executed store instructions. */
+// Stats::Scalar<> iewExecStoreInsts;
/** Stat for total number of squashed instructions skipped at execute. */
Stats::Scalar<> iewExecSquashedInsts;
/** Number of executed software prefetches. */
diff --git a/src/cpu/o3/iew_impl.hh b/src/cpu/o3/iew_impl.hh
index e9b24a6d4..c82f6dd21 100644
--- a/src/cpu/o3/iew_impl.hh
+++ b/src/cpu/o3/iew_impl.hh
@@ -162,17 +162,17 @@ DefaultIEW<Impl>::regStats()
branchMispredicts = predictedTakenIncorrect + predictedNotTakenIncorrect;
iewExecutedInsts
- .name(name() + ".EXEC:insts")
+ .name(name() + ".iewExecutedInsts")
.desc("Number of executed instructions");
iewExecLoadInsts
.init(cpu->number_of_threads)
- .name(name() + ".EXEC:loads")
+ .name(name() + ".iewExecLoadInsts")
.desc("Number of load instructions executed")
.flags(total);
iewExecSquashedInsts
- .name(name() + ".EXEC:squashedInsts")
+ .name(name() + ".iewExecSquashedInsts")
.desc("Number of squashed instructions skipped in execute");
iewExecutedSwp
@@ -372,6 +372,8 @@ DefaultIEW<Impl>::switchOut()
{
// Clear any state.
switchedOut = true;
+ assert(insts[0].empty());
+ assert(skidBuffer[0].empty());
instQueue.switchOut();
ldstQueue.switchOut();
@@ -410,7 +412,6 @@ DefaultIEW<Impl>::takeOverFrom()
updateLSQNextCycle = false;
- // @todo: Fix hardcoded number
for (int i = 0; i < issueToExecQueue.getSize(); ++i) {
issueToExecQueue.advance();
}
@@ -611,9 +612,11 @@ DefaultIEW<Impl>::instToCommit(DynInstPtr &inst)
wbNumInst = 0;
}
- assert((wbCycle * wbWidth + wbNumInst) < wbMax);
+ assert((wbCycle * wbWidth + wbNumInst) <= wbMax);
}
+ DPRINTF(IEW, "Current wb cycle: %i, width: %i, numInst: %i\nwbActual:%i\n",
+ wbCycle, wbWidth, wbNumInst, wbCycle * wbWidth + wbNumInst);
// Add finished instruction to queue to commit.
(*iewQueue)[wbCycle].insts[wbNumInst] = inst;
(*iewQueue)[wbCycle].size++;
@@ -903,6 +906,22 @@ DefaultIEW<Impl>::emptyRenameInsts(unsigned tid)
template <class Impl>
void
+DefaultIEW<Impl>::emptyRenameInsts(unsigned tid)
+{
+ while (!insts[tid].empty()) {
+ if (insts[tid].front()->isLoad() ||
+ insts[tid].front()->isStore() ) {
+ toRename->iewInfo[tid].dispatchedToLSQ++;
+ }
+
+ toRename->iewInfo[tid].dispatched++;
+
+ insts[tid].pop();
+ }
+}
+
+template <class Impl>
+void
DefaultIEW<Impl>::wakeCPU()
{
cpu->wakeCPU();
@@ -1273,13 +1292,23 @@ DefaultIEW<Impl>::executeInsts()
// event adds the instruction to the queue to commit
fault = ldstQueue.executeLoad(inst);
} else if (inst->isStore()) {
- ldstQueue.executeStore(inst);
+ fault = ldstQueue.executeStore(inst);
// If the store had a fault then it may not have a mem req
- if (inst->req && !(inst->req->getFlags() & LOCKED)) {
+ if (!inst->isStoreConditional() && fault == NoFault) {
+ inst->setExecuted();
+
+ instToCommit(inst);
+ } else if (fault != NoFault) {
+ // If the instruction faulted, then we need to send it along to commit
+ // without the instruction completing.
+
+ // Send this instruction to commit, also make sure iew stage
+ // realizes there is activity.
inst->setExecuted();
instToCommit(inst);
+ activityThisCycle();
}
// Store conditionals will mark themselves as
@@ -1404,7 +1433,7 @@ DefaultIEW<Impl>::writebackInsts()
// E.g. Uncached loads have not actually executed when they
// are first sent to commit. Instead commit must tell the LSQ
// when it's ready to execute the uncached load.
- if (!inst->isSquashed() && inst->isExecuted()) {
+ if (!inst->isSquashed() && inst->isExecuted() && inst->getFault() == NoFault) {
int dependents = instQueue.wakeDependents(inst);
for (int i = 0; i < inst->numDestRegs(); i++) {
diff --git a/src/cpu/o3/inst_queue.hh b/src/cpu/o3/inst_queue.hh
index d745faf7b..3dd4dc658 100644
--- a/src/cpu/o3/inst_queue.hh
+++ b/src/cpu/o3/inst_queue.hh
@@ -479,13 +479,13 @@ class InstructionQueue
/** Distribution of number of instructions in the queue.
* @todo: Need to create struct to track the entry time for each
* instruction. */
- Stats::VectorDistribution<> queueResDist;
+// Stats::VectorDistribution<> queueResDist;
/** Distribution of the number of instructions issued. */
Stats::Distribution<> numIssuedDist;
/** Distribution of the cycles it takes to issue an instruction.
* @todo: Need to create struct to track the ready time for each
* instruction. */
- Stats::VectorDistribution<> issueDelayDist;
+// Stats::VectorDistribution<> issueDelayDist;
/** Number of times an instruction could not be issued because a
* FU was busy.
diff --git a/src/cpu/o3/inst_queue_impl.hh b/src/cpu/o3/inst_queue_impl.hh
index 47634f645..6edb528a9 100644
--- a/src/cpu/o3/inst_queue_impl.hh
+++ b/src/cpu/o3/inst_queue_impl.hh
@@ -230,7 +230,7 @@ InstructionQueue<Impl>::regStats()
.name(name() + ".iqSquashedNonSpecRemoved")
.desc("Number of squashed non-spec instructions that were removed")
.prereq(iqSquashedNonSpecRemoved);
-
+/*
queueResDist
.init(Num_OpClasses, 0, 99, 2)
.name(name() + ".IQ:residence:")
@@ -240,6 +240,7 @@ InstructionQueue<Impl>::regStats()
for (int i = 0; i < Num_OpClasses; ++i) {
queueResDist.subname(i, opClassStrings[i]);
}
+*/
numIssuedDist
.init(0,totalWidth,1)
.name(name() + ".ISSUE:issued_per_cycle")
@@ -268,7 +269,7 @@ InstructionQueue<Impl>::regStats()
//
// How long did instructions for a particular FU type wait prior to issue
//
-
+/*
issueDelayDist
.init(Num_OpClasses,0,99,2)
.name(name() + ".ISSUE:")
@@ -281,7 +282,7 @@ InstructionQueue<Impl>::regStats()
subname << opClassStrings[i] << "_delay";
issueDelayDist.subname(i, subname.str());
}
-
+*/
issueRate
.name(name() + ".ISSUE:rate")
.desc("Inst issue rate")
@@ -385,8 +386,16 @@ template <class Impl>
void
InstructionQueue<Impl>::switchOut()
{
+/*
+ if (!instList[0].empty() || (numEntries != freeEntries) ||
+ !readyInsts[0].empty() || !nonSpecInsts.empty() || !listOrder.empty()) {
+ dumpInsts();
+// assert(0);
+ }
+*/
resetState();
dependGraph.reset();
+ instsToExecute.clear();
switchedOut = true;
for (int i = 0; i < numThreads; ++i) {
memDepUnit[i].switchOut();
@@ -642,9 +651,12 @@ template <class Impl>
void
InstructionQueue<Impl>::processFUCompletion(DynInstPtr &inst, int fu_idx)
{
+ DPRINTF(IQ, "Processing FU completion [sn:%lli]\n", inst->seqNum);
// The CPU could have been sleeping until this op completed (*extremely*
// long latency op). Wake it if it was. This may be overkill.
if (isSwitchedOut()) {
+ DPRINTF(IQ, "FU completion not processed, IQ is switched out [sn:%lli]\n",
+ inst->seqNum);
return;
}
@@ -1036,6 +1048,10 @@ InstructionQueue<Impl>::doSquash(unsigned tid)
(squashed_inst->isMemRef() &&
!squashed_inst->memOpDone)) {
+ DPRINTF(IQ, "[tid:%i]: Instruction [sn:%lli] PC %#x "
+ "squashed.\n",
+ tid, squashed_inst->seqNum, squashed_inst->readPC());
+
// Remove the instruction from the dependency list.
if (!squashed_inst->isNonSpeculative() &&
!squashed_inst->isStoreConditional() &&
@@ -1066,7 +1082,7 @@ InstructionQueue<Impl>::doSquash(unsigned tid)
++iqSquashedOperandsExamined;
}
- } else {
+ } else if (!squashed_inst->isStoreConditional() || !squashed_inst->isCompleted()) {
NonSpecMapIt ns_inst_it =
nonSpecInsts.find(squashed_inst->seqNum);
assert(ns_inst_it != nonSpecInsts.end());
@@ -1093,10 +1109,6 @@ InstructionQueue<Impl>::doSquash(unsigned tid)
count[squashed_inst->threadNumber]--;
++freeEntries;
-
- DPRINTF(IQ, "[tid:%i]: Instruction [sn:%lli] PC %#x "
- "squashed.\n",
- tid, squashed_inst->seqNum, squashed_inst->readPC());
}
instList[tid].erase(squash_it--);
diff --git a/src/cpu/o3/lsq_impl.hh b/src/cpu/o3/lsq_impl.hh
index 2bbab71f0..a1ac5adb8 100644
--- a/src/cpu/o3/lsq_impl.hh
+++ b/src/cpu/o3/lsq_impl.hh
@@ -167,6 +167,16 @@ LSQ<Impl>::regStats()
template<class Impl>
void
+LSQ<Impl>::regStats()
+{
+ //Initialize LSQs
+ for (int tid=0; tid < numThreads; tid++) {
+ thread[tid].regStats();
+ }
+}
+
+template<class Impl>
+void
LSQ<Impl>::setActiveThreads(std::list<unsigned> *at_ptr)
{
activeThreads = at_ptr;
diff --git a/src/cpu/o3/lsq_unit.hh b/src/cpu/o3/lsq_unit.hh
index 1358a3699..8537e9dd7 100644
--- a/src/cpu/o3/lsq_unit.hh
+++ b/src/cpu/o3/lsq_unit.hh
@@ -407,20 +407,9 @@ class LSQUnit {
// Will also need how many read/write ports the Dcache has. Or keep track
// of that in stage that is one level up, and only call executeLoad/Store
// the appropriate number of times.
-
/** Total number of loads forwaded from LSQ stores. */
Stats::Scalar<> lsqForwLoads;
- /** Total number of loads ignored due to invalid addresses. */
- Stats::Scalar<> invAddrLoads;
-
- /** Total number of squashed loads. */
- Stats::Scalar<> lsqSquashedLoads;
-
- /** Total number of responses from the memory system that are
- * ignored due to the instruction already being squashed. */
- Stats::Scalar<> lsqIgnoredResponses;
-
/** Total number of squashed stores. */
Stats::Scalar<> lsqSquashedStores;
diff --git a/src/cpu/o3/lsq_unit_impl.hh b/src/cpu/o3/lsq_unit_impl.hh
index fa716c712..2922b81bd 100644
--- a/src/cpu/o3/lsq_unit_impl.hh
+++ b/src/cpu/o3/lsq_unit_impl.hh
@@ -180,6 +180,10 @@ LSQUnit<Impl>::regStats()
.name(name() + ".ignoredResponses")
.desc("Number of memory responses ignored because the instruction is squashed");
+ lsqMemOrderViolation
+ .name(name() + ".memOrderViolation")
+ .desc("Number of memory ordering violations");
+
lsqSquashedStores
.name(name() + ".squashedStores")
.desc("Number of stores squashed");
@@ -220,8 +224,10 @@ void
LSQUnit<Impl>::switchOut()
{
switchedOut = true;
- for (int i = 0; i < loadQueue.size(); ++i)
+ for (int i = 0; i < loadQueue.size(); ++i) {
+ assert(!loadQueue[i]);
loadQueue[i] = NULL;
+ }
assert(storesToWB == 0);
}
@@ -408,6 +414,11 @@ LSQUnit<Impl>::executeLoad(DynInstPtr &inst)
if (load_fault != NoFault) {
// Send this instruction to commit, also make sure iew stage
// realizes there is activity.
+ // Mark it as executed unless it is an uncached load that
+ // needs to hit the head of commit.
+ if (!(inst->req->flags & UNCACHEABLE) || inst->isAtCommit()) {
+ inst->setExecuted();
+ }
iewStage->instToCommit(inst);
iewStage->activityThisCycle();
}
@@ -467,6 +478,7 @@ LSQUnit<Impl>::executeStore(DynInstPtr &store_inst)
// A load incorrectly passed this store. Squash and refetch.
// For now return a fault to show that it was unsuccessful.
memDepViolator = loadQueue[load_idx];
+ ++lsqMemOrderViolation;
return genMachineCheckFault();
}
diff --git a/src/cpu/o3/mem_dep_unit_impl.hh b/src/cpu/o3/mem_dep_unit_impl.hh
index 16f67a4e0..c649ca385 100644
--- a/src/cpu/o3/mem_dep_unit_impl.hh
+++ b/src/cpu/o3/mem_dep_unit_impl.hh
@@ -109,6 +109,9 @@ template <class MemDepPred, class Impl>
void
MemDepUnit<MemDepPred, Impl>::switchOut()
{
+ assert(instList[0].empty());
+ assert(instsToReplay.empty());
+ assert(memDepHash.empty());
// Clear any state.
for (int i = 0; i < Impl::MaxThreads; ++i) {
instList[i].clear();
diff --git a/src/cpu/o3/rename.hh b/src/cpu/o3/rename.hh
index ba26a01dd..177b9cb87 100644
--- a/src/cpu/o3/rename.hh
+++ b/src/cpu/o3/rename.hh
@@ -417,6 +417,8 @@ class DefaultRename
/** The maximum skid buffer size. */
unsigned skidBufferMax;
+ PhysRegIndex maxPhysicalRegs;
+
/** Enum to record the source of a structure full stall. Can come from
* either ROB, IQ, LSQ, and it is priortized in that order.
*/
diff --git a/src/cpu/o3/rename_impl.hh b/src/cpu/o3/rename_impl.hh
index 782c0fe5f..248d7deb6 100644
--- a/src/cpu/o3/rename_impl.hh
+++ b/src/cpu/o3/rename_impl.hh
@@ -41,7 +41,8 @@ DefaultRename<Impl>::DefaultRename(Params *params)
commitToRenameDelay(params->commitToRenameDelay),
renameWidth(params->renameWidth),
commitWidth(params->commitWidth),
- numThreads(params->numberOfThreads)
+ numThreads(params->numberOfThreads),
+ maxPhysicalRegs(params->numPhysIntRegs + params->numPhysFloatRegs)
{
_status = Inactive;
@@ -286,6 +287,11 @@ DefaultRename<Impl>::switchOut()
// Put the renamed physical register back on the free list.
freeList->addReg(hb_it->newPhysReg);
+ // Be sure to mark its register as ready if it's a misc register.
+ if (hb_it->newPhysReg >= maxPhysicalRegs) {
+ scoreboard->setReg(hb_it->newPhysReg);
+ }
+
historyBuffer[i].erase(hb_it++);
}
insts[i].clear();
@@ -889,6 +895,11 @@ DefaultRename<Impl>::doSquash(const InstSeqNum &squashed_seq_num, unsigned tid)
// Put the renamed physical register back on the free list.
freeList->addReg(hb_it->newPhysReg);
+ // Be sure to mark its register as ready if it's a misc register.
+ if (hb_it->newPhysReg >= maxPhysicalRegs) {
+ scoreboard->setReg(hb_it->newPhysReg);
+ }
+
historyBuffer[tid].erase(hb_it++);
++renameUndoneMaps;
diff --git a/src/cpu/o3/thread_state.hh b/src/cpu/o3/thread_state.hh
index b6f2e14c0..0247deb52 100644
--- a/src/cpu/o3/thread_state.hh
+++ b/src/cpu/o3/thread_state.hh
@@ -31,8 +31,11 @@
#ifndef __CPU_O3_THREAD_STATE_HH__
#define __CPU_O3_THREAD_STATE_HH__
+#include "base/callback.hh"
+#include "base/output.hh"
#include "cpu/thread_context.hh"
#include "cpu/thread_state.hh"
+#include "sim/sim_exit.hh"
class Event;
class Process;
@@ -75,8 +78,22 @@ struct O3ThreadState : public ThreadState {
#if FULL_SYSTEM
O3ThreadState(O3CPU *_cpu, int _thread_num)
: ThreadState(-1, _thread_num),
- inSyscall(0), trapPending(0)
- { }
+ cpu(_cpu), inSyscall(0), trapPending(0)
+ {
+ if (cpu->params->profile) {
+ profile = new FunctionProfile(cpu->params->system->kernelSymtab);
+ Callback *cb =
+ new MakeCallback<O3ThreadState,
+ &O3ThreadState::dumpFuncProfile>(this);
+ registerExitCallback(cb);
+ }
+
+ // let's fill with a dummy node for now so we don't get a segfault
+ // on the first cycle when there's no node available.
+ static ProfileNode dummyNode;
+ profileNode = &dummyNode;
+ profilePC = 3;
+ }
#else
O3ThreadState(O3CPU *_cpu, int _thread_num, Process *_process, int _asid,
MemObject *mem)
@@ -95,6 +112,14 @@ struct O3ThreadState : public ThreadState {
/** Handles the syscall. */
void syscall(int64_t callnum) { process->syscall(callnum, tc); }
#endif
+
+#if FULL_SYSTEM
+ void dumpFuncProfile()
+ {
+ std::ostream *os = simout.create(csprintf("profile.%s.dat", cpu->name()));
+ profile->dump(xcProxy, *os);
+ }
+#endif
};
#endif // __CPU_O3_THREAD_STATE_HH__
diff --git a/src/cpu/o3/tournament_pred.cc b/src/cpu/o3/tournament_pred.cc
index 7cf78dcb1..ffb941c77 100644
--- a/src/cpu/o3/tournament_pred.cc
+++ b/src/cpu/o3/tournament_pred.cc
@@ -62,6 +62,8 @@ TournamentBP::TournamentBP(unsigned _localPredictorSize,
for (int i = 0; i < localPredictorSize; ++i)
localCtrs[i].setBits(localCtrBits);
+ localPredictorMask = floorPow2(localPredictorSize) - 1;
+
if (!isPowerOf2(localHistoryTableSize)) {
fatal("Invalid local history table size!\n");
}
@@ -158,7 +160,7 @@ TournamentBP::lookup(Addr &branch_addr, void * &bp_history)
//Lookup in the local predictor to get its branch prediction
local_history_idx = calcLocHistIdx(branch_addr);
local_predictor_idx = localHistoryTable[local_history_idx]
- & localHistoryMask;
+ & localPredictorMask;
local_prediction = localCtrs[local_predictor_idx].read() > threshold;
//Lookup in the global predictor to get its branch prediction
@@ -176,7 +178,8 @@ TournamentBP::lookup(Addr &branch_addr, void * &bp_history)
bp_history = (void *)history;
assert(globalHistory < globalPredictorSize &&
- local_history_idx < localPredictorSize);
+ local_history_idx < localHistoryTableSize &&
+ local_predictor_idx < localPredictorSize);
// Commented code is for doing speculative update of counters and
// all histories.
@@ -234,7 +237,7 @@ TournamentBP::update(Addr &branch_addr, bool taken, void *bp_history)
// Get the local predictor's current prediction
local_history_idx = calcLocHistIdx(branch_addr);
local_predictor_hist = localHistoryTable[local_history_idx];
- local_predictor_idx = local_predictor_hist & localHistoryMask;
+ local_predictor_idx = local_predictor_hist & localPredictorMask;
// Update the choice predictor to tell it which one was correct if
// there was a prediction.
@@ -256,6 +259,7 @@ TournamentBP::update(Addr &branch_addr, bool taken, void *bp_history)
}
assert(globalHistory < globalPredictorSize &&
+ local_history_idx < localHistoryTableSize &&
local_predictor_idx < localPredictorSize);
// Update the counters and local history with the proper
diff --git a/src/cpu/o3/tournament_pred.hh b/src/cpu/o3/tournament_pred.hh
index 66b4aaae2..472944910 100644
--- a/src/cpu/o3/tournament_pred.hh
+++ b/src/cpu/o3/tournament_pred.hh
@@ -159,6 +159,9 @@ class TournamentBP
/** Size of the local predictor. */
unsigned localPredictorSize;
+ /** Mask to get the proper index bits into the predictor. */
+ unsigned localPredictorMask;
+
/** Number of bits of the local predictor's counters. */
unsigned localCtrBits;