summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/arch/alpha/isa/decoder.isa10
-rw-r--r--src/arch/sparc/isa/decoder.isa20
-rw-r--r--src/cpu/base.cc3
-rw-r--r--src/cpu/base_dyn_inst.hh61
-rw-r--r--src/cpu/base_dyn_inst_impl.hh10
-rw-r--r--src/cpu/o3/alpha/cpu_builder.cc7
-rw-r--r--src/cpu/o3/alpha/cpu_impl.hh1
-rw-r--r--src/cpu/o3/commit.hh15
-rw-r--r--src/cpu/o3/commit_impl.hh126
-rw-r--r--src/cpu/o3/cpu.cc10
-rw-r--r--src/cpu/o3/cpu.hh16
-rw-r--r--src/cpu/o3/fetch_impl.hh14
-rw-r--r--src/cpu/o3/iew_impl.hh62
-rw-r--r--src/cpu/o3/inst_queue_impl.hh18
-rw-r--r--src/cpu/o3/lsq_unit.hh25
-rw-r--r--src/cpu/o3/lsq_unit_impl.hh100
-rw-r--r--src/cpu/o3/mem_dep_unit_impl.hh10
-rw-r--r--src/cpu/o3/rename_map.cc2
-rw-r--r--src/mem/bus.cc4
-rw-r--r--src/mem/cache/cache_impl.hh7
-rw-r--r--src/mem/cache/miss/miss_queue.cc1
-rw-r--r--tests/configs/o3-timing.py4
-rw-r--r--tests/long/10.mcf/ref/sparc/linux/simple-atomic/config.ini64
-rw-r--r--tests/long/10.mcf/ref/sparc/linux/simple-atomic/config.out57
-rw-r--r--tests/long/10.mcf/ref/sparc/linux/simple-atomic/m5stats.txt18
-rw-r--r--tests/long/10.mcf/ref/sparc/linux/simple-atomic/stderr7
-rw-r--r--tests/long/10.mcf/ref/sparc/linux/simple-atomic/stdout33
-rw-r--r--tests/quick/00.hello/ref/alpha/linux/o3-timing/config.ini53
-rw-r--r--tests/quick/00.hello/ref/alpha/linux/o3-timing/config.out54
-rw-r--r--tests/quick/00.hello/ref/alpha/linux/o3-timing/m5stats.txt354
-rw-r--r--tests/quick/00.hello/ref/alpha/linux/o3-timing/stderr3
-rw-r--r--tests/quick/00.hello/ref/alpha/linux/o3-timing/stdout11
-rw-r--r--tests/quick/00.hello/ref/alpha/tru64/o3-timing/config.ini53
-rw-r--r--tests/quick/00.hello/ref/alpha/tru64/o3-timing/config.out54
-rw-r--r--tests/quick/00.hello/ref/alpha/tru64/o3-timing/m5stats.txt142
-rw-r--r--tests/quick/00.hello/ref/alpha/tru64/o3-timing/stderr3
-rw-r--r--tests/quick/00.hello/ref/alpha/tru64/o3-timing/stdout9
-rw-r--r--tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/config.ini53
-rw-r--r--tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/config.out54
-rw-r--r--tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/m5stats.txt746
-rw-r--r--tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/stderr6
-rw-r--r--tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/stdout11
42 files changed, 1209 insertions, 1102 deletions
diff --git a/src/arch/alpha/isa/decoder.isa b/src/arch/alpha/isa/decoder.isa
index b62372f66..af1a91a62 100644
--- a/src/arch/alpha/isa/decoder.isa
+++ b/src/arch/alpha/isa/decoder.isa
@@ -728,8 +728,10 @@ decode OPCODE default Unknown::unknown() {
0: OpcdecFault::hw_st_quad();
1: decode HW_LDST_QUAD {
format HwLoad {
- 0: hw_ld({{ EA = (Rb + disp) & ~3; }}, {{ Ra = Mem.ul; }}, L);
- 1: hw_ld({{ EA = (Rb + disp) & ~7; }}, {{ Ra = Mem.uq; }}, Q);
+ 0: hw_ld({{ EA = (Rb + disp) & ~3; }}, {{ Ra = Mem.ul; }},
+ L, IsSerializing, IsSerializeBefore);
+ 1: hw_ld({{ EA = (Rb + disp) & ~7; }}, {{ Ra = Mem.uq; }},
+ Q, IsSerializing, IsSerializeBefore);
}
}
}
@@ -740,9 +742,9 @@ decode OPCODE default Unknown::unknown() {
1: decode HW_LDST_COND {
0: decode HW_LDST_QUAD {
0: hw_st({{ EA = (Rb + disp) & ~3; }},
- {{ Mem.ul = Ra<31:0>; }}, L);
+ {{ Mem.ul = Ra<31:0>; }}, L, IsSerializing, IsSerializeBefore);
1: hw_st({{ EA = (Rb + disp) & ~7; }},
- {{ Mem.uq = Ra.uq; }}, Q);
+ {{ Mem.uq = Ra.uq; }}, Q, IsSerializing, IsSerializeBefore);
}
1: FailUnimpl::hw_st_cond();
diff --git a/src/arch/sparc/isa/decoder.isa b/src/arch/sparc/isa/decoder.isa
index 556bb4bca..68b2183ad 100644
--- a/src/arch/sparc/isa/decoder.isa
+++ b/src/arch/sparc/isa/decoder.isa
@@ -1324,8 +1324,14 @@ decode OP default Unknown::unknown()
0x05: stb({{Mem.ub = Rd.sb;}});
0x06: sth({{Mem.uhw = Rd.shw;}});
0x07: sttw({{
- (Mem.tuw).a = RdLow<31:0>;
- (Mem.tuw).b = RdHigh<31:0>;
+ //This temporary needs to be here so that the parser
+ //will correctly identify this instruction as a store.
+ //It's probably either the parentheses or referencing
+ //the member variable that confuses it.
+ Twin32_t temp;
+ temp.a = RdLow<31:0>;
+ temp.b = RdHigh<31:0>;
+ Mem.tuw = temp;
}});
}
format Load {
@@ -1417,8 +1423,14 @@ decode OP default Unknown::unknown()
0x15: stba({{Mem.ub = Rd;}}, {{EXT_ASI}});
0x16: stha({{Mem.uhw = Rd;}}, {{EXT_ASI}});
0x17: sttwa({{
- (Mem.tuw).a = RdLow<31:0>;
- (Mem.tuw).b = RdHigh<31:0>;
+ //This temporary needs to be here so that the parser
+ //will correctly identify this instruction as a store.
+ //It's probably either the parentheses or referencing
+ //the member variable that confuses it.
+ Twin32_t temp;
+ temp.a = RdLow<31:0>;
+ temp.b = RdHigh<31:0>;
+ Mem.tuw = temp;
}}, {{EXT_ASI}});
}
format LoadAlt {
diff --git a/src/cpu/base.cc b/src/cpu/base.cc
index 3e0be6ad8..4dccee0d3 100644
--- a/src/cpu/base.cc
+++ b/src/cpu/base.cc
@@ -226,7 +226,8 @@ BaseCPU::startup()
#endif
if (params->progress_interval) {
- new CPUProgressEvent(&mainEventQueue, params->progress_interval,
+ new CPUProgressEvent(&mainEventQueue,
+ cycles(params->progress_interval),
this);
}
}
diff --git a/src/cpu/base_dyn_inst.hh b/src/cpu/base_dyn_inst.hh
index 9ccdcdccc..6c6d90076 100644
--- a/src/cpu/base_dyn_inst.hh
+++ b/src/cpu/base_dyn_inst.hh
@@ -171,15 +171,15 @@ class BaseDynInst : public FastAlloc, public RefCounted
/** The kind of fault this instruction has generated. */
Fault fault;
- /** The memory request. */
- Request *req;
-
/** Pointer to the data for the memory access. */
uint8_t *memData;
/** The effective virtual address (lds & stores only). */
Addr effAddr;
+ /** Is the effective virtual address valid. */
+ bool effAddrValid;
+
/** The effective physical address. */
Addr physEffAddr;
@@ -601,12 +601,18 @@ class BaseDynInst : public FastAlloc, public RefCounted
/** Returns whether or not this instruction is ready to issue. */
bool readyToIssue() const { return status[CanIssue]; }
+ /** Clears this instruction's ready-to-issue status. */
+ void clearCanIssue() { status.reset(CanIssue); }
+
/** Sets this instruction as issued from the IQ. */
void setIssued() { status.set(Issued); }
/** Returns whether or not this instruction has issued. */
bool isIssued() const { return status[Issued]; }
+ /** Clears this instruction as being issued. */
+ void clearIssued() { status.reset(Issued); }
+
/** Sets this instruction as executed. */
void setExecuted() { status.set(Executed); }
@@ -729,6 +735,12 @@ class BaseDynInst : public FastAlloc, public RefCounted
*/
bool eaCalcDone;
+ /** Is this instruction's memory access uncacheable. */
+ bool isUncacheable;
+
+ /** Has this instruction generated a memory request. */
+ bool reqMade;
+
public:
/** Sets the effective address. */
void setEA(Addr &ea) { instEffAddr = ea; eaCalcDone = true; }
@@ -745,6 +757,12 @@ class BaseDynInst : public FastAlloc, public RefCounted
/** Whether or not the memory operation is done. */
bool memOpDone;
+ /** Is this instruction's memory access uncacheable. */
+ bool uncacheable() { return isUncacheable; }
+
+ /** Has this instruction generated a memory request. */
+ bool hasRequest() { return reqMade; }
+
public:
/** Load queue index. */
int16_t lqIdx;
@@ -776,25 +794,25 @@ template<class T>
inline Fault
BaseDynInst<Impl>::read(Addr addr, T &data, unsigned flags)
{
- // Sometimes reads will get retried, so they may come through here
- // twice.
- if (!req) {
- req = new Request();
- req->setVirt(asid, addr, sizeof(T), flags, this->PC);
- req->setThreadContext(thread->readCpuId(), threadNumber);
- } else {
- assert(addr == req->getVaddr());
- }
+ reqMade = true;
+ Request *req = new Request();
+ req->setVirt(asid, addr, sizeof(T), flags, this->PC);
+ req->setThreadContext(thread->readCpuId(), threadNumber);
if ((req->getVaddr() & (TheISA::VMPageSize - 1)) + req->getSize() >
TheISA::VMPageSize) {
+ delete req;
return TheISA::genAlignmentFault();
}
fault = cpu->translateDataReadReq(req, thread);
+ if (req->isUncacheable())
+ isUncacheable = true;
+
if (fault == NoFault) {
effAddr = req->getVaddr();
+ effAddrValid = true;
physEffAddr = req->getPaddr();
memReqFlags = req->getFlags();
@@ -817,6 +835,7 @@ BaseDynInst<Impl>::read(Addr addr, T &data, unsigned flags)
// Commit will have to clean up whatever happened. Set this
// instruction as executed.
this->setExecuted();
+ delete req;
}
if (traceData) {
@@ -837,21 +856,25 @@ BaseDynInst<Impl>::write(T data, Addr addr, unsigned flags, uint64_t *res)
traceData->setData(data);
}
- assert(req == NULL);
-
- req = new Request();
+ reqMade = true;
+ Request *req = new Request();
req->setVirt(asid, addr, sizeof(T), flags, this->PC);
req->setThreadContext(thread->readCpuId(), threadNumber);
if ((req->getVaddr() & (TheISA::VMPageSize - 1)) + req->getSize() >
TheISA::VMPageSize) {
+ delete req;
return TheISA::genAlignmentFault();
}
fault = cpu->translateDataWriteReq(req, thread);
+ if (req->isUncacheable())
+ isUncacheable = true;
+
if (fault == NoFault) {
effAddr = req->getVaddr();
+ effAddrValid = true;
physEffAddr = req->getPaddr();
memReqFlags = req->getFlags();
#if 0
@@ -863,12 +886,8 @@ BaseDynInst<Impl>::write(T data, Addr addr, unsigned flags, uint64_t *res)
#else
fault = cpu->write(req, data, sqIdx);
#endif
- }
-
- if (res) {
- // always return some result to keep misspeculated paths
- // (which will ignore faults) deterministic
- *res = (fault == NoFault) ? req->getExtraData() : 0;
+ } else {
+ delete req;
}
return fault;
diff --git a/src/cpu/base_dyn_inst_impl.hh b/src/cpu/base_dyn_inst_impl.hh
index c3d71e428..a1c866336 100644
--- a/src/cpu/base_dyn_inst_impl.hh
+++ b/src/cpu/base_dyn_inst_impl.hh
@@ -92,11 +92,13 @@ template <class Impl>
void
BaseDynInst<Impl>::initVars()
{
- req = NULL;
memData = NULL;
effAddr = 0;
+ effAddrValid = false;
physEffAddr = 0;
+ isUncacheable = false;
+ reqMade = false;
readyRegs = 0;
instResult.integer = 0;
@@ -140,10 +142,6 @@ BaseDynInst<Impl>::initVars()
template <class Impl>
BaseDynInst<Impl>::~BaseDynInst()
{
- if (req) {
- delete req;
- }
-
if (memData) {
delete [] memData;
}
@@ -271,7 +269,7 @@ void
BaseDynInst<Impl>::markSrcRegReady()
{
if (++readyRegs == numSrcRegs()) {
- status.set(CanIssue);
+ setCanIssue();
}
}
diff --git a/src/cpu/o3/alpha/cpu_builder.cc b/src/cpu/o3/alpha/cpu_builder.cc
index 5a375a4b8..34754d3c5 100644
--- a/src/cpu/o3/alpha/cpu_builder.cc
+++ b/src/cpu/o3/alpha/cpu_builder.cc
@@ -50,11 +50,11 @@ BEGIN_DECLARE_SIM_OBJECT_PARAMS(DerivO3CPU)
Param<int> clock;
Param<int> phase;
Param<int> numThreads;
+Param<int> cpu_id;
Param<int> activity;
#if FULL_SYSTEM
SimObjectParam<System *> system;
-Param<int> cpu_id;
SimObjectParam<AlphaISA::ITB *> itb;
SimObjectParam<AlphaISA::DTB *> dtb;
Param<Tick> profile;
@@ -161,11 +161,11 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(DerivO3CPU)
INIT_PARAM(clock, "clock speed"),
INIT_PARAM_DFLT(phase, "clock phase", 0),
INIT_PARAM(numThreads, "number of HW thread contexts"),
+ INIT_PARAM(cpu_id, "processor ID"),
INIT_PARAM_DFLT(activity, "Initial activity count", 0),
#if FULL_SYSTEM
INIT_PARAM(system, "System object"),
- INIT_PARAM(cpu_id, "processor ID"),
INIT_PARAM(itb, "Instruction translation buffer"),
INIT_PARAM(dtb, "Data translation buffer"),
INIT_PARAM(profile, ""),
@@ -305,14 +305,15 @@ CREATE_SIM_OBJECT(DerivO3CPU)
AlphaSimpleParams *params = new AlphaSimpleParams;
params->clock = clock;
+ params->phase = phase;
params->name = getInstanceName();
params->numberOfThreads = actual_num_threads;
+ params->cpu_id = cpu_id;
params->activity = activity;
#if FULL_SYSTEM
params->system = system;
- params->cpu_id = cpu_id;
params->itb = itb;
params->dtb = dtb;
params->profile = profile;
diff --git a/src/cpu/o3/alpha/cpu_impl.hh b/src/cpu/o3/alpha/cpu_impl.hh
index b91972704..304ee6c38 100644
--- a/src/cpu/o3/alpha/cpu_impl.hh
+++ b/src/cpu/o3/alpha/cpu_impl.hh
@@ -114,6 +114,7 @@ AlphaO3CPU<Impl>::AlphaO3CPU(Params *params) : FullO3CPU<Impl>(params)
#endif
// Give the thread the TC.
this->thread[i]->tc = tc;
+ this->thread[i]->setCpuId(params->cpu_id);
// Add the TC to the CPU's list of TC's.
this->threadContexts.push_back(tc);
diff --git a/src/cpu/o3/commit.hh b/src/cpu/o3/commit.hh
index 0d7d82529..e2ad23954 100644
--- a/src/cpu/o3/commit.hh
+++ b/src/cpu/o3/commit.hh
@@ -247,6 +247,11 @@ class DefaultCommit
/** Handles squashing due to an TC write. */
void squashFromTC(unsigned tid);
+#if FULL_SYSTEM
+ /** Handles processing an interrupt. */
+ void handleInterrupt();
+#endif // FULL_SYSTEM
+
/** Commits as many instructions as possible. */
void commitInsts();
@@ -409,6 +414,16 @@ class DefaultCommit
/** The sequence number of the youngest valid instruction in the ROB. */
InstSeqNum youngestSeqNum[Impl::MaxThreads];
+ /** Records if there is a trap currently in flight. */
+ bool trapInFlight[Impl::MaxThreads];
+
+ /** Records if there were any stores committed this cycle. */
+ bool committedStores[Impl::MaxThreads];
+
+ /** Records if commit should check if the ROB is truly empty (see
+ commit_impl.hh). */
+ bool checkEmptyROB[Impl::MaxThreads];
+
/** Pointer to the list of active threads. */
std::list<unsigned> *activeThreads;
diff --git a/src/cpu/o3/commit_impl.hh b/src/cpu/o3/commit_impl.hh
index 18fb2aaa3..3fd85595f 100644
--- a/src/cpu/o3/commit_impl.hh
+++ b/src/cpu/o3/commit_impl.hh
@@ -118,6 +118,9 @@ DefaultCommit<Impl>::DefaultCommit(Params *params)
for (int i=0; i < numThreads; i++) {
commitStatus[i] = Idle;
changedROBNumEntries[i] = false;
+ checkEmptyROB[i] = false;
+ trapInFlight[i] = false;
+ committedStores[i] = false;
trapSquash[i] = false;
tcSquash[i] = false;
PC[i] = nextPC[i] = nextNPC[i] = 0;
@@ -335,6 +338,7 @@ DefaultCommit<Impl>::initStage()
for (int i=0; i < numThreads; i++) {
toIEW->commitInfo[i].usedROB = true;
toIEW->commitInfo[i].freeROBEntries = rob->numFreeEntries(i);
+ toIEW->commitInfo[i].emptyROB = true;
}
cpu->activityThisCycle();
@@ -473,14 +477,14 @@ DefaultCommit<Impl>::generateTrapEvent(unsigned tid)
TrapEvent *trap = new TrapEvent(this, tid);
trap->schedule(curTick + trapLatency);
-
- thread[tid]->trapPending = true;
+ trapInFlight[tid] = true;
}
template <class Impl>
void
DefaultCommit<Impl>::generateTCEvent(unsigned tid)
{
+ assert(!trapInFlight[tid]);
DPRINTF(Commit, "Generating TC squash event for [tid:%i]\n", tid);
tcSquash[tid] = true;
@@ -495,7 +499,7 @@ DefaultCommit<Impl>::squashAll(unsigned tid)
// Hopefully this doesn't mess things up. Basically I want to squash
// all instructions of this thread.
InstSeqNum squashed_inst = rob->isEmpty() ?
- 0 : rob->readHeadInst(tid)->seqNum - 1;;
+ 0 : rob->readHeadInst(tid)->seqNum - 1;
// All younger instructions will be squashed. Set the sequence
// number as the youngest instruction in the ROB (0 in this case.
@@ -532,6 +536,7 @@ DefaultCommit<Impl>::squashFromTrap(unsigned tid)
thread[tid]->trapPending = false;
thread[tid]->inSyscall = false;
+ trapInFlight[tid] = false;
trapSquash[tid] = false;
@@ -580,6 +585,10 @@ DefaultCommit<Impl>::tick()
while (threads != end) {
unsigned tid = *threads++;
+ // Clear the bit saying if the thread has committed stores
+ // this cycle.
+ committedStores[tid] = false;
+
if (commitStatus[tid] == ROBSquashing) {
if (rob->isDoneSquashing(tid)) {
@@ -635,16 +644,11 @@ DefaultCommit<Impl>::tick()
updateStatus();
}
+#if FULL_SYSTEM
template <class Impl>
void
-DefaultCommit<Impl>::commit()
+DefaultCommit<Impl>::handleInterrupt()
{
-
- //////////////////////////////////////
- // Check for interrupts
- //////////////////////////////////////
-
-#if FULL_SYSTEM
if (interrupt != NoFault) {
// Wait until the ROB is empty and all stores have drained in
// order to enter the interrupt.
@@ -653,6 +657,12 @@ DefaultCommit<Impl>::commit()
// an interrupt needed to be handled.
DPRINTF(Commit, "Interrupt detected.\n");
+ Fault new_interrupt = cpu->getInterrupts();
+ assert(new_interrupt != NoFault);
+
+ // Clear the interrupt now that it's going to be handled
+ toIEW->commitInfo[0].clearInterrupt = true;
+
assert(!thread[0]->inSyscall);
thread[0]->inSyscall = true;
@@ -666,16 +676,14 @@ DefaultCommit<Impl>::commit()
// Generate trap squash event.
generateTrapEvent(0);
- // Clear the interrupt now that it's been handled
- toIEW->commitInfo[0].clearInterrupt = true;
interrupt = NoFault;
} else {
DPRINTF(Commit, "Interrupt pending, waiting for ROB to empty.\n");
}
- } else if (cpu->check_interrupts(cpu->tcBase(0)) &&
- commitStatus[0] != TrapPending &&
- !trapSquash[0] &&
- !tcSquash[0]) {
+ } else if (commitStatus[0] != TrapPending &&
+ cpu->check_interrupts(cpu->tcBase(0)) &&
+ !trapSquash[0] &&
+ !tcSquash[0]) {
// Process interrupts if interrupts are enabled, not in PAL
// mode, and no other traps or external squashes are currently
// pending.
@@ -691,7 +699,21 @@ DefaultCommit<Impl>::commit()
toIEW->commitInfo[0].interruptPending = true;
}
}
+}
+#endif // FULL_SYSTEM
+
+template <class Impl>
+void
+DefaultCommit<Impl>::commit()
+{
+#if FULL_SYSTEM
+ // Check for any interrupt, and start processing it. Or if we
+ // have an outstanding interrupt and are at a point when it is
+ // valid to take an interrupt, process it.
+ if (cpu->check_interrupts(cpu->tcBase(0))) {
+ handleInterrupt();
+ }
#endif // FULL_SYSTEM
////////////////////////////////////
@@ -709,6 +731,7 @@ DefaultCommit<Impl>::commit()
assert(!tcSquash[tid]);
squashFromTrap(tid);
} else if (tcSquash[tid] == true) {
+ assert(commitStatus[tid] != TrapPending);
squashFromTC(tid);
}
@@ -753,6 +776,7 @@ DefaultCommit<Impl>::commit()
bdelay_done_seq_num--;
#endif
}
+
// All younger instructions will be squashed. Set the sequence
// number as the youngest instruction in the ROB.
youngestSeqNum[tid] = squashed_inst;
@@ -817,13 +841,29 @@ DefaultCommit<Impl>::commit()
toIEW->commitInfo[tid].usedROB = true;
toIEW->commitInfo[tid].freeROBEntries = rob->numFreeEntries(tid);
- if (rob->isEmpty(tid)) {
- toIEW->commitInfo[tid].emptyROB = true;
- }
-
wroteToTimeBuffer = true;
changedROBNumEntries[tid] = false;
+ if (rob->isEmpty(tid))
+ checkEmptyROB[tid] = true;
}
+
+ // ROB is only considered "empty" for previous stages if: a)
+ // ROB is empty, b) there are no outstanding stores, c) IEW
+ // stage has received any information regarding stores that
+ // committed.
+ // c) is checked by making sure to not consider the ROB empty
+ // on the same cycle as when stores have been committed.
+ // @todo: Make this handle multi-cycle communication between
+ // commit and IEW.
+ if (checkEmptyROB[tid] && rob->isEmpty(tid) &&
+ !iewStage->hasStoresToWB() && !committedStores[tid]) {
+ checkEmptyROB[tid] = false;
+ toIEW->commitInfo[tid].usedROB = true;
+ toIEW->commitInfo[tid].emptyROB = true;
+ toIEW->commitInfo[tid].freeROBEntries = rob->numFreeEntries(tid);
+ wroteToTimeBuffer = true;
+ }
+
}
}
@@ -966,8 +1006,6 @@ DefaultCommit<Impl>::commitHead(DynInstPtr &head_inst, unsigned inst_num)
// and committed this instruction.
thread[tid]->funcExeInst--;
- head_inst->setAtCommit();
-
if (head_inst->isNonSpeculative() ||
head_inst->isStoreConditional() ||
head_inst->isMemBarrier() ||
@@ -977,19 +1015,9 @@ DefaultCommit<Impl>::commitHead(DynInstPtr &head_inst, unsigned inst_num)
"instruction [sn:%lli] at the head of the ROB, PC %#x.\n",
head_inst->seqNum, head_inst->readPC());
- // Hack to make sure syscalls/memory barriers/quiesces
- // aren't executed until all stores write back their data.
- // This direct communication shouldn't be used for
- // anything other than this.
- if ((head_inst->isMemBarrier() || head_inst->isWriteBarrier() ||
- head_inst->isQuiesce()) &&
- iewStage->hasStoresToWB())
- {
+ if (inst_num > 0 || iewStage->hasStoresToWB()) {
DPRINTF(Commit, "Waiting for all stores to writeback.\n");
return false;
- } else if (inst_num > 0 || iewStage->hasStoresToWB()) {
- DPRINTF(Commit, "Waiting to become head of commit.\n");
- return false;
}
toIEW->commitInfo[tid].nonSpecSeqNum = head_inst->seqNum;
@@ -1002,6 +1030,12 @@ DefaultCommit<Impl>::commitHead(DynInstPtr &head_inst, unsigned inst_num)
return false;
} else if (head_inst->isLoad()) {
+ if (inst_num > 0 || iewStage->hasStoresToWB()) {
+ DPRINTF(Commit, "Waiting for all stores to writeback.\n");
+ return false;
+ }
+
+ assert(head_inst->uncacheable());
DPRINTF(Commit, "[sn:%lli]: Uncached load, PC %#x.\n",
head_inst->seqNum, head_inst->readPC());
@@ -1025,8 +1059,11 @@ DefaultCommit<Impl>::commitHead(DynInstPtr &head_inst, unsigned inst_num)
panic("Thread sync instructions are not handled yet.\n");
}
+ // Check if the instruction caused a fault. If so, trap.
+ Fault inst_fault = head_inst->getFault();
+
// Stores mark themselves as completed.
- if (!head_inst->isStore()) {
+ if (!head_inst->isStore() && inst_fault == NoFault) {
head_inst->setCompleted();
}
@@ -1038,9 +1075,6 @@ DefaultCommit<Impl>::commitHead(DynInstPtr &head_inst, unsigned inst_num)
}
#endif
- // Check if the instruction caused a fault. If so, trap.
- Fault inst_fault = head_inst->getFault();
-
// DTB will sometimes need the machine instruction for when
// faults happen. So we will set it here, prior to the DTB
// possibly needing it for its fault.
@@ -1048,7 +1082,6 @@ DefaultCommit<Impl>::commitHead(DynInstPtr &head_inst, unsigned inst_num)
static_cast<TheISA::MachInst>(head_inst->staticInst->machInst));
if (inst_fault != NoFault) {
- head_inst->setCompleted();
DPRINTF(Commit, "Inst [sn:%lli] PC %#x has a fault\n",
head_inst->seqNum, head_inst->readPC());
@@ -1057,6 +1090,8 @@ DefaultCommit<Impl>::commitHead(DynInstPtr &head_inst, unsigned inst_num)
return false;
}
+ head_inst->setCompleted();
+
#if USE_CHECKER
if (cpu->checker && head_inst->isStore()) {
cpu->checker->verify(head_inst);
@@ -1082,6 +1117,14 @@ DefaultCommit<Impl>::commitHead(DynInstPtr &head_inst, unsigned inst_num)
commitStatus[tid] = TrapPending;
+ if (head_inst->traceData) {
+ head_inst->traceData->setFetchSeq(head_inst->seqNum);
+ head_inst->traceData->setCPSeq(thread[tid]->numInst);
+ head_inst->traceData->dump();
+ delete head_inst->traceData;
+ head_inst->traceData = NULL;
+ }
+
// Generate trap squash event.
generateTrapEvent(tid);
// warn("%lli fault (%d) handled @ PC %08p", curTick, inst_fault->name(), head_inst->readPC());
@@ -1123,6 +1166,10 @@ DefaultCommit<Impl>::commitHead(DynInstPtr &head_inst, unsigned inst_num)
// Finally clear the head ROB entry.
rob->retireHead(tid);
+ // If this was a store, record it for this cycle.
+ if (head_inst->isStore())
+ committedStores[tid] = true;
+
// Return true to indicate that we have committed an instruction.
return true;
}
@@ -1167,7 +1214,8 @@ DefaultCommit<Impl>::getInsts()
int tid = inst->threadNumber;
if (!inst->isSquashed() &&
- commitStatus[tid] != ROBSquashing) {
+ commitStatus[tid] != ROBSquashing &&
+ commitStatus[tid] != TrapPending) {
changedROBNumEntries[tid] = true;
DPRINTF(Commit, "Inserting PC %#x [sn:%i] [tid:%i] into ROB.\n",
diff --git a/src/cpu/o3/cpu.cc b/src/cpu/o3/cpu.cc
index 38e6a0b5b..354e3c490 100644
--- a/src/cpu/o3/cpu.cc
+++ b/src/cpu/o3/cpu.cc
@@ -466,7 +466,7 @@ FullO3CPU<Impl>::tick()
lastRunningCycle = curTick;
timesIdled++;
} else {
- tickEvent.schedule(curTick + cycles(1));
+ tickEvent.schedule(nextCycle(curTick + cycles(1)));
DPRINTF(O3CPU, "Scheduling next tick!\n");
}
}
@@ -886,7 +886,7 @@ FullO3CPU<Impl>::resume()
#endif
if (!tickEvent.scheduled())
- tickEvent.schedule(curTick);
+ tickEvent.schedule(nextCycle());
_status = Running;
}
@@ -979,11 +979,11 @@ FullO3CPU<Impl>::takeOverFrom(BaseCPU *oldCPU)
ThreadContext *tc = threadContexts[i];
if (tc->status() == ThreadContext::Active && _status != Running) {
_status = Running;
- tickEvent.schedule(curTick);
+ tickEvent.schedule(nextCycle());
}
}
if (!tickEvent.scheduled())
- tickEvent.schedule(curTick);
+ tickEvent.schedule(nextCycle());
}
template <class Impl>
@@ -1393,7 +1393,7 @@ FullO3CPU<Impl>::wakeCPU()
idleCycles += (curTick - 1) - lastRunningCycle;
- tickEvent.schedule(curTick);
+ tickEvent.schedule(nextCycle());
}
template <class Impl>
diff --git a/src/cpu/o3/cpu.hh b/src/cpu/o3/cpu.hh
index ea374dd57..0ab20ba2a 100644
--- a/src/cpu/o3/cpu.hh
+++ b/src/cpu/o3/cpu.hh
@@ -146,9 +146,9 @@ class FullO3CPU : public BaseO3CPU
void scheduleTickEvent(int delay)
{
if (tickEvent.squashed())
- tickEvent.reschedule(curTick + cycles(delay));
+ tickEvent.reschedule(nextCycle(curTick + cycles(delay)));
else if (!tickEvent.scheduled())
- tickEvent.schedule(curTick + cycles(delay));
+ tickEvent.schedule(nextCycle(curTick + cycles(delay)));
}
/** Unschedule tick event, regardless of its current state. */
@@ -186,9 +186,11 @@ class FullO3CPU : public BaseO3CPU
{
// Schedule thread to activate, regardless of its current state.
if (activateThreadEvent[tid].squashed())
- activateThreadEvent[tid].reschedule(curTick + cycles(delay));
+ activateThreadEvent[tid].
+ reschedule(nextCycle(curTick + cycles(delay)));
else if (!activateThreadEvent[tid].scheduled())
- activateThreadEvent[tid].schedule(curTick + cycles(delay));
+ activateThreadEvent[tid].
+ schedule(nextCycle(curTick + cycles(delay)));
}
/** Unschedule actiavte thread event, regardless of its current state. */
@@ -235,9 +237,11 @@ class FullO3CPU : public BaseO3CPU
{
// Schedule thread to activate, regardless of its current state.
if (deallocateContextEvent[tid].squashed())
- deallocateContextEvent[tid].reschedule(curTick + cycles(delay));
+ deallocateContextEvent[tid].
+ reschedule(nextCycle(curTick + cycles(delay)));
else if (!deallocateContextEvent[tid].scheduled())
- deallocateContextEvent[tid].schedule(curTick + cycles(delay));
+ deallocateContextEvent[tid].
+ schedule(nextCycle(curTick + cycles(delay)));
}
/** Unschedule thread deallocation in CPU */
diff --git a/src/cpu/o3/fetch_impl.hh b/src/cpu/o3/fetch_impl.hh
index 1256dd233..663cd3142 100644
--- a/src/cpu/o3/fetch_impl.hh
+++ b/src/cpu/o3/fetch_impl.hh
@@ -620,6 +620,7 @@ DefaultFetch<Impl>::fetchCacheLine(Addr fetch_PC, Fault &ret_fault, unsigned tid
fault = TheISA::genMachineCheckFault();
delete mem_req;
memReq[tid] = NULL;
+ warn("Bad address!\n");
}
assert(retryPkt == NULL);
assert(retryTid == -1);
@@ -670,11 +671,12 @@ DefaultFetch<Impl>::doSquash(const Addr &new_PC,
// Get rid of the retrying packet if it was from this thread.
if (retryTid == tid) {
assert(cacheBlocked);
- cacheBlocked = false;
- retryTid = -1;
- delete retryPkt->req;
- delete retryPkt;
+ if (retryPkt) {
+ delete retryPkt->req;
+ delete retryPkt;
+ }
retryPkt = NULL;
+ retryTid = -1;
}
fetchStatus[tid] = Squashing;
@@ -1150,7 +1152,7 @@ DefaultFetch<Impl>::fetch(bool &status_change)
///FIXME This needs to be more robust in dealing with delay slots
#if !ISA_HAS_DELAY_SLOT
- predicted_branch |=
+// predicted_branch |=
#endif
lookupAndUpdateNextPC(instruction, next_PC, next_NPC);
predicted_branch |= (next_PC != fetch_NPC);
@@ -1221,7 +1223,7 @@ DefaultFetch<Impl>::fetch(bool &status_change)
// until commit handles the fault. The only other way it can
// wake up is if a squash comes along and changes the PC.
#if FULL_SYSTEM
- assert(numInst != fetchWidth);
+ assert(numInst < fetchWidth);
// Get a sequence number.
inst_seq = cpu->getAndIncrementInstSeq();
// We will use a nop in order to carry the fault.
diff --git a/src/cpu/o3/iew_impl.hh b/src/cpu/o3/iew_impl.hh
index f24eaf2c4..4883e5a5c 100644
--- a/src/cpu/o3/iew_impl.hh
+++ b/src/cpu/o3/iew_impl.hh
@@ -1153,19 +1153,6 @@ DefaultIEW<Impl>::dispatchInsts(unsigned tid)
inst->setCanCommit();
instQueue.insertBarrier(inst);
add_to_iq = false;
- } else if (inst->isNonSpeculative()) {
- DPRINTF(IEW, "[tid:%i]: Issue: Nonspeculative instruction "
- "encountered, skipping.\n", tid);
-
- // Same as non-speculative stores.
- inst->setCanCommit();
-
- // Specifically insert it as nonspeculative.
- instQueue.insertNonSpec(inst);
-
- ++iewDispNonSpecInsts;
-
- add_to_iq = false;
} else if (inst->isNop()) {
DPRINTF(IEW, "[tid:%i]: Issue: Nop instruction encountered, "
"skipping.\n", tid);
@@ -1193,6 +1180,20 @@ DefaultIEW<Impl>::dispatchInsts(unsigned tid)
} else {
add_to_iq = true;
}
+ if (inst->isNonSpeculative()) {
+ DPRINTF(IEW, "[tid:%i]: Issue: Nonspeculative instruction "
+ "encountered, skipping.\n", tid);
+
+ // Same as non-speculative stores.
+ inst->setCanCommit();
+
+ // Specifically insert it as nonspeculative.
+ instQueue.insertNonSpec(inst);
+
+ ++iewDispNonSpecInsts;
+
+ add_to_iq = false;
+ }
// If the instruction queue is not full, then add the
// instruction.
@@ -1379,6 +1380,7 @@ DefaultIEW<Impl>::executeInsts()
predictedNotTakenIncorrect++;
}
} else if (ldstQueue.violation(tid)) {
+ assert(inst->isMemRef());
// If there was an ordering violation, then get the
// DynInst that caused the violation. Note that this
// clears the violation signal.
@@ -1391,10 +1393,10 @@ DefaultIEW<Impl>::executeInsts()
// Ensure the violating instruction is older than
// current squash
- if (fetchRedirect[tid] &&
- violator->seqNum >= toCommit->squashedSeqNum[tid])
+/* if (fetchRedirect[tid] &&
+ violator->seqNum >= toCommit->squashedSeqNum[tid] + 1)
continue;
-
+*/
fetchRedirect[tid] = true;
// Tell the instruction queue that a violation has occured.
@@ -1414,6 +1416,33 @@ DefaultIEW<Impl>::executeInsts()
squashDueToMemBlocked(inst, tid);
}
+ } else {
+ // Reset any state associated with redirects that will not
+ // be used.
+ if (ldstQueue.violation(tid)) {
+ assert(inst->isMemRef());
+
+ DynInstPtr violator = ldstQueue.getMemDepViolator(tid);
+
+ DPRINTF(IEW, "LDSTQ detected a violation. Violator PC: "
+ "%#x, inst PC: %#x. Addr is: %#x.\n",
+ violator->readPC(), inst->readPC(), inst->physEffAddr);
+ DPRINTF(IEW, "Violation will not be handled because "
+ "already squashing\n");
+
+ ++memOrderViolationEvents;
+ }
+ if (ldstQueue.loadBlocked(tid) &&
+ !ldstQueue.isLoadBlockedHandled(tid)) {
+ DPRINTF(IEW, "Load operation couldn't execute because the "
+ "memory system is blocked. PC: %#x [sn:%lli]\n",
+ inst->readPC(), inst->seqNum);
+ DPRINTF(IEW, "Blocked load will not be handled because "
+ "already squashing\n");
+
+ ldstQueue.setLoadBlockedHandled(tid);
+ }
+
}
}
@@ -1563,6 +1592,7 @@ DefaultIEW<Impl>::tick()
//DPRINTF(IEW,"NonspecInst from thread %i",tid);
if (fromCommit->commitInfo[tid].uncached) {
instQueue.replayMemInst(fromCommit->commitInfo[tid].uncachedLoad);
+ fromCommit->commitInfo[tid].uncachedLoad->setAtCommit();
} else {
instQueue.scheduleNonSpec(
fromCommit->commitInfo[tid].nonSpecSeqNum);
diff --git a/src/cpu/o3/inst_queue_impl.hh b/src/cpu/o3/inst_queue_impl.hh
index d5781d89d..79e03d4bf 100644
--- a/src/cpu/o3/inst_queue_impl.hh
+++ b/src/cpu/o3/inst_queue_impl.hh
@@ -829,6 +829,8 @@ InstructionQueue<Impl>::scheduleNonSpec(const InstSeqNum &inst)
unsigned tid = (*inst_it).second->threadNumber;
+ (*inst_it).second->setAtCommit();
+
(*inst_it).second->setCanIssue();
if (!(*inst_it).second->isMemRef()) {
@@ -960,6 +962,8 @@ template <class Impl>
void
InstructionQueue<Impl>::rescheduleMemInst(DynInstPtr &resched_inst)
{
+ DPRINTF(IQ, "Rescheduling mem inst [sn:%lli]\n", resched_inst->seqNum);
+ resched_inst->clearCanIssue();
memDepUnit[resched_inst->threadNumber].reschedule(resched_inst);
}
@@ -984,7 +988,6 @@ InstructionQueue<Impl>::completeMemInst(DynInstPtr &completed_inst)
completed_inst->memOpDone = true;
memDepUnit[tid].completed(completed_inst);
-
count[tid]--;
}
@@ -1084,16 +1087,21 @@ InstructionQueue<Impl>::doSquash(unsigned tid)
++iqSquashedOperandsExamined;
}
- } else if (!squashed_inst->isStoreConditional() || !squashed_inst->isCompleted()) {
+ } else if (!squashed_inst->isStoreConditional() ||
+ !squashed_inst->isCompleted()) {
NonSpecMapIt ns_inst_it =
nonSpecInsts.find(squashed_inst->seqNum);
assert(ns_inst_it != nonSpecInsts.end());
+ if (ns_inst_it == nonSpecInsts.end()) {
+ assert(squashed_inst->getFault() != NoFault);
+ } else {
- (*ns_inst_it).second = NULL;
+ (*ns_inst_it).second = NULL;
- nonSpecInsts.erase(ns_inst_it);
+ nonSpecInsts.erase(ns_inst_it);
- ++iqSquashedNonSpecRemoved;
+ ++iqSquashedNonSpecRemoved;
+ }
}
// Might want to also clear out the head of the dependency graph.
diff --git a/src/cpu/o3/lsq_unit.hh b/src/cpu/o3/lsq_unit.hh
index 2419afe29..1b10843f5 100644
--- a/src/cpu/o3/lsq_unit.hh
+++ b/src/cpu/o3/lsq_unit.hh
@@ -497,6 +497,11 @@ LSQUnit<Impl>::read(Request *req, T &data, int load_idx)
(load_idx != loadHead || !load_inst->isAtCommit())) {
iewStage->rescheduleMemInst(load_inst);
++lsqRescheduledLoads;
+
+ // Must delete request now that it wasn't handed off to
+ // memory. This is quite ugly. @todo: Figure out the proper
+ // place to really handle request deletes.
+ delete req;
return TheISA::genMachineCheckFault();
}
@@ -534,6 +539,10 @@ LSQUnit<Impl>::read(Request *req, T &data, int load_idx)
if (store_size == 0)
continue;
+ else if (storeQueue[store_idx].inst->uncacheable())
+ continue;
+
+ assert(storeQueue[store_idx].inst->effAddrValid);
// Check if the store data is within the lower and upper bounds of
// addresses that the request needs.
@@ -550,7 +559,7 @@ LSQUnit<Impl>::read(Request *req, T &data, int load_idx)
storeQueue[store_idx].inst->effAddr;
// If the store's data has all of the data needed, we can forward.
- if (store_has_lower_limit && store_has_upper_limit) {
+ if ((store_has_lower_limit && store_has_upper_limit)) {
// Get shift amount for offset into the store's data.
int shift_amt = req->getVaddr() & (store_size - 1);
// @todo: Magic number, assumes byte addressing
@@ -596,6 +605,7 @@ LSQUnit<Impl>::read(Request *req, T &data, int load_idx)
// If it's already been written back, then don't worry about
// stalling on it.
if (storeQueue[store_idx].completed) {
+ panic("Should not check one of these");
continue;
}
@@ -614,6 +624,7 @@ LSQUnit<Impl>::read(Request *req, T &data, int load_idx)
// rescheduled eventually
iewStage->rescheduleMemInst(load_inst);
iewStage->decrWb(load_inst->seqNum);
+ load_inst->clearIssued();
++lsqRescheduledLoads;
// Do not generate a writeback event as this instruction is not
@@ -622,7 +633,11 @@ LSQUnit<Impl>::read(Request *req, T &data, int load_idx)
"Store idx %i to load addr %#x\n",
store_idx, req->getVaddr());
- ++lsqBlockedLoads;
+ // Must delete request now that it wasn't handed off to
+ // memory. This is quite ugly. @todo: Figure out the
+ // proper place to really handle request deletes.
+ delete req;
+
return NoFault;
}
}
@@ -654,8 +669,11 @@ LSQUnit<Impl>::read(Request *req, T &data, int load_idx)
// Delete state and data packet because a load retry
// initiates a pipeline restart; it does not retry.
delete state;
+ delete data_pkt->req;
delete data_pkt;
+ req = NULL;
+
if (result == Packet::BadAddress) {
return TheISA::genMachineCheckFault();
}
@@ -669,6 +687,9 @@ LSQUnit<Impl>::read(Request *req, T &data, int load_idx)
// If the cache was blocked, or has become blocked due to the access,
// handle it.
if (lsq->cacheBlocked()) {
+ if (req)
+ delete req;
+
++lsqCacheBlocked;
iewStage->decrWb(load_inst->seqNum);
diff --git a/src/cpu/o3/lsq_unit_impl.hh b/src/cpu/o3/lsq_unit_impl.hh
index 3ba22a530..e70c960b3 100644
--- a/src/cpu/o3/lsq_unit_impl.hh
+++ b/src/cpu/o3/lsq_unit_impl.hh
@@ -81,6 +81,7 @@ LSQUnit<Impl>::completeDataAccess(PacketPtr pkt)
if (isSwitchedOut() || inst->isSquashed()) {
iewStage->decrWb(inst->seqNum);
delete state;
+ delete pkt->req;
delete pkt;
return;
} else {
@@ -94,6 +95,7 @@ LSQUnit<Impl>::completeDataAccess(PacketPtr pkt)
}
delete state;
+ delete pkt->req;
delete pkt;
}
@@ -403,12 +405,15 @@ template <class Impl>
Fault
LSQUnit<Impl>::executeLoad(DynInstPtr &inst)
{
+ using namespace TheISA;
// Execute a specific load.
Fault load_fault = NoFault;
DPRINTF(LSQUnit, "Executing load PC %#x, [sn:%lli]\n",
inst->readPC(),inst->seqNum);
+ assert(!inst->isSquashed());
+
load_fault = inst->initiateAcc();
// If the instruction faulted, then we need to send it along to commit
@@ -418,12 +423,44 @@ LSQUnit<Impl>::executeLoad(DynInstPtr &inst)
// realizes there is activity.
// Mark it as executed unless it is an uncached load that
// needs to hit the head of commit.
- if (!(inst->req && inst->req->isUncacheable()) ||
+ if (!(inst->hasRequest() && inst->uncacheable()) ||
inst->isAtCommit()) {
inst->setExecuted();
}
iewStage->instToCommit(inst);
iewStage->activityThisCycle();
+ } else if (!loadBlocked()) {
+ assert(inst->effAddrValid);
+ int load_idx = inst->lqIdx;
+ incrLdIdx(load_idx);
+ while (load_idx != loadTail) {
+ // Really only need to check loads that have actually executed
+
+ // @todo: For now this is extra conservative, detecting a
+ // violation if the addresses match assuming all accesses
+ // are quad word accesses.
+
+ // @todo: Fix this, magic number being used here
+ if (loadQueue[load_idx]->effAddrValid &&
+ (loadQueue[load_idx]->effAddr >> 8) ==
+ (inst->effAddr >> 8)) {
+ // A load incorrectly passed this load. Squash and refetch.
+ // For now return a fault to show that it was unsuccessful.
+ DynInstPtr violator = loadQueue[load_idx];
+ if (!memDepViolator ||
+ (violator->seqNum < memDepViolator->seqNum)) {
+ memDepViolator = violator;
+ } else {
+ break;
+ }
+
+ ++lsqMemOrderViolation;
+
+ return genMachineCheckFault();
+ }
+
+ incrLdIdx(load_idx);
+ }
}
return load_fault;
@@ -442,6 +479,8 @@ LSQUnit<Impl>::executeStore(DynInstPtr &store_inst)
DPRINTF(LSQUnit, "Executing store PC %#x [sn:%lli]\n",
store_inst->readPC(), store_inst->seqNum);
+ assert(!store_inst->isSquashed());
+
// Check the recently completed loads to see if any match this store's
// address. If so, then we have a memory ordering violation.
int load_idx = store_inst->lqIdx;
@@ -465,32 +504,36 @@ LSQUnit<Impl>::executeStore(DynInstPtr &store_inst)
++storesToWB;
}
- if (!memDepViolator) {
- while (load_idx != loadTail) {
- // Really only need to check loads that have actually executed
- // It's safe to check all loads because effAddr is set to
- // InvalAddr when the dyn inst is created.
-
- // @todo: For now this is extra conservative, detecting a
- // violation if the addresses match assuming all accesses
- // are quad word accesses.
-
- // @todo: Fix this, magic number being used here
- if ((loadQueue[load_idx]->effAddr >> 8) ==
- (store_inst->effAddr >> 8)) {
- // A load incorrectly passed this store. Squash and refetch.
- // For now return a fault to show that it was unsuccessful.
- memDepViolator = loadQueue[load_idx];
- ++lsqMemOrderViolation;
-
- return genMachineCheckFault();
+ assert(store_inst->effAddrValid);
+ while (load_idx != loadTail) {
+ // Really only need to check loads that have actually executed
+ // It's safe to check all loads because effAddr is set to
+ // InvalAddr when the dyn inst is created.
+
+ // @todo: For now this is extra conservative, detecting a
+ // violation if the addresses match assuming all accesses
+ // are quad word accesses.
+
+ // @todo: Fix this, magic number being used here
+ if (loadQueue[load_idx]->effAddrValid &&
+ (loadQueue[load_idx]->effAddr >> 8) ==
+ (store_inst->effAddr >> 8)) {
+ // A load incorrectly passed this store. Squash and refetch.
+ // For now return a fault to show that it was unsuccessful.
+ DynInstPtr violator = loadQueue[load_idx];
+ if (!memDepViolator ||
+ (violator->seqNum < memDepViolator->seqNum)) {
+ memDepViolator = violator;
+ } else {
+ break;
}
- incrLdIdx(load_idx);
+ ++lsqMemOrderViolation;
+
+ return genMachineCheckFault();
}
- // If we've reached this point, there was no violation.
- memDepViolator = NULL;
+ incrLdIdx(load_idx);
}
return store_fault;
@@ -660,7 +703,7 @@ LSQUnit<Impl>::writebackStores()
panic("LSQ sent out a bad address for a completed store!");
}
// Need to handle becoming blocked on a store.
- DPRINTF(IEW, "D-Cache became blcoked when writing [sn:%lli], will"
+ DPRINTF(IEW, "D-Cache became blocked when writing [sn:%lli], will"
"retry later\n",
inst->seqNum);
isStoreBlocked = true;
@@ -735,6 +778,10 @@ LSQUnit<Impl>::squash(const InstSeqNum &squashed_num)
}
}
+ if (memDepViolator && squashed_num < memDepViolator->seqNum) {
+ memDepViolator = NULL;
+ }
+
int store_idx = storeTail;
decrStIdx(store_idx);
@@ -764,6 +811,11 @@ LSQUnit<Impl>::squash(const InstSeqNum &squashed_num)
storeQueue[store_idx].inst = NULL;
storeQueue[store_idx].canWB = 0;
+ // Must delete request now that it wasn't handed off to
+ // memory. This is quite ugly. @todo: Figure out the proper
+ // place to really handle request deletes.
+ delete storeQueue[store_idx].req;
+
storeQueue[store_idx].req = NULL;
--stores;
diff --git a/src/cpu/o3/mem_dep_unit_impl.hh b/src/cpu/o3/mem_dep_unit_impl.hh
index f19980fd5..64558efaa 100644
--- a/src/cpu/o3/mem_dep_unit_impl.hh
+++ b/src/cpu/o3/mem_dep_unit_impl.hh
@@ -214,6 +214,9 @@ MemDepUnit<MemDepPred, Impl>::insert(DynInstPtr &inst)
inst_entry->regsReady = true;
}
+ // Clear the bit saying this instruction can issue.
+ inst->clearCanIssue();
+
// Add this instruction to the list of dependents.
store_entry->dependInsts.push_back(inst_entry);
@@ -357,7 +360,6 @@ void
MemDepUnit<MemDepPred, Impl>::replay(DynInstPtr &inst)
{
DynInstPtr temp_inst;
- bool found_inst = false;
// For now this replay function replays all waiting memory ops.
while (!instsToReplay.empty()) {
@@ -371,14 +373,8 @@ MemDepUnit<MemDepPred, Impl>::replay(DynInstPtr &inst)
moveToReady(inst_entry);
- if (temp_inst == inst) {
- found_inst = true;
- }
-
instsToReplay.pop_front();
}
-
- assert(found_inst);
}
template <class MemDepPred, class Impl>
diff --git a/src/cpu/o3/rename_map.cc b/src/cpu/o3/rename_map.cc
index 620daf691..b436ec1c3 100644
--- a/src/cpu/o3/rename_map.cc
+++ b/src/cpu/o3/rename_map.cc
@@ -192,8 +192,6 @@ SimpleRenameMap::rename(RegIndex arch_reg)
// known that the prev reg was outside the range of normal registers
// so the free list can avoid adding it.
prev_reg = renamed_reg;
-
- assert(renamed_reg < numPhysicalRegs + numMiscRegs);
}
DPRINTF(Rename, "Renamed reg %d to physical reg %d old mapping was %d\n",
diff --git a/src/mem/bus.cc b/src/mem/bus.cc
index 4988df3c5..6e6ba2380 100644
--- a/src/mem/bus.cc
+++ b/src/mem/bus.cc
@@ -171,8 +171,12 @@ Bus::recvTiming(PacketPtr pkt)
}
short dest = pkt->getDest();
+
+ // Make sure to clear the snoop commit flag so it doesn't think an
+ // access has been handled twice.
if (dest == Packet::Broadcast) {
port = findPort(pkt->getAddr(), pkt->getSrc());
+ pkt->flags &= ~SNOOP_COMMIT;
if (timingSnoop(pkt, port ? port : interfaces[pkt->getSrc()])) {
bool success;
diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh
index 5c6ab0950..fc4660269 100644
--- a/src/mem/cache/cache_impl.hh
+++ b/src/mem/cache/cache_impl.hh
@@ -545,8 +545,13 @@ Cache<TagStore,Coherence>::access(PacketPtr &pkt)
//We are determining prefetches on access stream, call prefetcher
prefetcher->handleMiss(pkt, curTick);
}
+
+ Addr blk_addr = pkt->getAddr() & ~(Addr(blkSize-1));
+
if (!pkt->req->isUncacheable()) {
- blk = handleAccess(pkt, lat, writebacks);
+ if (!missQueue->findMSHR(blk_addr)) {
+ blk = handleAccess(pkt, lat, writebacks);
+ }
} else {
size = pkt->getSize();
}
diff --git a/src/mem/cache/miss/miss_queue.cc b/src/mem/cache/miss/miss_queue.cc
index 25b8fcbeb..24ca9cfa2 100644
--- a/src/mem/cache/miss/miss_queue.cc
+++ b/src/mem/cache/miss/miss_queue.cc
@@ -599,6 +599,7 @@ MissQueue::handleResponse(PacketPtr &pkt, Tick time)
MemCmd cmd = mshr->getTarget()->cmd;
mshr->pkt->setDest(Packet::Broadcast);
mshr->pkt->result = Packet::Unknown;
+ mshr->pkt->req = mshr->getTarget()->req;
mq.markPending(mshr, cmd);
mshr->order = order++;
cache->setMasterRequest(Request_MSHR, time);
diff --git a/tests/configs/o3-timing.py b/tests/configs/o3-timing.py
index a66cd436e..5600d9f22 100644
--- a/tests/configs/o3-timing.py
+++ b/tests/configs/o3-timing.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2006 The Regents of The University of Michigan
+# Copyright (c) 2006-2007 The Regents of The University of Michigan
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
@@ -37,7 +37,7 @@ class MyCache(BaseCache):
mshrs = 10
tgts_per_mshr = 5
-cpu = DerivO3CPU()
+cpu = DerivO3CPU(cpu_id=0)
cpu.addTwoLevelCacheHierarchy(MyCache(size = '128kB'), MyCache(size = '256kB'),
MyCache(size = '2MB'))
diff --git a/tests/long/10.mcf/ref/sparc/linux/simple-atomic/config.ini b/tests/long/10.mcf/ref/sparc/linux/simple-atomic/config.ini
new file mode 100644
index 000000000..73a28200e
--- /dev/null
+++ b/tests/long/10.mcf/ref/sparc/linux/simple-atomic/config.ini
@@ -0,0 +1,64 @@
+[root]
+type=Root
+children=system
+dummy=0
+
+[system]
+type=System
+children=cpu membus physmem
+mem_mode=atomic
+physmem=system.physmem
+
+[system.cpu]
+type=AtomicSimpleCPU
+children=workload
+clock=1
+cpu_id=0
+defer_registration=false
+function_trace=false
+function_trace_start=0
+max_insts_all_threads=0
+max_insts_any_thread=0
+max_loads_all_threads=0
+max_loads_any_thread=0
+phase=0
+progress_interval=0
+simulate_stalls=false
+system=system
+width=1
+workload=system.cpu.workload
+dcache_port=system.membus.port[2]
+icache_port=system.membus.port[1]
+
+[system.cpu.workload]
+type=LiveProcess
+cmd=mcf mcf.in
+cwd=build/SPARC_SE/tests/fast/long/10.mcf/sparc/linux/simple-atomic
+egid=100
+env=
+euid=100
+executable=/dist/m5/cpu2000/binaries/sparc/linux/mcf
+gid=100
+input=/dist/m5/cpu2000/data/mcf/lgred/input/mcf.in
+output=cout
+pid=100
+ppid=99
+system=system
+uid=100
+
+[system.membus]
+type=Bus
+bus_id=0
+clock=1000
+responder_set=false
+width=64
+port=system.physmem.port system.cpu.icache_port system.cpu.dcache_port
+
+[system.physmem]
+type=PhysicalMemory
+file=
+latency=1
+range=0:134217727
+zero=false
+port=system.membus.port[0]
+
diff --git a/tests/long/10.mcf/ref/sparc/linux/simple-atomic/config.out b/tests/long/10.mcf/ref/sparc/linux/simple-atomic/config.out
new file mode 100644
index 000000000..2b86e6bfb
--- /dev/null
+++ b/tests/long/10.mcf/ref/sparc/linux/simple-atomic/config.out
@@ -0,0 +1,57 @@
+[root]
+type=Root
+dummy=0
+
+[system.physmem]
+type=PhysicalMemory
+file=
+range=[0,134217727]
+latency=1
+zero=false
+
+[system]
+type=System
+physmem=system.physmem
+mem_mode=atomic
+
+[system.membus]
+type=Bus
+bus_id=0
+clock=1000
+width=64
+responder_set=false
+
+[system.cpu.workload]
+type=LiveProcess
+cmd=mcf mcf.in
+executable=/dist/m5/cpu2000/binaries/sparc/linux/mcf
+input=/dist/m5/cpu2000/data/mcf/lgred/input/mcf.in
+output=cout
+env=
+cwd=build/SPARC_SE/tests/fast/long/10.mcf/sparc/linux/simple-atomic
+system=system
+uid=100
+euid=100
+gid=100
+egid=100
+pid=100
+ppid=99
+
+[system.cpu]
+type=AtomicSimpleCPU
+max_insts_any_thread=0
+max_insts_all_threads=0
+max_loads_any_thread=0
+max_loads_all_threads=0
+progress_interval=0
+system=system
+cpu_id=0
+workload=system.cpu.workload
+clock=1
+phase=0
+defer_registration=false
+width=1
+function_trace=false
+function_trace_start=0
+simulate_stalls=false
+
diff --git a/tests/long/10.mcf/ref/sparc/linux/simple-atomic/m5stats.txt b/tests/long/10.mcf/ref/sparc/linux/simple-atomic/m5stats.txt
new file mode 100644
index 000000000..41e6bfc52
--- /dev/null
+++ b/tests/long/10.mcf/ref/sparc/linux/simple-atomic/m5stats.txt
@@ -0,0 +1,18 @@
+
+---------- Begin Simulation Statistics ----------
+host_inst_rate 624449 # Simulator instruction rate (inst/s)
+host_mem_usage 148644 # Number of bytes of host memory used
+host_seconds 2753.78 # Real time elapsed on the host
+host_tick_rate 624449 # Simulator tick rate (ticks/s)
+sim_freq 1000000000000 # Frequency of simulated ticks
+sim_insts 1719594534 # Number of instructions simulated
+sim_seconds 0.001720 # Number of seconds simulated
+sim_ticks 1719594533 # Number of ticks simulated
+system.cpu.idle_fraction 0 # Percentage of idle cycles
+system.cpu.not_idle_fraction 1 # Percentage of non-idle cycles
+system.cpu.numCycles 1719594534 # number of cpu cycles simulated
+system.cpu.num_insts 1719594534 # Number of instructions executed
+system.cpu.num_refs 774793634 # Number of memory references
+system.cpu.workload.PROG:num_syscalls 632 # Number of system calls
+
+---------- End Simulation Statistics ----------
diff --git a/tests/long/10.mcf/ref/sparc/linux/simple-atomic/stderr b/tests/long/10.mcf/ref/sparc/linux/simple-atomic/stderr
new file mode 100644
index 000000000..9c09fd847
--- /dev/null
+++ b/tests/long/10.mcf/ref/sparc/linux/simple-atomic/stderr
@@ -0,0 +1,7 @@
+warn: More than two loadable segments in ELF object.
+warn: Ignoring segment @ 0xa2000 length 0x10.
+warn: More than two loadable segments in ELF object.
+warn: Ignoring segment @ 0x0 length 0x0.
+0: system.remote_gdb.listener: listening for remote gdb on port 7000
+warn: Entering event queue @ 0. Starting simulation...
+warn: Ignoring request to flush register windows.
diff --git a/tests/long/10.mcf/ref/sparc/linux/simple-atomic/stdout b/tests/long/10.mcf/ref/sparc/linux/simple-atomic/stdout
new file mode 100644
index 000000000..6711761e8
--- /dev/null
+++ b/tests/long/10.mcf/ref/sparc/linux/simple-atomic/stdout
@@ -0,0 +1,33 @@
+
+MCF SPEC version 1.6.I
+by Andreas Loebel
+Copyright (c) 1998,1999 ZIB Berlin
+All Rights Reserved.
+
+nodes : 1800
+active arcs : 8190
+simplex iterations : 6837
+flow value : 12860044181
+new implicit arcs : 300000
+active arcs : 308190
+simplex iterations : 11843
+flow value : 9360043604
+new implicit arcs : 22787
+active arcs : 330977
+simplex iterations : 11931
+flow value : 9360043512
+checksum : 798014
+optimal
+M5 Simulator System
+
+Copyright (c) 2001-2006
+The Regents of The University of Michigan
+All Rights Reserved
+
+
+M5 compiled Mar 23 2007 22:37:06
+M5 started Fri Mar 23 22:37:22 2007
+M5 executing on zizzer.eecs.umich.edu
+command line: build/SPARC_SE/m5.fast -d build/SPARC_SE/tests/fast/long/10.mcf/sparc/linux/simple-atomic tests/run.py long/10.mcf/sparc/linux/simple-atomic
+Global frequency set at 1000000000000 ticks per second
+Exiting @ tick 1719594533 because target called exit()
diff --git a/tests/quick/00.hello/ref/alpha/linux/o3-timing/config.ini b/tests/quick/00.hello/ref/alpha/linux/o3-timing/config.ini
index 2296e2545..cc4477d68 100644
--- a/tests/quick/00.hello/ref/alpha/linux/o3-timing/config.ini
+++ b/tests/quick/00.hello/ref/alpha/linux/o3-timing/config.ini
@@ -1,48 +1,7 @@
[root]
type=Root
children=system
-checkpoint=
-clock=1000000000000
-max_tick=0
-output_file=cout
-progress_interval=0
-
-[exetrace]
-intel_format=false
-legion_lockstep=false
-pc_symbol=true
-print_cpseq=false
-print_cycle=true
-print_data=true
-print_effaddr=true
-print_fetchseq=false
-print_iregs=false
-print_opclass=true
-print_thread=true
-speculative=true
-trace_system=client
-
-[serialize]
-count=10
-cycle=0
-dir=cpt.%012d
-period=0
-
-[stats]
-descriptions=true
-dump_cycle=0
-dump_period=0
-dump_reset=false
-ignore_events=
-mysql_db=
-mysql_host=
-mysql_password=
-mysql_user=
-project_name=test
-simulation_name=test
-simulation_sample=0
-text_compat=true
-text_file=m5stats.txt
+dummy=0
[system]
type=System
@@ -70,6 +29,7 @@ commitToFetchDelay=1
commitToIEWDelay=1
commitToRenameDelay=1
commitWidth=8
+cpu_id=0
decodeToFetchDelay=1
decodeToRenameDelay=1
decodeWidth=8
@@ -417,12 +377,3 @@ range=0:134217727
zero=false
port=system.membus.port[0]
-[trace]
-bufsize=0
-cycle=0
-dump_on_exit=false
-file=cout
-flags=
-ignore=
-start=0
-
diff --git a/tests/quick/00.hello/ref/alpha/linux/o3-timing/config.out b/tests/quick/00.hello/ref/alpha/linux/o3-timing/config.out
index 1b1b58f1b..f50559125 100644
--- a/tests/quick/00.hello/ref/alpha/linux/o3-timing/config.out
+++ b/tests/quick/00.hello/ref/alpha/linux/o3-timing/config.out
@@ -1,9 +1,6 @@
[root]
type=Root
-clock=1000000000000
-max_tick=0
-progress_interval=0
-output_file=cout
+dummy=0
[system.physmem]
type=PhysicalMemory
@@ -173,6 +170,7 @@ type=DerivO3CPU
clock=1
phase=0
numThreads=1
+cpu_id=0
activity=0
workload=system.cpu.workload
checker=null
@@ -367,51 +365,3 @@ clock=1000
width=64
responder_set=false
-[trace]
-flags=
-start=0
-cycle=0
-bufsize=0
-file=cout
-dump_on_exit=false
-ignore=
-
-[stats]
-descriptions=true
-project_name=test
-simulation_name=test
-simulation_sample=0
-text_file=m5stats.txt
-text_compat=true
-mysql_db=
-mysql_user=
-mysql_password=
-mysql_host=
-events_start=-1
-dump_reset=false
-dump_cycle=0
-dump_period=0
-ignore_events=
-
-[random]
-seed=1
-
-[exetrace]
-speculative=true
-print_cycle=true
-print_opclass=true
-print_thread=true
-print_effaddr=true
-print_data=true
-print_iregs=false
-print_fetchseq=false
-print_cpseq=false
-print_reg_delta=false
-pc_symbol=true
-intel_format=false
-legion_lockstep=false
-trace_system=client
-
-[statsreset]
-reset_cycle=0
-
diff --git a/tests/quick/00.hello/ref/alpha/linux/o3-timing/m5stats.txt b/tests/quick/00.hello/ref/alpha/linux/o3-timing/m5stats.txt
index 4e3fdbcd2..4b323618c 100644
--- a/tests/quick/00.hello/ref/alpha/linux/o3-timing/m5stats.txt
+++ b/tests/quick/00.hello/ref/alpha/linux/o3-timing/m5stats.txt
@@ -1,40 +1,40 @@
---------- Begin Simulation Statistics ----------
global.BPredUnit.BTBCorrect 0 # Number of correct BTB predictions (this stat may not work properly.
-global.BPredUnit.BTBHits 675 # Number of BTB hits
-global.BPredUnit.BTBLookups 2343 # Number of BTB lookups
+global.BPredUnit.BTBHits 669 # Number of BTB hits
+global.BPredUnit.BTBLookups 2338 # Number of BTB lookups
global.BPredUnit.RASInCorrect 76 # Number of incorrect RAS predictions.
global.BPredUnit.condIncorrect 437 # Number of conditional branches incorrect
-global.BPredUnit.condPredicted 1563 # Number of conditional branches predicted
-global.BPredUnit.lookups 5229 # Number of BP lookups
+global.BPredUnit.condPredicted 1559 # Number of conditional branches predicted
+global.BPredUnit.lookups 5224 # Number of BP lookups
global.BPredUnit.usedRAS 2821 # Number of times the RAS was used to get a target.
-host_inst_rate 11609 # Simulator instruction rate (inst/s)
-host_mem_usage 177052 # Number of bytes of host memory used
-host_seconds 0.48 # Real time elapsed on the host
-host_tick_rate 2887871 # Simulator tick rate (ticks/s)
-memdepunit.memDep.conflictingLoads 23 # Number of conflicting loads.
-memdepunit.memDep.conflictingStores 117 # Number of conflicting stores.
-memdepunit.memDep.insertedLoads 3775 # Number of loads inserted to the mem dependence unit.
-memdepunit.memDep.insertedStores 3734 # Number of stores inserted to the mem dependence unit.
+host_inst_rate 12539 # Simulator instruction rate (inst/s)
+host_mem_usage 156028 # Number of bytes of host memory used
+host_seconds 0.45 # Real time elapsed on the host
+host_tick_rate 3120138 # Simulator tick rate (ticks/s)
+memdepunit.memDep.conflictingLoads 24 # Number of conflicting loads.
+memdepunit.memDep.conflictingStores 12 # Number of conflicting stores.
+memdepunit.memDep.insertedLoads 3770 # Number of loads inserted to the mem dependence unit.
+memdepunit.memDep.insertedStores 3723 # Number of stores inserted to the mem dependence unit.
sim_freq 1000000000000 # Frequency of simulated ticks
sim_insts 5623 # Number of instructions simulated
sim_seconds 0.000001 # Number of seconds simulated
-sim_ticks 1400135 # Number of ticks simulated
+sim_ticks 1400134 # Number of ticks simulated
system.cpu.commit.COM:branches 862 # Number of branches committed
-system.cpu.commit.COM:bw_lim_events 97 # number cycles where commit BW limit reached
+system.cpu.commit.COM:bw_lim_events 101 # number cycles where commit BW limit reached
system.cpu.commit.COM:bw_limited 0 # number of insts not committed due to BW limits
system.cpu.commit.COM:committed_per_cycle.start_dist # Number of insts commited each cycle
-system.cpu.commit.COM:committed_per_cycle.samples 51243
+system.cpu.commit.COM:committed_per_cycle.samples 52214
system.cpu.commit.COM:committed_per_cycle.min_value 0
- 0 48519 9468.42%
- 1 1590 310.29%
- 2 483 94.26%
- 3 227 44.30%
- 4 131 25.56%
- 5 104 20.30%
- 6 61 11.90%
- 7 31 6.05%
- 8 97 18.93%
+ 0 49499 9480.02%
+ 1 1576 301.83%
+ 2 483 92.50%
+ 3 233 44.62%
+ 4 133 25.47%
+ 5 102 19.53%
+ 6 60 11.49%
+ 7 27 5.17%
+ 8 101 19.34%
system.cpu.commit.COM:committed_per_cycle.max_value 8
system.cpu.commit.COM:committed_per_cycle.end_dist
@@ -46,66 +46,66 @@ system.cpu.commit.COM:swp_count 0 # Nu
system.cpu.commit.branchMispredicts 368 # The number of times a branch was mispredicted
system.cpu.commit.commitCommittedInsts 5640 # The number of committed instructions
system.cpu.commit.commitNonSpecStalls 17 # The number of times commit has been forced to stall to communicate backwards
-system.cpu.commit.commitSquashedInsts 13830 # The number of squashed insts skipped by commit
+system.cpu.commit.commitSquashedInsts 13804 # The number of squashed insts skipped by commit
system.cpu.committedInsts 5623 # Number of Instructions Simulated
system.cpu.committedInsts_total 5623 # Number of Instructions Simulated
-system.cpu.cpi 249.001423 # CPI: Cycles Per Instruction
-system.cpu.cpi_total 249.001423 # CPI: Total CPI of All Threads
-system.cpu.dcache.ReadReq_accesses 1600 # number of ReadReq accesses(hits+misses)
+system.cpu.cpi 249.001245 # CPI: Cycles Per Instruction
+system.cpu.cpi_total 249.001245 # CPI: Total CPI of All Threads
+system.cpu.dcache.ReadReq_accesses 1596 # number of ReadReq accesses(hits+misses)
system.cpu.dcache.ReadReq_avg_miss_latency 6986.684848 # average ReadReq miss latency
system.cpu.dcache.ReadReq_avg_mshr_miss_latency 6882.626263 # average ReadReq mshr miss latency
-system.cpu.dcache.ReadReq_hits 1435 # number of ReadReq hits
+system.cpu.dcache.ReadReq_hits 1431 # number of ReadReq hits
system.cpu.dcache.ReadReq_miss_latency 1152803 # number of ReadReq miss cycles
-system.cpu.dcache.ReadReq_miss_rate 0.103125 # miss rate for ReadReq accesses
+system.cpu.dcache.ReadReq_miss_rate 0.103383 # miss rate for ReadReq accesses
system.cpu.dcache.ReadReq_misses 165 # number of ReadReq misses
system.cpu.dcache.ReadReq_mshr_hits 66 # number of ReadReq MSHR hits
system.cpu.dcache.ReadReq_mshr_miss_latency 681380 # number of ReadReq MSHR miss cycles
-system.cpu.dcache.ReadReq_mshr_miss_rate 0.061875 # mshr miss rate for ReadReq accesses
+system.cpu.dcache.ReadReq_mshr_miss_rate 0.062030 # mshr miss rate for ReadReq accesses
system.cpu.dcache.ReadReq_mshr_misses 99 # number of ReadReq MSHR misses
system.cpu.dcache.WriteReq_accesses 812 # number of WriteReq accesses(hits+misses)
-system.cpu.dcache.WriteReq_avg_miss_latency 5293.047244 # average WriteReq miss latency
-system.cpu.dcache.WriteReq_avg_mshr_miss_latency 5141.082192 # average WriteReq mshr miss latency
+system.cpu.dcache.WriteReq_avg_miss_latency 5293.200787 # average WriteReq miss latency
+system.cpu.dcache.WriteReq_avg_mshr_miss_latency 5141.095890 # average WriteReq mshr miss latency
system.cpu.dcache.WriteReq_hits 558 # number of WriteReq hits
-system.cpu.dcache.WriteReq_miss_latency 1344434 # number of WriteReq miss cycles
+system.cpu.dcache.WriteReq_miss_latency 1344473 # number of WriteReq miss cycles
system.cpu.dcache.WriteReq_miss_rate 0.312808 # miss rate for WriteReq accesses
system.cpu.dcache.WriteReq_misses 254 # number of WriteReq misses
system.cpu.dcache.WriteReq_mshr_hits 181 # number of WriteReq MSHR hits
-system.cpu.dcache.WriteReq_mshr_miss_latency 375299 # number of WriteReq MSHR miss cycles
+system.cpu.dcache.WriteReq_mshr_miss_latency 375300 # number of WriteReq MSHR miss cycles
system.cpu.dcache.WriteReq_mshr_miss_rate 0.089901 # mshr miss rate for WriteReq accesses
system.cpu.dcache.WriteReq_mshr_misses 73 # number of WriteReq MSHR misses
system.cpu.dcache.avg_blocked_cycles_no_mshrs <err: div-0> # average number of cycles each access was blocked
-system.cpu.dcache.avg_blocked_cycles_no_targets 3366.651163 # average number of cycles each access was blocked
-system.cpu.dcache.avg_refs 11.587209 # Average number of references to valid blocks.
+system.cpu.dcache.avg_blocked_cycles_no_targets 3366.930233 # average number of cycles each access was blocked
+system.cpu.dcache.avg_refs 11.563953 # Average number of references to valid blocks.
system.cpu.dcache.blocked_no_mshrs 0 # number of cycles access was blocked
system.cpu.dcache.blocked_no_targets 43 # number of cycles access was blocked
system.cpu.dcache.blocked_cycles_no_mshrs 0 # number of cycles access was blocked
-system.cpu.dcache.blocked_cycles_no_targets 144766 # number of cycles access was blocked
+system.cpu.dcache.blocked_cycles_no_targets 144778 # number of cycles access was blocked
system.cpu.dcache.cache_copies 0 # number of cache copies performed
-system.cpu.dcache.demand_accesses 2412 # number of demand (read+write) accesses
-system.cpu.dcache.demand_avg_miss_latency 5959.992840 # average overall miss latency
-system.cpu.dcache.demand_avg_mshr_miss_latency 6143.482558 # average overall mshr miss latency
-system.cpu.dcache.demand_hits 1993 # number of demand (read+write) hits
-system.cpu.dcache.demand_miss_latency 2497237 # number of demand (read+write) miss cycles
-system.cpu.dcache.demand_miss_rate 0.173715 # miss rate for demand accesses
+system.cpu.dcache.demand_accesses 2408 # number of demand (read+write) accesses
+system.cpu.dcache.demand_avg_miss_latency 5960.085919 # average overall miss latency
+system.cpu.dcache.demand_avg_mshr_miss_latency 6143.488372 # average overall mshr miss latency
+system.cpu.dcache.demand_hits 1989 # number of demand (read+write) hits
+system.cpu.dcache.demand_miss_latency 2497276 # number of demand (read+write) miss cycles
+system.cpu.dcache.demand_miss_rate 0.174003 # miss rate for demand accesses
system.cpu.dcache.demand_misses 419 # number of demand (read+write) misses
system.cpu.dcache.demand_mshr_hits 247 # number of demand (read+write) MSHR hits
-system.cpu.dcache.demand_mshr_miss_latency 1056679 # number of demand (read+write) MSHR miss cycles
-system.cpu.dcache.demand_mshr_miss_rate 0.071310 # mshr miss rate for demand accesses
+system.cpu.dcache.demand_mshr_miss_latency 1056680 # number of demand (read+write) MSHR miss cycles
+system.cpu.dcache.demand_mshr_miss_rate 0.071429 # mshr miss rate for demand accesses
system.cpu.dcache.demand_mshr_misses 172 # number of demand (read+write) MSHR misses
system.cpu.dcache.fast_writes 0 # number of fast writes performed
system.cpu.dcache.mshr_cap_events 0 # number of times MSHR cap was activated
system.cpu.dcache.no_allocate_misses 0 # Number of misses that were no-allocate
-system.cpu.dcache.overall_accesses 2412 # number of overall (read+write) accesses
-system.cpu.dcache.overall_avg_miss_latency 5959.992840 # average overall miss latency
-system.cpu.dcache.overall_avg_mshr_miss_latency 6143.482558 # average overall mshr miss latency
+system.cpu.dcache.overall_accesses 2408 # number of overall (read+write) accesses
+system.cpu.dcache.overall_avg_miss_latency 5960.085919 # average overall miss latency
+system.cpu.dcache.overall_avg_mshr_miss_latency 6143.488372 # average overall mshr miss latency
system.cpu.dcache.overall_avg_mshr_uncacheable_latency <err: div-0> # average overall mshr uncacheable latency
-system.cpu.dcache.overall_hits 1993 # number of overall hits
-system.cpu.dcache.overall_miss_latency 2497237 # number of overall miss cycles
-system.cpu.dcache.overall_miss_rate 0.173715 # miss rate for overall accesses
+system.cpu.dcache.overall_hits 1989 # number of overall hits
+system.cpu.dcache.overall_miss_latency 2497276 # number of overall miss cycles
+system.cpu.dcache.overall_miss_rate 0.174003 # miss rate for overall accesses
system.cpu.dcache.overall_misses 419 # number of overall misses
system.cpu.dcache.overall_mshr_hits 247 # number of overall MSHR hits
-system.cpu.dcache.overall_mshr_miss_latency 1056679 # number of overall MSHR miss cycles
-system.cpu.dcache.overall_mshr_miss_rate 0.071310 # mshr miss rate for overall accesses
+system.cpu.dcache.overall_mshr_miss_latency 1056680 # number of overall MSHR miss cycles
+system.cpu.dcache.overall_mshr_miss_rate 0.071429 # mshr miss rate for overall accesses
system.cpu.dcache.overall_mshr_misses 172 # number of overall MSHR misses
system.cpu.dcache.overall_mshr_uncacheable_latency 0 # number of overall MSHR uncacheable cycles
system.cpu.dcache.overall_mshr_uncacheable_misses 0 # number of overall MSHR uncacheable misses
@@ -121,88 +121,88 @@ system.cpu.dcache.prefetcher.num_hwpf_squashed_from_miss 0
system.cpu.dcache.replacements 0 # number of replacements
system.cpu.dcache.sampled_refs 172 # Sample count of references to valid blocks.
system.cpu.dcache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions
-system.cpu.dcache.tagsinuse 101.349720 # Cycle average of tags in use
-system.cpu.dcache.total_refs 1993 # Total number of references to valid blocks.
+system.cpu.dcache.tagsinuse 101.349670 # Cycle average of tags in use
+system.cpu.dcache.total_refs 1989 # Total number of references to valid blocks.
system.cpu.dcache.warmup_cycle 0 # Cycle when the warmup percentage was hit.
system.cpu.dcache.writebacks 0 # number of writebacks
system.cpu.decode.DECODE:BlockedCycles 17501 # Number of cycles decode is blocked
system.cpu.decode.DECODE:BranchMispred 70 # Number of times decode detected a branch misprediction
-system.cpu.decode.DECODE:BranchResolved 168 # Number of times decode resolved a branch
-system.cpu.decode.DECODE:DecodedInsts 29666 # Number of instructions handled by decode
-system.cpu.decode.DECODE:IdleCycles 28130 # Number of cycles decode is idle
-system.cpu.decode.DECODE:RunCycles 5553 # Number of cycles decode is running
-system.cpu.decode.DECODE:SquashCycles 2529 # Number of cycles decode is squashing
+system.cpu.decode.DECODE:BranchResolved 167 # Number of times decode resolved a branch
+system.cpu.decode.DECODE:DecodedInsts 29609 # Number of instructions handled by decode
+system.cpu.decode.DECODE:IdleCycles 29114 # Number of cycles decode is idle
+system.cpu.decode.DECODE:RunCycles 5540 # Number of cycles decode is running
+system.cpu.decode.DECODE:SquashCycles 2527 # Number of cycles decode is squashing
system.cpu.decode.DECODE:SquashedInsts 200 # Number of squashed instructions handled by decode
system.cpu.decode.DECODE:UnblockCycles 60 # Number of cycles decode is unblocking
-system.cpu.fetch.Branches 5229 # Number of branches that fetch encountered
-system.cpu.fetch.CacheLines 6371 # Number of cache lines fetched
-system.cpu.fetch.Cycles 13322 # Number of cycles fetch has run and was not squashing or blocked
-system.cpu.fetch.IcacheSquashes 296 # Number of outstanding Icache misses that were squashed
-system.cpu.fetch.Insts 35572 # Number of instructions fetch has processed
+system.cpu.fetch.Branches 5224 # Number of branches that fetch encountered
+system.cpu.fetch.CacheLines 6367 # Number of cache lines fetched
+system.cpu.fetch.Cycles 13308 # Number of cycles fetch has run and was not squashing or blocked
+system.cpu.fetch.IcacheSquashes 295 # Number of outstanding Icache misses that were squashed
+system.cpu.fetch.Insts 35526 # Number of instructions fetch has processed
system.cpu.fetch.SquashCycles 2057 # Number of cycles fetch has spent squashing
-system.cpu.fetch.branchRate 0.097242 # Number of branch fetches per cycle
-system.cpu.fetch.icacheStallCycles 6371 # Number of cycles fetch is stalled on an Icache miss
-system.cpu.fetch.predictedBranches 3496 # Number of branches that fetch has predicted taken
-system.cpu.fetch.rate 0.661522 # Number of inst fetches per cycle
+system.cpu.fetch.branchRate 0.095429 # Number of branch fetches per cycle
+system.cpu.fetch.icacheStallCycles 7360 # Number of cycles fetch is stalled on an Icache miss
+system.cpu.fetch.predictedBranches 3490 # Number of branches that fetch has predicted taken
+system.cpu.fetch.rate 0.648972 # Number of inst fetches per cycle
system.cpu.fetch.rateDist.start_dist # Number of instructions fetched each cycle (Total)
-system.cpu.fetch.rateDist.samples 53773
+system.cpu.fetch.rateDist.samples 54742
system.cpu.fetch.rateDist.min_value 0
- 0 46825 8707.90%
- 1 199 37.01%
- 2 504 93.73%
- 3 1429 265.75%
- 4 1462 271.88%
- 5 245 45.56%
- 6 322 59.88%
- 7 1223 227.44%
- 8 1564 290.85%
+ 0 47805 8732.78%
+ 1 199 36.35%
+ 2 500 91.34%
+ 3 1426 260.49%
+ 4 1459 266.52%
+ 5 244 44.57%
+ 6 327 59.73%
+ 7 1225 223.78%
+ 8 1557 284.43%
system.cpu.fetch.rateDist.max_value 8
system.cpu.fetch.rateDist.end_dist
-system.cpu.icache.ReadReq_accesses 6370 # number of ReadReq accesses(hits+misses)
-system.cpu.icache.ReadReq_avg_miss_latency 5088.614350 # average ReadReq miss latency
+system.cpu.icache.ReadReq_accesses 6366 # number of ReadReq accesses(hits+misses)
+system.cpu.icache.ReadReq_avg_miss_latency 5085.923937 # average ReadReq miss latency
system.cpu.icache.ReadReq_avg_mshr_miss_latency 4278.032258 # average ReadReq mshr miss latency
-system.cpu.icache.ReadReq_hits 5924 # number of ReadReq hits
-system.cpu.icache.ReadReq_miss_latency 2269522 # number of ReadReq miss cycles
-system.cpu.icache.ReadReq_miss_rate 0.070016 # miss rate for ReadReq accesses
-system.cpu.icache.ReadReq_misses 446 # number of ReadReq misses
-system.cpu.icache.ReadReq_mshr_hits 136 # number of ReadReq MSHR hits
+system.cpu.icache.ReadReq_hits 5919 # number of ReadReq hits
+system.cpu.icache.ReadReq_miss_latency 2273408 # number of ReadReq miss cycles
+system.cpu.icache.ReadReq_miss_rate 0.070217 # miss rate for ReadReq accesses
+system.cpu.icache.ReadReq_misses 447 # number of ReadReq misses
+system.cpu.icache.ReadReq_mshr_hits 137 # number of ReadReq MSHR hits
system.cpu.icache.ReadReq_mshr_miss_latency 1326190 # number of ReadReq MSHR miss cycles
-system.cpu.icache.ReadReq_mshr_miss_rate 0.048666 # mshr miss rate for ReadReq accesses
+system.cpu.icache.ReadReq_mshr_miss_rate 0.048696 # mshr miss rate for ReadReq accesses
system.cpu.icache.ReadReq_mshr_misses 310 # number of ReadReq MSHR misses
system.cpu.icache.avg_blocked_cycles_no_mshrs <err: div-0> # average number of cycles each access was blocked
-system.cpu.icache.avg_blocked_cycles_no_targets 3444.375000 # average number of cycles each access was blocked
-system.cpu.icache.avg_refs 19.109677 # Average number of references to valid blocks.
+system.cpu.icache.avg_blocked_cycles_no_targets 3443.500000 # average number of cycles each access was blocked
+system.cpu.icache.avg_refs 19.093548 # Average number of references to valid blocks.
system.cpu.icache.blocked_no_mshrs 0 # number of cycles access was blocked
system.cpu.icache.blocked_no_targets 8 # number of cycles access was blocked
system.cpu.icache.blocked_cycles_no_mshrs 0 # number of cycles access was blocked
-system.cpu.icache.blocked_cycles_no_targets 27555 # number of cycles access was blocked
+system.cpu.icache.blocked_cycles_no_targets 27548 # number of cycles access was blocked
system.cpu.icache.cache_copies 0 # number of cache copies performed
-system.cpu.icache.demand_accesses 6370 # number of demand (read+write) accesses
-system.cpu.icache.demand_avg_miss_latency 5088.614350 # average overall miss latency
+system.cpu.icache.demand_accesses 6366 # number of demand (read+write) accesses
+system.cpu.icache.demand_avg_miss_latency 5085.923937 # average overall miss latency
system.cpu.icache.demand_avg_mshr_miss_latency 4278.032258 # average overall mshr miss latency
-system.cpu.icache.demand_hits 5924 # number of demand (read+write) hits
-system.cpu.icache.demand_miss_latency 2269522 # number of demand (read+write) miss cycles
-system.cpu.icache.demand_miss_rate 0.070016 # miss rate for demand accesses
-system.cpu.icache.demand_misses 446 # number of demand (read+write) misses
-system.cpu.icache.demand_mshr_hits 136 # number of demand (read+write) MSHR hits
+system.cpu.icache.demand_hits 5919 # number of demand (read+write) hits
+system.cpu.icache.demand_miss_latency 2273408 # number of demand (read+write) miss cycles
+system.cpu.icache.demand_miss_rate 0.070217 # miss rate for demand accesses
+system.cpu.icache.demand_misses 447 # number of demand (read+write) misses
+system.cpu.icache.demand_mshr_hits 137 # number of demand (read+write) MSHR hits
system.cpu.icache.demand_mshr_miss_latency 1326190 # number of demand (read+write) MSHR miss cycles
-system.cpu.icache.demand_mshr_miss_rate 0.048666 # mshr miss rate for demand accesses
+system.cpu.icache.demand_mshr_miss_rate 0.048696 # mshr miss rate for demand accesses
system.cpu.icache.demand_mshr_misses 310 # number of demand (read+write) MSHR misses
system.cpu.icache.fast_writes 0 # number of fast writes performed
system.cpu.icache.mshr_cap_events 0 # number of times MSHR cap was activated
system.cpu.icache.no_allocate_misses 0 # Number of misses that were no-allocate
-system.cpu.icache.overall_accesses 6370 # number of overall (read+write) accesses
-system.cpu.icache.overall_avg_miss_latency 5088.614350 # average overall miss latency
+system.cpu.icache.overall_accesses 6366 # number of overall (read+write) accesses
+system.cpu.icache.overall_avg_miss_latency 5085.923937 # average overall miss latency
system.cpu.icache.overall_avg_mshr_miss_latency 4278.032258 # average overall mshr miss latency
system.cpu.icache.overall_avg_mshr_uncacheable_latency <err: div-0> # average overall mshr uncacheable latency
-system.cpu.icache.overall_hits 5924 # number of overall hits
-system.cpu.icache.overall_miss_latency 2269522 # number of overall miss cycles
-system.cpu.icache.overall_miss_rate 0.070016 # miss rate for overall accesses
-system.cpu.icache.overall_misses 446 # number of overall misses
-system.cpu.icache.overall_mshr_hits 136 # number of overall MSHR hits
+system.cpu.icache.overall_hits 5919 # number of overall hits
+system.cpu.icache.overall_miss_latency 2273408 # number of overall miss cycles
+system.cpu.icache.overall_miss_rate 0.070217 # miss rate for overall accesses
+system.cpu.icache.overall_misses 447 # number of overall misses
+system.cpu.icache.overall_mshr_hits 137 # number of overall MSHR hits
system.cpu.icache.overall_mshr_miss_latency 1326190 # number of overall MSHR miss cycles
-system.cpu.icache.overall_mshr_miss_rate 0.048666 # mshr miss rate for overall accesses
+system.cpu.icache.overall_mshr_miss_rate 0.048696 # mshr miss rate for overall accesses
system.cpu.icache.overall_mshr_misses 310 # number of overall MSHR misses
system.cpu.icache.overall_mshr_uncacheable_latency 0 # number of overall MSHR uncacheable cycles
system.cpu.icache.overall_mshr_uncacheable_misses 0 # number of overall MSHR uncacheable misses
@@ -218,59 +218,59 @@ system.cpu.icache.prefetcher.num_hwpf_squashed_from_miss 0
system.cpu.icache.replacements 0 # number of replacements
system.cpu.icache.sampled_refs 310 # Sample count of references to valid blocks.
system.cpu.icache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions
-system.cpu.icache.tagsinuse 147.070827 # Cycle average of tags in use
-system.cpu.icache.total_refs 5924 # Total number of references to valid blocks.
+system.cpu.icache.tagsinuse 147.070711 # Cycle average of tags in use
+system.cpu.icache.total_refs 5919 # Total number of references to valid blocks.
system.cpu.icache.warmup_cycle 0 # Cycle when the warmup percentage was hit.
system.cpu.icache.writebacks 0 # number of writebacks
-system.cpu.idleCycles 1346363 # Total number of cycles that the CPU has spent unscheduled due to idling
-system.cpu.iew.EXEC:branches 2364 # Number of branches executed
+system.cpu.idleCycles 1345393 # Total number of cycles that the CPU has spent unscheduled due to idling
+system.cpu.iew.EXEC:branches 2362 # Number of branches executed
system.cpu.iew.EXEC:nop 48 # number of nop insts executed
-system.cpu.iew.EXEC:rate 0.251650 # Inst execution rate
-system.cpu.iew.EXEC:refs 5460 # number of memory reference insts executed
-system.cpu.iew.EXEC:stores 2123 # Number of stores executed
+system.cpu.iew.EXEC:rate 0.247123 # Inst execution rate
+system.cpu.iew.EXEC:refs 5464 # number of memory reference insts executed
+system.cpu.iew.EXEC:stores 2131 # Number of stores executed
system.cpu.iew.EXEC:swp 0 # number of swp insts executed
system.cpu.iew.WB:consumers 6466 # num instructions consuming a value
-system.cpu.iew.WB:count 11620 # cumulative count of insts written-back
-system.cpu.iew.WB:fanout 0.798639 # average fanout of values written-back
+system.cpu.iew.WB:count 11625 # cumulative count of insts written-back
+system.cpu.iew.WB:fanout 0.798948 # average fanout of values written-back
system.cpu.iew.WB:penalized 0 # number of instrctions required to write to 'other' IQ
system.cpu.iew.WB:penalized_rate 0 # fraction of instructions written-back that wrote to 'other' IQ
-system.cpu.iew.WB:producers 5164 # num instructions producing a value
-system.cpu.iew.WB:rate 0.216094 # insts written-back per cycle
-system.cpu.iew.WB:sent 11692 # cumulative count of insts sent to commit
+system.cpu.iew.WB:producers 5166 # num instructions producing a value
+system.cpu.iew.WB:rate 0.212360 # insts written-back per cycle
+system.cpu.iew.WB:sent 11698 # cumulative count of insts sent to commit
system.cpu.iew.branchMispredicts 401 # Number of branch mispredicts detected at execute
system.cpu.iew.iewBlockCycles 7230 # Number of cycles IEW is blocking
-system.cpu.iew.iewDispLoadInsts 3775 # Number of dispatched load instructions
+system.cpu.iew.iewDispLoadInsts 3770 # Number of dispatched load instructions
system.cpu.iew.iewDispNonSpecInsts 24 # Number of dispatched non-speculative instructions
-system.cpu.iew.iewDispSquashedInsts 2557 # Number of squashed instructions skipped by dispatch
-system.cpu.iew.iewDispStoreInsts 3734 # Number of dispatched store instructions
-system.cpu.iew.iewDispatchedInsts 19465 # Number of instructions dispatched to IQ
-system.cpu.iew.iewExecLoadInsts 3337 # Number of load instructions executed
-system.cpu.iew.iewExecSquashedInsts 308 # Number of squashed instructions skipped in execute
-system.cpu.iew.iewExecutedInsts 13532 # Number of executed instructions
+system.cpu.iew.iewDispSquashedInsts 2547 # Number of squashed instructions skipped by dispatch
+system.cpu.iew.iewDispStoreInsts 3723 # Number of dispatched store instructions
+system.cpu.iew.iewDispatchedInsts 19439 # Number of instructions dispatched to IQ
+system.cpu.iew.iewExecLoadInsts 3333 # Number of load instructions executed
+system.cpu.iew.iewExecSquashedInsts 305 # Number of squashed instructions skipped in execute
+system.cpu.iew.iewExecutedInsts 13528 # Number of executed instructions
system.cpu.iew.iewIQFullEvents 10 # Number of times the IQ has become full, causing a stall
system.cpu.iew.iewIdleCycles 0 # Number of cycles IEW is idle
system.cpu.iew.iewLSQFullEvents 1 # Number of times the LSQ has become full, causing a stall
-system.cpu.iew.iewSquashCycles 2529 # Number of cycles IEW is squashing
+system.cpu.iew.iewSquashCycles 2527 # Number of cycles IEW is squashing
system.cpu.iew.iewUnblockCycles 39 # Number of cycles IEW is unblocking
-system.cpu.iew.lsq.thread.0.blockedLoads 1 # Number of blocked loads due to partial load-store forwarding
+system.cpu.iew.lsq.thread.0.blockedLoads 0 # Number of blocked loads due to partial load-store forwarding
system.cpu.iew.lsq.thread.0.cacheBlocked 1656 # Number of times an access to memory failed due to the cache being blocked
system.cpu.iew.lsq.thread.0.forwLoads 81 # Number of loads that had data forwarded from stores
system.cpu.iew.lsq.thread.0.ignoredResponses 3 # Number of memory responses ignored because the instruction is squashed
system.cpu.iew.lsq.thread.0.invAddrLoads 0 # Number of loads ignored due to an invalid address
system.cpu.iew.lsq.thread.0.invAddrSwpfs 0 # Number of software prefetches ignored due to an invalid address
-system.cpu.iew.lsq.thread.0.memOrderViolation 40 # Number of memory ordering violations
+system.cpu.iew.lsq.thread.0.memOrderViolation 61 # Number of memory ordering violations
system.cpu.iew.lsq.thread.0.rescheduledLoads 1 # Number of loads that were rescheduled
-system.cpu.iew.lsq.thread.0.squashedLoads 2796 # Number of loads squashed
-system.cpu.iew.lsq.thread.0.squashedStores 2922 # Number of stores squashed
-system.cpu.iew.memOrderViolationEvents 40 # Number of memory order violations
+system.cpu.iew.lsq.thread.0.squashedLoads 2791 # Number of loads squashed
+system.cpu.iew.lsq.thread.0.squashedStores 2911 # Number of stores squashed
+system.cpu.iew.memOrderViolationEvents 61 # Number of memory order violations
system.cpu.iew.predictedNotTakenIncorrect 279 # Number of branches that were predicted not taken incorrectly
system.cpu.iew.predictedTakenIncorrect 122 # Number of branches that were predicted taken incorrectly
system.cpu.ipc 0.004016 # IPC: Instructions Per Cycle
system.cpu.ipc_total 0.004016 # IPC: Total IPC of All Threads
-system.cpu.iq.ISSUE:FU_type_0 13840 # Type of FU issued
+system.cpu.iq.ISSUE:FU_type_0 13833 # Type of FU issued
system.cpu.iq.ISSUE:FU_type_0.start_dist
(null) 2 0.01% # Type of FU issued
- IntAlu 8249 59.60% # Type of FU issued
+ IntAlu 8240 59.57% # Type of FU issued
IntMult 1 0.01% # Type of FU issued
IntDiv 0 0.00% # Type of FU issued
FloatAdd 2 0.01% # Type of FU issued
@@ -279,16 +279,16 @@ system.cpu.iq.ISSUE:FU_type_0.start_dist
FloatMult 0 0.00% # Type of FU issued
FloatDiv 0 0.00% # Type of FU issued
FloatSqrt 0 0.00% # Type of FU issued
- MemRead 3432 24.80% # Type of FU issued
- MemWrite 2154 15.56% # Type of FU issued
+ MemRead 3428 24.78% # Type of FU issued
+ MemWrite 2160 15.61% # Type of FU issued
IprAccess 0 0.00% # Type of FU issued
InstPrefetch 0 0.00% # Type of FU issued
system.cpu.iq.ISSUE:FU_type_0.end_dist
-system.cpu.iq.ISSUE:fu_busy_cnt 86 # FU busy when requested
-system.cpu.iq.ISSUE:fu_busy_rate 0.006214 # FU busy rate (busy events/executed inst)
+system.cpu.iq.ISSUE:fu_busy_cnt 87 # FU busy when requested
+system.cpu.iq.ISSUE:fu_busy_rate 0.006289 # FU busy rate (busy events/executed inst)
system.cpu.iq.ISSUE:fu_full.start_dist
(null) 0 0.00% # attempts to use FU when none available
- IntAlu 1 1.16% # attempts to use FU when none available
+ IntAlu 1 1.15% # attempts to use FU when none available
IntMult 0 0.00% # attempts to use FU when none available
IntDiv 0 0.00% # attempts to use FU when none available
FloatAdd 0 0.00% # attempts to use FU when none available
@@ -297,38 +297,38 @@ system.cpu.iq.ISSUE:fu_full.start_dist
FloatMult 0 0.00% # attempts to use FU when none available
FloatDiv 0 0.00% # attempts to use FU when none available
FloatSqrt 0 0.00% # attempts to use FU when none available
- MemRead 53 61.63% # attempts to use FU when none available
- MemWrite 32 37.21% # attempts to use FU when none available
+ MemRead 54 62.07% # attempts to use FU when none available
+ MemWrite 32 36.78% # attempts to use FU when none available
IprAccess 0 0.00% # attempts to use FU when none available
InstPrefetch 0 0.00% # attempts to use FU when none available
system.cpu.iq.ISSUE:fu_full.end_dist
system.cpu.iq.ISSUE:issued_per_cycle.start_dist # Number of insts issued each cycle
-system.cpu.iq.ISSUE:issued_per_cycle.samples 53773
+system.cpu.iq.ISSUE:issued_per_cycle.samples 54742
system.cpu.iq.ISSUE:issued_per_cycle.min_value 0
- 0 46903 8722.41%
- 1 3262 606.62%
- 2 1316 244.73%
- 3 1665 309.63%
- 4 333 61.93%
- 5 188 34.96%
- 6 73 13.58%
- 7 23 4.28%
- 8 10 1.86%
+ 0 47874 8745.39%
+ 1 3270 597.35%
+ 2 1302 237.84%
+ 3 1673 305.62%
+ 4 327 59.73%
+ 5 188 34.34%
+ 6 75 13.70%
+ 7 22 4.02%
+ 8 11 2.01%
system.cpu.iq.ISSUE:issued_per_cycle.max_value 8
system.cpu.iq.ISSUE:issued_per_cycle.end_dist
-system.cpu.iq.ISSUE:rate 0.257378 # Inst issue rate
-system.cpu.iq.iqInstsAdded 19393 # Number of instructions added to the IQ (excludes non-spec)
-system.cpu.iq.iqInstsIssued 13840 # Number of instructions issued
+system.cpu.iq.ISSUE:rate 0.252694 # Inst issue rate
+system.cpu.iq.iqInstsAdded 19367 # Number of instructions added to the IQ (excludes non-spec)
+system.cpu.iq.iqInstsIssued 13833 # Number of instructions issued
system.cpu.iq.iqNonSpecInstsAdded 24 # Number of non-speculative instructions added to the IQ
-system.cpu.iq.iqSquashedInstsExamined 13381 # Number of squashed instructions iterated over during squash; mainly for profiling
-system.cpu.iq.iqSquashedInstsIssued 72 # Number of squashed instructions issued
+system.cpu.iq.iqSquashedInstsExamined 13339 # Number of squashed instructions iterated over during squash; mainly for profiling
+system.cpu.iq.iqSquashedInstsIssued 73 # Number of squashed instructions issued
system.cpu.iq.iqSquashedNonSpecRemoved 7 # Number of squashed non-spec instructions that were removed
-system.cpu.iq.iqSquashedOperandsExamined 9575 # Number of squashed operands that are examined and possibly removed from graph
+system.cpu.iq.iqSquashedOperandsExamined 9527 # Number of squashed operands that are examined and possibly removed from graph
system.cpu.l2cache.ReadReq_accesses 480 # number of ReadReq accesses(hits+misses)
-system.cpu.l2cache.ReadReq_avg_miss_latency 4520.691667 # average ReadReq miss latency
+system.cpu.l2cache.ReadReq_avg_miss_latency 4520.693750 # average ReadReq miss latency
system.cpu.l2cache.ReadReq_avg_mshr_miss_latency 2303.372917 # average ReadReq mshr miss latency
-system.cpu.l2cache.ReadReq_miss_latency 2169932 # number of ReadReq miss cycles
+system.cpu.l2cache.ReadReq_miss_latency 2169933 # number of ReadReq miss cycles
system.cpu.l2cache.ReadReq_miss_rate 1 # miss rate for ReadReq accesses
system.cpu.l2cache.ReadReq_misses 480 # number of ReadReq misses
system.cpu.l2cache.ReadReq_mshr_miss_latency 1105619 # number of ReadReq MSHR miss cycles
@@ -343,10 +343,10 @@ system.cpu.l2cache.blocked_cycles_no_mshrs 0 #
system.cpu.l2cache.blocked_cycles_no_targets 0 # number of cycles access was blocked
system.cpu.l2cache.cache_copies 0 # number of cache copies performed
system.cpu.l2cache.demand_accesses 480 # number of demand (read+write) accesses
-system.cpu.l2cache.demand_avg_miss_latency 4520.691667 # average overall miss latency
+system.cpu.l2cache.demand_avg_miss_latency 4520.693750 # average overall miss latency
system.cpu.l2cache.demand_avg_mshr_miss_latency 2303.372917 # average overall mshr miss latency
system.cpu.l2cache.demand_hits 0 # number of demand (read+write) hits
-system.cpu.l2cache.demand_miss_latency 2169932 # number of demand (read+write) miss cycles
+system.cpu.l2cache.demand_miss_latency 2169933 # number of demand (read+write) miss cycles
system.cpu.l2cache.demand_miss_rate 1 # miss rate for demand accesses
system.cpu.l2cache.demand_misses 480 # number of demand (read+write) misses
system.cpu.l2cache.demand_mshr_hits 0 # number of demand (read+write) MSHR hits
@@ -357,11 +357,11 @@ system.cpu.l2cache.fast_writes 0 # nu
system.cpu.l2cache.mshr_cap_events 0 # number of times MSHR cap was activated
system.cpu.l2cache.no_allocate_misses 0 # Number of misses that were no-allocate
system.cpu.l2cache.overall_accesses 480 # number of overall (read+write) accesses
-system.cpu.l2cache.overall_avg_miss_latency 4520.691667 # average overall miss latency
+system.cpu.l2cache.overall_avg_miss_latency 4520.693750 # average overall miss latency
system.cpu.l2cache.overall_avg_mshr_miss_latency 2303.372917 # average overall mshr miss latency
system.cpu.l2cache.overall_avg_mshr_uncacheable_latency <err: div-0> # average overall mshr uncacheable latency
system.cpu.l2cache.overall_hits 0 # number of overall hits
-system.cpu.l2cache.overall_miss_latency 2169932 # number of overall miss cycles
+system.cpu.l2cache.overall_miss_latency 2169933 # number of overall miss cycles
system.cpu.l2cache.overall_miss_rate 1 # miss rate for overall accesses
system.cpu.l2cache.overall_misses 480 # number of overall misses
system.cpu.l2cache.overall_mshr_hits 0 # number of overall MSHR hits
@@ -382,27 +382,27 @@ system.cpu.l2cache.prefetcher.num_hwpf_squashed_from_miss 0
system.cpu.l2cache.replacements 0 # number of replacements
system.cpu.l2cache.sampled_refs 480 # Sample count of references to valid blocks.
system.cpu.l2cache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions
-system.cpu.l2cache.tagsinuse 248.469634 # Cycle average of tags in use
+system.cpu.l2cache.tagsinuse 248.469469 # Cycle average of tags in use
system.cpu.l2cache.total_refs 0 # Total number of references to valid blocks.
system.cpu.l2cache.warmup_cycle 0 # Cycle when the warmup percentage was hit.
system.cpu.l2cache.writebacks 0 # number of writebacks
-system.cpu.numCycles 53773 # number of cpu cycles simulated
-system.cpu.rename.RENAME:BlockCycles 7860 # Number of cycles rename is blocking
+system.cpu.numCycles 54742 # number of cpu cycles simulated
+system.cpu.rename.RENAME:BlockCycles 7851 # Number of cycles rename is blocking
system.cpu.rename.RENAME:CommittedMaps 4051 # Number of HB maps that are committed
system.cpu.rename.RENAME:IQFullEvents 2 # Number of times rename has blocked due to IQ full
-system.cpu.rename.RENAME:IdleCycles 28280 # Number of cycles rename is idle
-system.cpu.rename.RENAME:LSQFullEvents 453 # Number of times rename has blocked due to LSQ full
+system.cpu.rename.RENAME:IdleCycles 29263 # Number of cycles rename is idle
+system.cpu.rename.RENAME:LSQFullEvents 458 # Number of times rename has blocked due to LSQ full
system.cpu.rename.RENAME:ROBFullEvents 8 # Number of times rename has blocked due to ROB full
-system.cpu.rename.RENAME:RenameLookups 36016 # Number of register rename lookups that rename has made
-system.cpu.rename.RENAME:RenamedInsts 29203 # Number of instructions processed by rename
-system.cpu.rename.RENAME:RenamedOperands 20142 # Number of destination operands rename has renamed
-system.cpu.rename.RENAME:RunCycles 5460 # Number of cycles rename is running
-system.cpu.rename.RENAME:SquashCycles 2529 # Number of cycles rename is squashing
-system.cpu.rename.RENAME:UnblockCycles 483 # Number of cycles rename is unblocking
-system.cpu.rename.RENAME:UndoneMaps 16091 # Number of HB maps that are undone due to squashing
-system.cpu.rename.RENAME:serializeStallCycles 9161 # count of cycles rename stalled for serializing inst
+system.cpu.rename.RENAME:RenameLookups 35953 # Number of register rename lookups that rename has made
+system.cpu.rename.RENAME:RenamedInsts 29156 # Number of instructions processed by rename
+system.cpu.rename.RENAME:RenamedOperands 20115 # Number of destination operands rename has renamed
+system.cpu.rename.RENAME:RunCycles 5451 # Number of cycles rename is running
+system.cpu.rename.RENAME:SquashCycles 2527 # Number of cycles rename is squashing
+system.cpu.rename.RENAME:UnblockCycles 486 # Number of cycles rename is unblocking
+system.cpu.rename.RENAME:UndoneMaps 16064 # Number of HB maps that are undone due to squashing
+system.cpu.rename.RENAME:serializeStallCycles 9164 # count of cycles rename stalled for serializing inst
system.cpu.rename.RENAME:serializingInsts 27 # count of serializing insts renamed
-system.cpu.rename.RENAME:skidInsts 828 # count of insts added to the skid buffer
+system.cpu.rename.RENAME:skidInsts 831 # count of insts added to the skid buffer
system.cpu.rename.RENAME:tempSerializingInsts 21 # count of temporary serializing insts renamed
system.cpu.timesIdled 369 # Number of times that the entire CPU went into an idle state and unscheduled itself
system.cpu.workload.PROG:num_syscalls 17 # Number of system calls
diff --git a/tests/quick/00.hello/ref/alpha/linux/o3-timing/stderr b/tests/quick/00.hello/ref/alpha/linux/o3-timing/stderr
index eb1796ead..684350ff9 100644
--- a/tests/quick/00.hello/ref/alpha/linux/o3-timing/stderr
+++ b/tests/quick/00.hello/ref/alpha/linux/o3-timing/stderr
@@ -1,2 +1,3 @@
-0: system.remote_gdb.listener: listening for remote gdb on port 7000
+0: system.remote_gdb.listener: listening for remote gdb #0 on port 7000
warn: Entering event queue @ 0. Starting simulation...
+warn: Increasing stack size by one page.
diff --git a/tests/quick/00.hello/ref/alpha/linux/o3-timing/stdout b/tests/quick/00.hello/ref/alpha/linux/o3-timing/stdout
index 511bc594d..cbdc4ee25 100644
--- a/tests/quick/00.hello/ref/alpha/linux/o3-timing/stdout
+++ b/tests/quick/00.hello/ref/alpha/linux/o3-timing/stdout
@@ -6,8 +6,9 @@ The Regents of The University of Michigan
All Rights Reserved
-M5 compiled Jan 22 2007 23:06:52
-M5 started Mon Jan 22 23:06:54 2007
-M5 executing on ewok
-command line: build/ALPHA_SE/m5.fast -d build/ALPHA_SE/tests/fast/quick/00.hello/alpha/linux/o3-timing tests/run.py quick/00.hello/alpha/linux/o3-timing
-Exiting @ tick 1400135 because target called exit()
+M5 compiled Mar 24 2007 13:51:02
+M5 started Sat Mar 24 13:51:12 2007
+M5 executing on zizzer.eecs.umich.edu
+command line: build/ALPHA_SE/m5.opt -d build/ALPHA_SE/tests/opt/quick/00.hello/alpha/linux/o3-timing tests/run.py quick/00.hello/alpha/linux/o3-timing
+Global frequency set at 1000000000000 ticks per second
+Exiting @ tick 1400134 because target called exit()
diff --git a/tests/quick/00.hello/ref/alpha/tru64/o3-timing/config.ini b/tests/quick/00.hello/ref/alpha/tru64/o3-timing/config.ini
index db88e7673..ea499f4f1 100644
--- a/tests/quick/00.hello/ref/alpha/tru64/o3-timing/config.ini
+++ b/tests/quick/00.hello/ref/alpha/tru64/o3-timing/config.ini
@@ -1,48 +1,7 @@
[root]
type=Root
children=system
-checkpoint=
-clock=1000000000000
-max_tick=0
-output_file=cout
-progress_interval=0
-
-[exetrace]
-intel_format=false
-legion_lockstep=false
-pc_symbol=true
-print_cpseq=false
-print_cycle=true
-print_data=true
-print_effaddr=true
-print_fetchseq=false
-print_iregs=false
-print_opclass=true
-print_thread=true
-speculative=true
-trace_system=client
-
-[serialize]
-count=10
-cycle=0
-dir=cpt.%012d
-period=0
-
-[stats]
-descriptions=true
-dump_cycle=0
-dump_period=0
-dump_reset=false
-ignore_events=
-mysql_db=
-mysql_host=
-mysql_password=
-mysql_user=
-project_name=test
-simulation_name=test
-simulation_sample=0
-text_compat=true
-text_file=m5stats.txt
+dummy=0
[system]
type=System
@@ -70,6 +29,7 @@ commitToFetchDelay=1
commitToIEWDelay=1
commitToRenameDelay=1
commitWidth=8
+cpu_id=0
decodeToFetchDelay=1
decodeToRenameDelay=1
decodeWidth=8
@@ -417,12 +377,3 @@ range=0:134217727
zero=false
port=system.membus.port[0]
-[trace]
-bufsize=0
-cycle=0
-dump_on_exit=false
-file=cout
-flags=
-ignore=
-start=0
-
diff --git a/tests/quick/00.hello/ref/alpha/tru64/o3-timing/config.out b/tests/quick/00.hello/ref/alpha/tru64/o3-timing/config.out
index 9ee1931ca..6672039dd 100644
--- a/tests/quick/00.hello/ref/alpha/tru64/o3-timing/config.out
+++ b/tests/quick/00.hello/ref/alpha/tru64/o3-timing/config.out
@@ -1,9 +1,6 @@
[root]
type=Root
-clock=1000000000000
-max_tick=0
-progress_interval=0
-output_file=cout
+dummy=0
[system.physmem]
type=PhysicalMemory
@@ -173,6 +170,7 @@ type=DerivO3CPU
clock=1
phase=0
numThreads=1
+cpu_id=0
activity=0
workload=system.cpu.workload
checker=null
@@ -367,51 +365,3 @@ clock=1000
width=64
responder_set=false
-[trace]
-flags=
-start=0
-cycle=0
-bufsize=0
-file=cout
-dump_on_exit=false
-ignore=
-
-[stats]
-descriptions=true
-project_name=test
-simulation_name=test
-simulation_sample=0
-text_file=m5stats.txt
-text_compat=true
-mysql_db=
-mysql_user=
-mysql_password=
-mysql_host=
-events_start=-1
-dump_reset=false
-dump_cycle=0
-dump_period=0
-ignore_events=
-
-[random]
-seed=1
-
-[exetrace]
-speculative=true
-print_cycle=true
-print_opclass=true
-print_thread=true
-print_effaddr=true
-print_data=true
-print_iregs=false
-print_fetchseq=false
-print_cpseq=false
-print_reg_delta=false
-pc_symbol=true
-intel_format=false
-legion_lockstep=false
-trace_system=client
-
-[statsreset]
-reset_cycle=0
-
diff --git a/tests/quick/00.hello/ref/alpha/tru64/o3-timing/m5stats.txt b/tests/quick/00.hello/ref/alpha/tru64/o3-timing/m5stats.txt
index 3aae57d12..f855ff850 100644
--- a/tests/quick/00.hello/ref/alpha/tru64/o3-timing/m5stats.txt
+++ b/tests/quick/00.hello/ref/alpha/tru64/o3-timing/m5stats.txt
@@ -8,10 +8,10 @@ global.BPredUnit.condIncorrect 218 # Nu
global.BPredUnit.condPredicted 459 # Number of conditional branches predicted
global.BPredUnit.lookups 898 # Number of BP lookups
global.BPredUnit.usedRAS 171 # Number of times the RAS was used to get a target.
-host_inst_rate 22132 # Simulator instruction rate (inst/s)
-host_mem_usage 176684 # Number of bytes of host memory used
-host_seconds 0.11 # Real time elapsed on the host
-host_tick_rate 6945216 # Simulator tick rate (ticks/s)
+host_inst_rate 12517 # Simulator instruction rate (inst/s)
+host_mem_usage 155528 # Number of bytes of host memory used
+host_seconds 0.19 # Real time elapsed on the host
+host_tick_rate 3937113 # Simulator tick rate (ticks/s)
memdepunit.memDep.conflictingLoads 10 # Number of conflicting loads.
memdepunit.memDep.conflictingStores 8 # Number of conflicting stores.
memdepunit.memDep.insertedLoads 783 # Number of loads inserted to the mem dependence unit.
@@ -26,14 +26,14 @@ system.cpu.commit.COM:bw_limited 0 # nu
system.cpu.commit.COM:committed_per_cycle.start_dist # Number of insts commited each cycle
system.cpu.commit.COM:committed_per_cycle.samples 28200
system.cpu.commit.COM:committed_per_cycle.min_value 0
- 0 27270 9670.21%
- 1 239 84.75%
- 2 332 117.73%
+ 0 27273 9671.28%
+ 1 240 85.11%
+ 2 328 116.31%
3 127 45.04%
- 4 83 29.43%
+ 4 80 28.37%
5 54 19.15%
- 6 26 9.22%
- 7 18 6.38%
+ 6 28 9.93%
+ 7 19 6.74%
8 51 18.09%
system.cpu.commit.COM:committed_per_cycle.max_value 8
system.cpu.commit.COM:committed_per_cycle.end_dist
@@ -52,14 +52,14 @@ system.cpu.committedInsts_total 2387 # Nu
system.cpu.cpi 315.051529 # CPI: Cycles Per Instruction
system.cpu.cpi_total 315.051529 # CPI: Total CPI of All Threads
system.cpu.dcache.ReadReq_accesses 560 # number of ReadReq accesses(hits+misses)
-system.cpu.dcache.ReadReq_avg_miss_latency 7231.967391 # average ReadReq miss latency
-system.cpu.dcache.ReadReq_avg_mshr_miss_latency 7288.377049 # average ReadReq mshr miss latency
+system.cpu.dcache.ReadReq_avg_miss_latency 7232.163043 # average ReadReq miss latency
+system.cpu.dcache.ReadReq_avg_mshr_miss_latency 7288.491803 # average ReadReq mshr miss latency
system.cpu.dcache.ReadReq_hits 468 # number of ReadReq hits
-system.cpu.dcache.ReadReq_miss_latency 665341 # number of ReadReq miss cycles
+system.cpu.dcache.ReadReq_miss_latency 665359 # number of ReadReq miss cycles
system.cpu.dcache.ReadReq_miss_rate 0.164286 # miss rate for ReadReq accesses
system.cpu.dcache.ReadReq_misses 92 # number of ReadReq misses
system.cpu.dcache.ReadReq_mshr_hits 31 # number of ReadReq MSHR hits
-system.cpu.dcache.ReadReq_mshr_miss_latency 444591 # number of ReadReq MSHR miss cycles
+system.cpu.dcache.ReadReq_mshr_miss_latency 444598 # number of ReadReq MSHR miss cycles
system.cpu.dcache.ReadReq_mshr_miss_rate 0.108929 # mshr miss rate for ReadReq accesses
system.cpu.dcache.ReadReq_mshr_misses 61 # number of ReadReq MSHR misses
system.cpu.dcache.WriteReq_accesses 294 # number of WriteReq accesses(hits+misses)
@@ -74,37 +74,37 @@ system.cpu.dcache.WriteReq_mshr_miss_latency 157720
system.cpu.dcache.WriteReq_mshr_miss_rate 0.081633 # mshr miss rate for WriteReq accesses
system.cpu.dcache.WriteReq_mshr_misses 24 # number of WriteReq MSHR misses
system.cpu.dcache.avg_blocked_cycles_no_mshrs <err: div-0> # average number of cycles each access was blocked
-system.cpu.dcache.avg_blocked_cycles_no_targets 2980 # average number of cycles each access was blocked
+system.cpu.dcache.avg_blocked_cycles_no_targets 2980.375000 # average number of cycles each access was blocked
system.cpu.dcache.avg_refs 8.141176 # Average number of references to valid blocks.
system.cpu.dcache.blocked_no_mshrs 0 # number of cycles access was blocked
system.cpu.dcache.blocked_no_targets 8 # number of cycles access was blocked
system.cpu.dcache.blocked_cycles_no_mshrs 0 # number of cycles access was blocked
-system.cpu.dcache.blocked_cycles_no_targets 23840 # number of cycles access was blocked
+system.cpu.dcache.blocked_cycles_no_targets 23843 # number of cycles access was blocked
system.cpu.dcache.cache_copies 0 # number of cache copies performed
system.cpu.dcache.demand_accesses 854 # number of demand (read+write) accesses
-system.cpu.dcache.demand_avg_miss_latency 6979.500000 # average overall miss latency
-system.cpu.dcache.demand_avg_mshr_miss_latency 7086.011765 # average overall mshr miss latency
+system.cpu.dcache.demand_avg_miss_latency 6979.611111 # average overall miss latency
+system.cpu.dcache.demand_avg_mshr_miss_latency 7086.094118 # average overall mshr miss latency
system.cpu.dcache.demand_hits 692 # number of demand (read+write) hits
-system.cpu.dcache.demand_miss_latency 1130679 # number of demand (read+write) miss cycles
+system.cpu.dcache.demand_miss_latency 1130697 # number of demand (read+write) miss cycles
system.cpu.dcache.demand_miss_rate 0.189696 # miss rate for demand accesses
system.cpu.dcache.demand_misses 162 # number of demand (read+write) misses
system.cpu.dcache.demand_mshr_hits 77 # number of demand (read+write) MSHR hits
-system.cpu.dcache.demand_mshr_miss_latency 602311 # number of demand (read+write) MSHR miss cycles
+system.cpu.dcache.demand_mshr_miss_latency 602318 # number of demand (read+write) MSHR miss cycles
system.cpu.dcache.demand_mshr_miss_rate 0.099532 # mshr miss rate for demand accesses
system.cpu.dcache.demand_mshr_misses 85 # number of demand (read+write) MSHR misses
system.cpu.dcache.fast_writes 0 # number of fast writes performed
system.cpu.dcache.mshr_cap_events 0 # number of times MSHR cap was activated
system.cpu.dcache.no_allocate_misses 0 # Number of misses that were no-allocate
system.cpu.dcache.overall_accesses 854 # number of overall (read+write) accesses
-system.cpu.dcache.overall_avg_miss_latency 6979.500000 # average overall miss latency
-system.cpu.dcache.overall_avg_mshr_miss_latency 7086.011765 # average overall mshr miss latency
+system.cpu.dcache.overall_avg_miss_latency 6979.611111 # average overall miss latency
+system.cpu.dcache.overall_avg_mshr_miss_latency 7086.094118 # average overall mshr miss latency
system.cpu.dcache.overall_avg_mshr_uncacheable_latency <err: div-0> # average overall mshr uncacheable latency
system.cpu.dcache.overall_hits 692 # number of overall hits
-system.cpu.dcache.overall_miss_latency 1130679 # number of overall miss cycles
+system.cpu.dcache.overall_miss_latency 1130697 # number of overall miss cycles
system.cpu.dcache.overall_miss_rate 0.189696 # miss rate for overall accesses
system.cpu.dcache.overall_misses 162 # number of overall misses
system.cpu.dcache.overall_mshr_hits 77 # number of overall MSHR hits
-system.cpu.dcache.overall_mshr_miss_latency 602311 # number of overall MSHR miss cycles
+system.cpu.dcache.overall_mshr_miss_latency 602318 # number of overall MSHR miss cycles
system.cpu.dcache.overall_mshr_miss_rate 0.099532 # mshr miss rate for overall accesses
system.cpu.dcache.overall_mshr_misses 85 # number of overall MSHR misses
system.cpu.dcache.overall_mshr_uncacheable_latency 0 # number of overall MSHR uncacheable cycles
@@ -125,18 +125,18 @@ system.cpu.dcache.tagsinuse 46.684988 # Cy
system.cpu.dcache.total_refs 692 # Total number of references to valid blocks.
system.cpu.dcache.warmup_cycle 0 # Cycle when the warmup percentage was hit.
system.cpu.dcache.writebacks 0 # number of writebacks
-system.cpu.decode.DECODE:BlockedCycles 21865 # Number of cycles decode is blocked
+system.cpu.decode.DECODE:BlockedCycles 21870 # Number of cycles decode is blocked
system.cpu.decode.DECODE:BranchMispred 79 # Number of times decode detected a branch misprediction
system.cpu.decode.DECODE:BranchResolved 150 # Number of times decode resolved a branch
system.cpu.decode.DECODE:DecodedInsts 4900 # Number of instructions handled by decode
system.cpu.decode.DECODE:IdleCycles 5406 # Number of cycles decode is idle
-system.cpu.decode.DECODE:RunCycles 928 # Number of cycles decode is running
+system.cpu.decode.DECODE:RunCycles 923 # Number of cycles decode is running
system.cpu.decode.DECODE:SquashCycles 336 # Number of cycles decode is squashing
system.cpu.decode.DECODE:SquashedInsts 286 # Number of squashed instructions handled by decode
system.cpu.decode.DECODE:UnblockCycles 2 # Number of cycles decode is unblocking
system.cpu.fetch.Branches 898 # Number of branches that fetch encountered
system.cpu.fetch.CacheLines 813 # Number of cache lines fetched
-system.cpu.fetch.Cycles 1774 # Number of cycles fetch has run and was not squashing or blocked
+system.cpu.fetch.Cycles 1769 # Number of cycles fetch has run and was not squashing or blocked
system.cpu.fetch.IcacheSquashes 146 # Number of outstanding Icache misses that were squashed
system.cpu.fetch.Insts 5593 # Number of instructions fetch has processed
system.cpu.fetch.SquashCycles 258 # Number of cycles fetch has spent squashing
@@ -147,27 +147,27 @@ system.cpu.fetch.rate 0.195991 # Nu
system.cpu.fetch.rateDist.start_dist # Number of instructions fetched each cycle (Total)
system.cpu.fetch.rateDist.samples 28537
system.cpu.fetch.rateDist.min_value 0
- 0 27576 9663.24%
+ 0 27581 9665.00%
1 50 17.52%
- 2 92 32.24%
- 3 74 25.93%
- 4 117 41.00%
- 5 71 24.88%
- 6 43 15.07%
+ 2 84 29.44%
+ 3 78 27.33%
+ 4 118 41.35%
+ 5 67 23.48%
+ 6 41 14.37%
7 56 19.62%
- 8 458 160.49%
+ 8 462 161.90%
system.cpu.fetch.rateDist.max_value 8
system.cpu.fetch.rateDist.end_dist
system.cpu.icache.ReadReq_accesses 813 # number of ReadReq accesses(hits+misses)
-system.cpu.icache.ReadReq_avg_miss_latency 4955.450199 # average ReadReq miss latency
-system.cpu.icache.ReadReq_avg_mshr_miss_latency 4151.809783 # average ReadReq mshr miss latency
+system.cpu.icache.ReadReq_avg_miss_latency 4955.454183 # average ReadReq miss latency
+system.cpu.icache.ReadReq_avg_mshr_miss_latency 4151.815217 # average ReadReq mshr miss latency
system.cpu.icache.ReadReq_hits 562 # number of ReadReq hits
-system.cpu.icache.ReadReq_miss_latency 1243818 # number of ReadReq miss cycles
+system.cpu.icache.ReadReq_miss_latency 1243819 # number of ReadReq miss cycles
system.cpu.icache.ReadReq_miss_rate 0.308733 # miss rate for ReadReq accesses
system.cpu.icache.ReadReq_misses 251 # number of ReadReq misses
system.cpu.icache.ReadReq_mshr_hits 67 # number of ReadReq MSHR hits
-system.cpu.icache.ReadReq_mshr_miss_latency 763933 # number of ReadReq MSHR miss cycles
+system.cpu.icache.ReadReq_mshr_miss_latency 763934 # number of ReadReq MSHR miss cycles
system.cpu.icache.ReadReq_mshr_miss_rate 0.226322 # mshr miss rate for ReadReq accesses
system.cpu.icache.ReadReq_mshr_misses 184 # number of ReadReq MSHR misses
system.cpu.icache.avg_blocked_cycles_no_mshrs <err: div-0> # average number of cycles each access was blocked
@@ -179,29 +179,29 @@ system.cpu.icache.blocked_cycles_no_mshrs 0 # n
system.cpu.icache.blocked_cycles_no_targets 13780 # number of cycles access was blocked
system.cpu.icache.cache_copies 0 # number of cache copies performed
system.cpu.icache.demand_accesses 813 # number of demand (read+write) accesses
-system.cpu.icache.demand_avg_miss_latency 4955.450199 # average overall miss latency
-system.cpu.icache.demand_avg_mshr_miss_latency 4151.809783 # average overall mshr miss latency
+system.cpu.icache.demand_avg_miss_latency 4955.454183 # average overall miss latency
+system.cpu.icache.demand_avg_mshr_miss_latency 4151.815217 # average overall mshr miss latency
system.cpu.icache.demand_hits 562 # number of demand (read+write) hits
-system.cpu.icache.demand_miss_latency 1243818 # number of demand (read+write) miss cycles
+system.cpu.icache.demand_miss_latency 1243819 # number of demand (read+write) miss cycles
system.cpu.icache.demand_miss_rate 0.308733 # miss rate for demand accesses
system.cpu.icache.demand_misses 251 # number of demand (read+write) misses
system.cpu.icache.demand_mshr_hits 67 # number of demand (read+write) MSHR hits
-system.cpu.icache.demand_mshr_miss_latency 763933 # number of demand (read+write) MSHR miss cycles
+system.cpu.icache.demand_mshr_miss_latency 763934 # number of demand (read+write) MSHR miss cycles
system.cpu.icache.demand_mshr_miss_rate 0.226322 # mshr miss rate for demand accesses
system.cpu.icache.demand_mshr_misses 184 # number of demand (read+write) MSHR misses
system.cpu.icache.fast_writes 0 # number of fast writes performed
system.cpu.icache.mshr_cap_events 0 # number of times MSHR cap was activated
system.cpu.icache.no_allocate_misses 0 # Number of misses that were no-allocate
system.cpu.icache.overall_accesses 813 # number of overall (read+write) accesses
-system.cpu.icache.overall_avg_miss_latency 4955.450199 # average overall miss latency
-system.cpu.icache.overall_avg_mshr_miss_latency 4151.809783 # average overall mshr miss latency
+system.cpu.icache.overall_avg_miss_latency 4955.454183 # average overall miss latency
+system.cpu.icache.overall_avg_mshr_miss_latency 4151.815217 # average overall mshr miss latency
system.cpu.icache.overall_avg_mshr_uncacheable_latency <err: div-0> # average overall mshr uncacheable latency
system.cpu.icache.overall_hits 562 # number of overall hits
-system.cpu.icache.overall_miss_latency 1243818 # number of overall miss cycles
+system.cpu.icache.overall_miss_latency 1243819 # number of overall miss cycles
system.cpu.icache.overall_miss_rate 0.308733 # miss rate for overall accesses
system.cpu.icache.overall_misses 251 # number of overall misses
system.cpu.icache.overall_mshr_hits 67 # number of overall MSHR hits
-system.cpu.icache.overall_mshr_miss_latency 763933 # number of overall MSHR miss cycles
+system.cpu.icache.overall_mshr_miss_latency 763934 # number of overall MSHR miss cycles
system.cpu.icache.overall_mshr_miss_rate 0.226322 # mshr miss rate for overall accesses
system.cpu.icache.overall_mshr_misses 184 # number of overall MSHR misses
system.cpu.icache.overall_mshr_uncacheable_latency 0 # number of overall MSHR uncacheable cycles
@@ -231,14 +231,14 @@ system.cpu.iew.EXEC:stores 341 # Nu
system.cpu.iew.EXEC:swp 0 # number of swp insts executed
system.cpu.iew.WB:consumers 1860 # num instructions consuming a value
system.cpu.iew.WB:count 3219 # cumulative count of insts written-back
-system.cpu.iew.WB:fanout 0.785484 # average fanout of values written-back
+system.cpu.iew.WB:fanout 0.786022 # average fanout of values written-back
system.cpu.iew.WB:penalized 0 # number of instrctions required to write to 'other' IQ
system.cpu.iew.WB:penalized_rate 0 # fraction of instructions written-back that wrote to 'other' IQ
-system.cpu.iew.WB:producers 1461 # num instructions producing a value
+system.cpu.iew.WB:producers 1462 # num instructions producing a value
system.cpu.iew.WB:rate 0.112801 # insts written-back per cycle
system.cpu.iew.WB:sent 3234 # cumulative count of insts sent to commit
system.cpu.iew.branchMispredicts 152 # Number of branch mispredicts detected at execute
-system.cpu.iew.iewBlockCycles 14742 # Number of cycles IEW is blocking
+system.cpu.iew.iewBlockCycles 14743 # Number of cycles IEW is blocking
system.cpu.iew.iewDispLoadInsts 783 # Number of dispatched load instructions
system.cpu.iew.iewDispNonSpecInsts 6 # Number of dispatched non-speculative instructions
system.cpu.iew.iewDispSquashedInsts 79 # Number of squashed instructions skipped by dispatch
@@ -258,11 +258,11 @@ system.cpu.iew.lsq.thread.0.forwLoads 29 # Nu
system.cpu.iew.lsq.thread.0.ignoredResponses 0 # Number of memory responses ignored because the instruction is squashed
system.cpu.iew.lsq.thread.0.invAddrLoads 0 # Number of loads ignored due to an invalid address
system.cpu.iew.lsq.thread.0.invAddrSwpfs 0 # Number of software prefetches ignored due to an invalid address
-system.cpu.iew.lsq.thread.0.memOrderViolation 12 # Number of memory ordering violations
+system.cpu.iew.lsq.thread.0.memOrderViolation 15 # Number of memory ordering violations
system.cpu.iew.lsq.thread.0.rescheduledLoads 0 # Number of loads that were rescheduled
system.cpu.iew.lsq.thread.0.squashedLoads 368 # Number of loads squashed
system.cpu.iew.lsq.thread.0.squashedStores 87 # Number of stores squashed
-system.cpu.iew.memOrderViolationEvents 12 # Number of memory order violations
+system.cpu.iew.memOrderViolationEvents 15 # Number of memory order violations
system.cpu.iew.predictedNotTakenIncorrect 95 # Number of branches that were predicted not taken incorrectly
system.cpu.iew.predictedTakenIncorrect 57 # Number of branches that were predicted taken incorrectly
system.cpu.ipc 0.003174 # IPC: Instructions Per Cycle
@@ -305,12 +305,12 @@ system.cpu.iq.ISSUE:fu_full.end_dist
system.cpu.iq.ISSUE:issued_per_cycle.start_dist # Number of insts issued each cycle
system.cpu.iq.ISSUE:issued_per_cycle.samples 28537
system.cpu.iq.ISSUE:issued_per_cycle.min_value 0
- 0 27012 9465.61%
- 1 616 215.86%
- 2 356 124.75%
+ 0 27014 9466.31%
+ 1 617 216.21%
+ 2 351 123.00%
3 247 86.55%
- 4 177 62.02%
- 5 81 28.38%
+ 4 178 62.38%
+ 5 82 28.73%
6 32 11.21%
7 11 3.85%
8 5 1.75%
@@ -326,12 +326,12 @@ system.cpu.iq.iqSquashedInstsIssued 25 # Nu
system.cpu.iq.iqSquashedNonSpecRemoved 2 # Number of squashed non-spec instructions that were removed
system.cpu.iq.iqSquashedOperandsExamined 801 # Number of squashed operands that are examined and possibly removed from graph
system.cpu.l2cache.ReadReq_accesses 269 # number of ReadReq accesses(hits+misses)
-system.cpu.l2cache.ReadReq_avg_miss_latency 4621.724907 # average ReadReq miss latency
-system.cpu.l2cache.ReadReq_avg_mshr_miss_latency 2296.401487 # average ReadReq mshr miss latency
-system.cpu.l2cache.ReadReq_miss_latency 1243244 # number of ReadReq miss cycles
+system.cpu.l2cache.ReadReq_avg_miss_latency 4621.754647 # average ReadReq miss latency
+system.cpu.l2cache.ReadReq_avg_mshr_miss_latency 2296.408922 # average ReadReq mshr miss latency
+system.cpu.l2cache.ReadReq_miss_latency 1243252 # number of ReadReq miss cycles
system.cpu.l2cache.ReadReq_miss_rate 1 # miss rate for ReadReq accesses
system.cpu.l2cache.ReadReq_misses 269 # number of ReadReq misses
-system.cpu.l2cache.ReadReq_mshr_miss_latency 617732 # number of ReadReq MSHR miss cycles
+system.cpu.l2cache.ReadReq_mshr_miss_latency 617734 # number of ReadReq MSHR miss cycles
system.cpu.l2cache.ReadReq_mshr_miss_rate 1 # mshr miss rate for ReadReq accesses
system.cpu.l2cache.ReadReq_mshr_misses 269 # number of ReadReq MSHR misses
system.cpu.l2cache.avg_blocked_cycles_no_mshrs <err: div-0> # average number of cycles each access was blocked
@@ -343,29 +343,29 @@ system.cpu.l2cache.blocked_cycles_no_mshrs 0 #
system.cpu.l2cache.blocked_cycles_no_targets 0 # number of cycles access was blocked
system.cpu.l2cache.cache_copies 0 # number of cache copies performed
system.cpu.l2cache.demand_accesses 269 # number of demand (read+write) accesses
-system.cpu.l2cache.demand_avg_miss_latency 4621.724907 # average overall miss latency
-system.cpu.l2cache.demand_avg_mshr_miss_latency 2296.401487 # average overall mshr miss latency
+system.cpu.l2cache.demand_avg_miss_latency 4621.754647 # average overall miss latency
+system.cpu.l2cache.demand_avg_mshr_miss_latency 2296.408922 # average overall mshr miss latency
system.cpu.l2cache.demand_hits 0 # number of demand (read+write) hits
-system.cpu.l2cache.demand_miss_latency 1243244 # number of demand (read+write) miss cycles
+system.cpu.l2cache.demand_miss_latency 1243252 # number of demand (read+write) miss cycles
system.cpu.l2cache.demand_miss_rate 1 # miss rate for demand accesses
system.cpu.l2cache.demand_misses 269 # number of demand (read+write) misses
system.cpu.l2cache.demand_mshr_hits 0 # number of demand (read+write) MSHR hits
-system.cpu.l2cache.demand_mshr_miss_latency 617732 # number of demand (read+write) MSHR miss cycles
+system.cpu.l2cache.demand_mshr_miss_latency 617734 # number of demand (read+write) MSHR miss cycles
system.cpu.l2cache.demand_mshr_miss_rate 1 # mshr miss rate for demand accesses
system.cpu.l2cache.demand_mshr_misses 269 # number of demand (read+write) MSHR misses
system.cpu.l2cache.fast_writes 0 # number of fast writes performed
system.cpu.l2cache.mshr_cap_events 0 # number of times MSHR cap was activated
system.cpu.l2cache.no_allocate_misses 0 # Number of misses that were no-allocate
system.cpu.l2cache.overall_accesses 269 # number of overall (read+write) accesses
-system.cpu.l2cache.overall_avg_miss_latency 4621.724907 # average overall miss latency
-system.cpu.l2cache.overall_avg_mshr_miss_latency 2296.401487 # average overall mshr miss latency
+system.cpu.l2cache.overall_avg_miss_latency 4621.754647 # average overall miss latency
+system.cpu.l2cache.overall_avg_mshr_miss_latency 2296.408922 # average overall mshr miss latency
system.cpu.l2cache.overall_avg_mshr_uncacheable_latency <err: div-0> # average overall mshr uncacheable latency
system.cpu.l2cache.overall_hits 0 # number of overall hits
-system.cpu.l2cache.overall_miss_latency 1243244 # number of overall miss cycles
+system.cpu.l2cache.overall_miss_latency 1243252 # number of overall miss cycles
system.cpu.l2cache.overall_miss_rate 1 # miss rate for overall accesses
system.cpu.l2cache.overall_misses 269 # number of overall misses
system.cpu.l2cache.overall_mshr_hits 0 # number of overall MSHR hits
-system.cpu.l2cache.overall_mshr_miss_latency 617732 # number of overall MSHR miss cycles
+system.cpu.l2cache.overall_mshr_miss_latency 617734 # number of overall MSHR miss cycles
system.cpu.l2cache.overall_mshr_miss_rate 1 # mshr miss rate for overall accesses
system.cpu.l2cache.overall_mshr_misses 269 # number of overall MSHR misses
system.cpu.l2cache.overall_mshr_uncacheable_latency 0 # number of overall MSHR uncacheable cycles
@@ -387,7 +387,7 @@ system.cpu.l2cache.total_refs 0 # To
system.cpu.l2cache.warmup_cycle 0 # Cycle when the warmup percentage was hit.
system.cpu.l2cache.writebacks 0 # number of writebacks
system.cpu.numCycles 28537 # number of cpu cycles simulated
-system.cpu.rename.RENAME:BlockCycles 14783 # Number of cycles rename is blocking
+system.cpu.rename.RENAME:BlockCycles 14784 # Number of cycles rename is blocking
system.cpu.rename.RENAME:CommittedMaps 1768 # Number of HB maps that are committed
system.cpu.rename.RENAME:IQFullEvents 18 # Number of times rename has blocked due to IQ full
system.cpu.rename.RENAME:IdleCycles 5489 # Number of cycles rename is idle
@@ -396,11 +396,11 @@ system.cpu.rename.RENAME:ROBFullEvents 2 # Nu
system.cpu.rename.RENAME:RenameLookups 5285 # Number of register rename lookups that rename has made
system.cpu.rename.RENAME:RenamedInsts 4708 # Number of instructions processed by rename
system.cpu.rename.RENAME:RenamedOperands 3399 # Number of destination operands rename has renamed
-system.cpu.rename.RENAME:RunCycles 852 # Number of cycles rename is running
+system.cpu.rename.RENAME:RunCycles 847 # Number of cycles rename is running
system.cpu.rename.RENAME:SquashCycles 336 # Number of cycles rename is squashing
system.cpu.rename.RENAME:UnblockCycles 25 # Number of cycles rename is unblocking
system.cpu.rename.RENAME:UndoneMaps 1631 # Number of HB maps that are undone due to squashing
-system.cpu.rename.RENAME:serializeStallCycles 7052 # count of cycles rename stalled for serializing inst
+system.cpu.rename.RENAME:serializeStallCycles 7056 # count of cycles rename stalled for serializing inst
system.cpu.rename.RENAME:serializingInsts 8 # count of serializing insts renamed
system.cpu.rename.RENAME:skidInsts 88 # count of insts added to the skid buffer
system.cpu.rename.RENAME:tempSerializingInsts 6 # count of temporary serializing insts renamed
diff --git a/tests/quick/00.hello/ref/alpha/tru64/o3-timing/stderr b/tests/quick/00.hello/ref/alpha/tru64/o3-timing/stderr
index fb2137f1e..313de3c46 100644
--- a/tests/quick/00.hello/ref/alpha/tru64/o3-timing/stderr
+++ b/tests/quick/00.hello/ref/alpha/tru64/o3-timing/stderr
@@ -1,3 +1,4 @@
-0: system.remote_gdb.listener: listening for remote gdb on port 7000
+0: system.remote_gdb.listener: listening for remote gdb #0 on port 7000
warn: Entering event queue @ 0. Starting simulation...
+warn: Increasing stack size by one page.
warn: ignoring syscall sigprocmask(1, 18446744073709547831, ...)
diff --git a/tests/quick/00.hello/ref/alpha/tru64/o3-timing/stdout b/tests/quick/00.hello/ref/alpha/tru64/o3-timing/stdout
index 6436baf8f..233834343 100644
--- a/tests/quick/00.hello/ref/alpha/tru64/o3-timing/stdout
+++ b/tests/quick/00.hello/ref/alpha/tru64/o3-timing/stdout
@@ -6,8 +6,9 @@ The Regents of The University of Michigan
All Rights Reserved
-M5 compiled Jan 22 2007 23:06:52
-M5 started Mon Jan 22 23:07:09 2007
-M5 executing on ewok
-command line: build/ALPHA_SE/m5.fast -d build/ALPHA_SE/tests/fast/quick/00.hello/alpha/tru64/o3-timing tests/run.py quick/00.hello/alpha/tru64/o3-timing
+M5 compiled Mar 24 2007 13:51:02
+M5 started Sat Mar 24 13:51:14 2007
+M5 executing on zizzer.eecs.umich.edu
+command line: build/ALPHA_SE/m5.opt -d build/ALPHA_SE/tests/opt/quick/00.hello/alpha/tru64/o3-timing tests/run.py quick/00.hello/alpha/tru64/o3-timing
+Global frequency set at 1000000000000 ticks per second
Exiting @ tick 752028 because target called exit()
diff --git a/tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/config.ini b/tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/config.ini
index 6eef745b4..e75a10c54 100644
--- a/tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/config.ini
+++ b/tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/config.ini
@@ -1,48 +1,7 @@
[root]
type=Root
children=system
-checkpoint=
-clock=1000000000000
-max_tick=0
-output_file=cout
-progress_interval=0
-
-[exetrace]
-intel_format=false
-legion_lockstep=false
-pc_symbol=true
-print_cpseq=false
-print_cycle=true
-print_data=true
-print_effaddr=true
-print_fetchseq=false
-print_iregs=false
-print_opclass=true
-print_thread=true
-speculative=true
-trace_system=client
-
-[serialize]
-count=10
-cycle=0
-dir=cpt.%012d
-period=0
-
-[stats]
-descriptions=true
-dump_cycle=0
-dump_period=0
-dump_reset=false
-ignore_events=
-mysql_db=
-mysql_host=
-mysql_password=
-mysql_user=
-project_name=test
-simulation_name=test
-simulation_sample=0
-text_compat=true
-text_file=m5stats.txt
+dummy=0
[system]
type=System
@@ -70,6 +29,7 @@ commitToFetchDelay=1
commitToIEWDelay=1
commitToRenameDelay=1
commitWidth=8
+cpu_id=0
decodeToFetchDelay=1
decodeToRenameDelay=1
decodeWidth=8
@@ -433,12 +393,3 @@ range=0:134217727
zero=false
port=system.membus.port[0]
-[trace]
-bufsize=0
-cycle=0
-dump_on_exit=false
-file=cout
-flags=
-ignore=
-start=0
-
diff --git a/tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/config.out b/tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/config.out
index f36f666af..9489e27c0 100644
--- a/tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/config.out
+++ b/tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/config.out
@@ -1,9 +1,6 @@
[root]
type=Root
-clock=1000000000000
-max_tick=0
-progress_interval=0
-output_file=cout
+dummy=0
[system.physmem]
type=PhysicalMemory
@@ -189,6 +186,7 @@ type=DerivO3CPU
clock=1
phase=0
numThreads=1
+cpu_id=0
activity=0
workload=system.cpu.workload0 system.cpu.workload1
checker=null
@@ -383,51 +381,3 @@ clock=1000
width=64
responder_set=false
-[trace]
-flags=
-start=0
-cycle=0
-bufsize=0
-file=cout
-dump_on_exit=false
-ignore=
-
-[stats]
-descriptions=true
-project_name=test
-simulation_name=test
-simulation_sample=0
-text_file=m5stats.txt
-text_compat=true
-mysql_db=
-mysql_user=
-mysql_password=
-mysql_host=
-events_start=-1
-dump_reset=false
-dump_cycle=0
-dump_period=0
-ignore_events=
-
-[random]
-seed=1
-
-[exetrace]
-speculative=true
-print_cycle=true
-print_opclass=true
-print_thread=true
-print_effaddr=true
-print_data=true
-print_iregs=false
-print_fetchseq=false
-print_cpseq=false
-print_reg_delta=false
-pc_symbol=true
-intel_format=false
-legion_lockstep=false
-trace_system=client
-
-[statsreset]
-reset_cycle=0
-
diff --git a/tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/m5stats.txt b/tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/m5stats.txt
index bb9e9360c..74e8f8d83 100644
--- a/tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/m5stats.txt
+++ b/tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/m5stats.txt
@@ -1,48 +1,48 @@
---------- Begin Simulation Statistics ----------
global.BPredUnit.BTBCorrect 0 # Number of correct BTB predictions (this stat may not work properly.
-global.BPredUnit.BTBHits 1334 # Number of BTB hits
-global.BPredUnit.BTBLookups 6012 # Number of BTB lookups
+global.BPredUnit.BTBHits 1320 # Number of BTB hits
+global.BPredUnit.BTBLookups 6181 # Number of BTB lookups
global.BPredUnit.RASInCorrect 173 # Number of incorrect RAS predictions.
-global.BPredUnit.condIncorrect 1201 # Number of conditional branches incorrect
-global.BPredUnit.condPredicted 4031 # Number of conditional branches predicted
-global.BPredUnit.lookups 12370 # Number of BP lookups
-global.BPredUnit.usedRAS 6337 # Number of times the RAS was used to get a target.
-host_inst_rate 11366 # Simulator instruction rate (inst/s)
-host_mem_usage 178064 # Number of bytes of host memory used
-host_seconds 0.99 # Real time elapsed on the host
-host_tick_rate 2259917 # Simulator tick rate (ticks/s)
-memdepunit.memDep.conflictingLoads 27 # Number of conflicting loads.
-memdepunit.memDep.conflictingLoads 20 # Number of conflicting loads.
-memdepunit.memDep.conflictingStores 97 # Number of conflicting stores.
-memdepunit.memDep.conflictingStores 3 # Number of conflicting stores.
-memdepunit.memDep.insertedLoads 5749 # Number of loads inserted to the mem dependence unit.
-memdepunit.memDep.insertedLoads 2822 # Number of loads inserted to the mem dependence unit.
-memdepunit.memDep.insertedStores 4490 # Number of stores inserted to the mem dependence unit.
-memdepunit.memDep.insertedStores 1747 # Number of stores inserted to the mem dependence unit.
+global.BPredUnit.condIncorrect 1181 # Number of conditional branches incorrect
+global.BPredUnit.condPredicted 4228 # Number of conditional branches predicted
+global.BPredUnit.lookups 12535 # Number of BP lookups
+global.BPredUnit.usedRAS 6333 # Number of times the RAS was used to get a target.
+host_inst_rate 6990 # Simulator instruction rate (inst/s)
+host_mem_usage 156628 # Number of bytes of host memory used
+host_seconds 1.61 # Real time elapsed on the host
+host_tick_rate 1386962 # Simulator tick rate (ticks/s)
+memdepunit.memDep.conflictingLoads 26 # Number of conflicting loads.
+memdepunit.memDep.conflictingLoads 23 # Number of conflicting loads.
+memdepunit.memDep.conflictingStores 4 # Number of conflicting stores.
+memdepunit.memDep.conflictingStores 1 # Number of conflicting stores.
+memdepunit.memDep.insertedLoads 3657 # Number of loads inserted to the mem dependence unit.
+memdepunit.memDep.insertedLoads 5285 # Number of loads inserted to the mem dependence unit.
+memdepunit.memDep.insertedStores 1780 # Number of stores inserted to the mem dependence unit.
+memdepunit.memDep.insertedStores 4439 # Number of stores inserted to the mem dependence unit.
sim_freq 1000000000000 # Frequency of simulated ticks
sim_insts 11247 # Number of instructions simulated
sim_seconds 0.000002 # Number of seconds simulated
-sim_ticks 2237162 # Number of ticks simulated
+sim_ticks 2232164 # Number of ticks simulated
system.cpu.commit.COM:branches 1724 # Number of branches committed
system.cpu.commit.COM:branches_0 862 # Number of branches committed
system.cpu.commit.COM:branches_1 862 # Number of branches committed
-system.cpu.commit.COM:bw_lim_events 128 # number cycles where commit BW limit reached
+system.cpu.commit.COM:bw_lim_events 123 # number cycles where commit BW limit reached
system.cpu.commit.COM:bw_limited 0 # number of insts not committed due to BW limits
system.cpu.commit.COM:bw_limited_0 0 # number of insts not committed due to BW limits
system.cpu.commit.COM:bw_limited_1 0 # number of insts not committed due to BW limits
system.cpu.commit.COM:committed_per_cycle.start_dist # Number of insts commited each cycle
-system.cpu.commit.COM:committed_per_cycle.samples 188940
+system.cpu.commit.COM:committed_per_cycle.samples 189138
system.cpu.commit.COM:committed_per_cycle.min_value 0
- 0 183303 9701.65%
- 1 3121 165.18%
- 2 1239 65.58%
- 3 531 28.10%
- 4 275 14.55%
- 5 154 8.15%
- 6 128 6.77%
+ 0 183476 9700.64%
+ 1 3161 167.13%
+ 2 1212 64.08%
+ 3 544 28.76%
+ 4 279 14.75%
+ 5 155 8.20%
+ 6 127 6.71%
7 61 3.23%
- 8 128 6.77%
+ 8 123 6.50%
system.cpu.commit.COM:committed_per_cycle.max_value 8
system.cpu.commit.COM:committed_per_cycle.end_dist
@@ -61,97 +61,97 @@ system.cpu.commit.COM:refs_1 1791 # Nu
system.cpu.commit.COM:swp_count 0 # Number of s/w prefetches committed
system.cpu.commit.COM:swp_count_0 0 # Number of s/w prefetches committed
system.cpu.commit.COM:swp_count_1 0 # Number of s/w prefetches committed
-system.cpu.commit.branchMispredicts 943 # The number of times a branch was mispredicted
+system.cpu.commit.branchMispredicts 938 # The number of times a branch was mispredicted
system.cpu.commit.commitCommittedInsts 11281 # The number of committed instructions
system.cpu.commit.commitNonSpecStalls 34 # The number of times commit has been forced to stall to communicate backwards
-system.cpu.commit.commitSquashedInsts 28509 # The number of squashed insts skipped by commit
+system.cpu.commit.commitSquashedInsts 29588 # The number of squashed insts skipped by commit
system.cpu.committedInsts_0 5624 # Number of Instructions Simulated
system.cpu.committedInsts_1 5623 # Number of Instructions Simulated
system.cpu.committedInsts_total 11247 # Number of Instructions Simulated
-system.cpu.cpi_0 397.788407 # CPI: Cycles Per Instruction
-system.cpu.cpi_1 397.859150 # CPI: Cycles Per Instruction
-system.cpu.cpi_total 198.911888 # CPI: Total CPI of All Threads
-system.cpu.dcache.ReadReq_accesses 3186 # number of ReadReq accesses(hits+misses)
-system.cpu.dcache.ReadReq_accesses_0 3186 # number of ReadReq accesses(hits+misses)
-system.cpu.dcache.ReadReq_avg_miss_latency 9969.378125 # average ReadReq miss latency
-system.cpu.dcache.ReadReq_avg_miss_latency_0 9969.378125 # average ReadReq miss latency
-system.cpu.dcache.ReadReq_avg_mshr_miss_latency 10500.608040 # average ReadReq mshr miss latency
-system.cpu.dcache.ReadReq_avg_mshr_miss_latency_0 10500.608040 # average ReadReq mshr miss latency
-system.cpu.dcache.ReadReq_hits 2866 # number of ReadReq hits
-system.cpu.dcache.ReadReq_hits_0 2866 # number of ReadReq hits
-system.cpu.dcache.ReadReq_miss_latency 3190201 # number of ReadReq miss cycles
-system.cpu.dcache.ReadReq_miss_latency_0 3190201 # number of ReadReq miss cycles
-system.cpu.dcache.ReadReq_miss_rate 0.100439 # miss rate for ReadReq accesses
-system.cpu.dcache.ReadReq_miss_rate_0 0.100439 # miss rate for ReadReq accesses
-system.cpu.dcache.ReadReq_misses 320 # number of ReadReq misses
-system.cpu.dcache.ReadReq_misses_0 320 # number of ReadReq misses
-system.cpu.dcache.ReadReq_mshr_hits 121 # number of ReadReq MSHR hits
-system.cpu.dcache.ReadReq_mshr_hits_0 121 # number of ReadReq MSHR hits
-system.cpu.dcache.ReadReq_mshr_miss_latency 2089621 # number of ReadReq MSHR miss cycles
-system.cpu.dcache.ReadReq_mshr_miss_latency_0 2089621 # number of ReadReq MSHR miss cycles
-system.cpu.dcache.ReadReq_mshr_miss_rate 0.062461 # mshr miss rate for ReadReq accesses
-system.cpu.dcache.ReadReq_mshr_miss_rate_0 0.062461 # mshr miss rate for ReadReq accesses
+system.cpu.cpi_0 396.899716 # CPI: Cycles Per Instruction
+system.cpu.cpi_1 396.970301 # CPI: Cycles Per Instruction
+system.cpu.cpi_total 198.467502 # CPI: Total CPI of All Threads
+system.cpu.dcache.ReadReq_accesses 3176 # number of ReadReq accesses(hits+misses)
+system.cpu.dcache.ReadReq_accesses_0 3176 # number of ReadReq accesses(hits+misses)
+system.cpu.dcache.ReadReq_avg_miss_latency 9976.257143 # average ReadReq miss latency
+system.cpu.dcache.ReadReq_avg_miss_latency_0 9976.257143 # average ReadReq miss latency
+system.cpu.dcache.ReadReq_avg_mshr_miss_latency 10425.356784 # average ReadReq mshr miss latency
+system.cpu.dcache.ReadReq_avg_mshr_miss_latency_0 10425.356784 # average ReadReq mshr miss latency
+system.cpu.dcache.ReadReq_hits 2861 # number of ReadReq hits
+system.cpu.dcache.ReadReq_hits_0 2861 # number of ReadReq hits
+system.cpu.dcache.ReadReq_miss_latency 3142521 # number of ReadReq miss cycles
+system.cpu.dcache.ReadReq_miss_latency_0 3142521 # number of ReadReq miss cycles
+system.cpu.dcache.ReadReq_miss_rate 0.099181 # miss rate for ReadReq accesses
+system.cpu.dcache.ReadReq_miss_rate_0 0.099181 # miss rate for ReadReq accesses
+system.cpu.dcache.ReadReq_misses 315 # number of ReadReq misses
+system.cpu.dcache.ReadReq_misses_0 315 # number of ReadReq misses
+system.cpu.dcache.ReadReq_mshr_hits 116 # number of ReadReq MSHR hits
+system.cpu.dcache.ReadReq_mshr_hits_0 116 # number of ReadReq MSHR hits
+system.cpu.dcache.ReadReq_mshr_miss_latency 2074646 # number of ReadReq MSHR miss cycles
+system.cpu.dcache.ReadReq_mshr_miss_latency_0 2074646 # number of ReadReq MSHR miss cycles
+system.cpu.dcache.ReadReq_mshr_miss_rate 0.062657 # mshr miss rate for ReadReq accesses
+system.cpu.dcache.ReadReq_mshr_miss_rate_0 0.062657 # mshr miss rate for ReadReq accesses
system.cpu.dcache.ReadReq_mshr_misses 199 # number of ReadReq MSHR misses
system.cpu.dcache.ReadReq_mshr_misses_0 199 # number of ReadReq MSHR misses
system.cpu.dcache.WriteReq_accesses 1624 # number of WriteReq accesses(hits+misses)
system.cpu.dcache.WriteReq_accesses_0 1624 # number of WriteReq accesses(hits+misses)
-system.cpu.dcache.WriteReq_avg_miss_latency 6540.875740 # average WriteReq miss latency
-system.cpu.dcache.WriteReq_avg_miss_latency_0 6540.875740 # average WriteReq miss latency
-system.cpu.dcache.WriteReq_avg_mshr_miss_latency 7803.746575 # average WriteReq mshr miss latency
-system.cpu.dcache.WriteReq_avg_mshr_miss_latency_0 7803.746575 # average WriteReq mshr miss latency
+system.cpu.dcache.WriteReq_avg_miss_latency 6512.846154 # average WriteReq miss latency
+system.cpu.dcache.WriteReq_avg_miss_latency_0 6512.846154 # average WriteReq miss latency
+system.cpu.dcache.WriteReq_avg_mshr_miss_latency 7776.006849 # average WriteReq mshr miss latency
+system.cpu.dcache.WriteReq_avg_mshr_miss_latency_0 7776.006849 # average WriteReq mshr miss latency
system.cpu.dcache.WriteReq_hits 1117 # number of WriteReq hits
system.cpu.dcache.WriteReq_hits_0 1117 # number of WriteReq hits
-system.cpu.dcache.WriteReq_miss_latency 3316224 # number of WriteReq miss cycles
-system.cpu.dcache.WriteReq_miss_latency_0 3316224 # number of WriteReq miss cycles
+system.cpu.dcache.WriteReq_miss_latency 3302013 # number of WriteReq miss cycles
+system.cpu.dcache.WriteReq_miss_latency_0 3302013 # number of WriteReq miss cycles
system.cpu.dcache.WriteReq_miss_rate 0.312192 # miss rate for WriteReq accesses
system.cpu.dcache.WriteReq_miss_rate_0 0.312192 # miss rate for WriteReq accesses
system.cpu.dcache.WriteReq_misses 507 # number of WriteReq misses
system.cpu.dcache.WriteReq_misses_0 507 # number of WriteReq misses
system.cpu.dcache.WriteReq_mshr_hits 361 # number of WriteReq MSHR hits
system.cpu.dcache.WriteReq_mshr_hits_0 361 # number of WriteReq MSHR hits
-system.cpu.dcache.WriteReq_mshr_miss_latency 1139347 # number of WriteReq MSHR miss cycles
-system.cpu.dcache.WriteReq_mshr_miss_latency_0 1139347 # number of WriteReq MSHR miss cycles
+system.cpu.dcache.WriteReq_mshr_miss_latency 1135297 # number of WriteReq MSHR miss cycles
+system.cpu.dcache.WriteReq_mshr_miss_latency_0 1135297 # number of WriteReq MSHR miss cycles
system.cpu.dcache.WriteReq_mshr_miss_rate 0.089901 # mshr miss rate for WriteReq accesses
system.cpu.dcache.WriteReq_mshr_miss_rate_0 0.089901 # mshr miss rate for WriteReq accesses
system.cpu.dcache.WriteReq_mshr_misses 146 # number of WriteReq MSHR misses
system.cpu.dcache.WriteReq_mshr_misses_0 146 # number of WriteReq MSHR misses
system.cpu.dcache.avg_blocked_cycles_no_mshrs 3973 # average number of cycles each access was blocked
-system.cpu.dcache.avg_blocked_cycles_no_targets 3625.380952 # average number of cycles each access was blocked
-system.cpu.dcache.avg_refs 11.544928 # Average number of references to valid blocks.
+system.cpu.dcache.avg_blocked_cycles_no_targets 3613.488095 # average number of cycles each access was blocked
+system.cpu.dcache.avg_refs 11.563953 # Average number of references to valid blocks.
system.cpu.dcache.blocked_no_mshrs 1 # number of cycles access was blocked
system.cpu.dcache.blocked_no_targets 84 # number of cycles access was blocked
system.cpu.dcache.blocked_cycles_no_mshrs 3973 # number of cycles access was blocked
-system.cpu.dcache.blocked_cycles_no_targets 304532 # number of cycles access was blocked
+system.cpu.dcache.blocked_cycles_no_targets 303533 # number of cycles access was blocked
system.cpu.dcache.cache_copies 0 # number of cache copies performed
-system.cpu.dcache.demand_accesses 4810 # number of demand (read+write) accesses
-system.cpu.dcache.demand_accesses_0 4810 # number of demand (read+write) accesses
+system.cpu.dcache.demand_accesses 4800 # number of demand (read+write) accesses
+system.cpu.dcache.demand_accesses_0 4800 # number of demand (read+write) accesses
system.cpu.dcache.demand_accesses_1 0 # number of demand (read+write) accesses
-system.cpu.dcache.demand_avg_miss_latency 7867.503023 # average overall miss latency
-system.cpu.dcache.demand_avg_miss_latency_0 7867.503023 # average overall miss latency
+system.cpu.dcache.demand_avg_miss_latency 7840.065693 # average overall miss latency
+system.cpu.dcache.demand_avg_miss_latency_0 7840.065693 # average overall miss latency
system.cpu.dcache.demand_avg_miss_latency_1 <err: div-0> # average overall miss latency
-system.cpu.dcache.demand_avg_mshr_miss_latency 9359.327536 # average overall mshr miss latency
-system.cpu.dcache.demand_avg_mshr_miss_latency_0 9359.327536 # average overall mshr miss latency
+system.cpu.dcache.demand_avg_mshr_miss_latency 9304.182609 # average overall mshr miss latency
+system.cpu.dcache.demand_avg_mshr_miss_latency_0 9304.182609 # average overall mshr miss latency
system.cpu.dcache.demand_avg_mshr_miss_latency_1 <err: div-0> # average overall mshr miss latency
-system.cpu.dcache.demand_hits 3983 # number of demand (read+write) hits
-system.cpu.dcache.demand_hits_0 3983 # number of demand (read+write) hits
+system.cpu.dcache.demand_hits 3978 # number of demand (read+write) hits
+system.cpu.dcache.demand_hits_0 3978 # number of demand (read+write) hits
system.cpu.dcache.demand_hits_1 0 # number of demand (read+write) hits
-system.cpu.dcache.demand_miss_latency 6506425 # number of demand (read+write) miss cycles
-system.cpu.dcache.demand_miss_latency_0 6506425 # number of demand (read+write) miss cycles
+system.cpu.dcache.demand_miss_latency 6444534 # number of demand (read+write) miss cycles
+system.cpu.dcache.demand_miss_latency_0 6444534 # number of demand (read+write) miss cycles
system.cpu.dcache.demand_miss_latency_1 0 # number of demand (read+write) miss cycles
-system.cpu.dcache.demand_miss_rate 0.171933 # miss rate for demand accesses
-system.cpu.dcache.demand_miss_rate_0 0.171933 # miss rate for demand accesses
+system.cpu.dcache.demand_miss_rate 0.171250 # miss rate for demand accesses
+system.cpu.dcache.demand_miss_rate_0 0.171250 # miss rate for demand accesses
system.cpu.dcache.demand_miss_rate_1 <err: div-0> # miss rate for demand accesses
-system.cpu.dcache.demand_misses 827 # number of demand (read+write) misses
-system.cpu.dcache.demand_misses_0 827 # number of demand (read+write) misses
+system.cpu.dcache.demand_misses 822 # number of demand (read+write) misses
+system.cpu.dcache.demand_misses_0 822 # number of demand (read+write) misses
system.cpu.dcache.demand_misses_1 0 # number of demand (read+write) misses
-system.cpu.dcache.demand_mshr_hits 482 # number of demand (read+write) MSHR hits
-system.cpu.dcache.demand_mshr_hits_0 482 # number of demand (read+write) MSHR hits
+system.cpu.dcache.demand_mshr_hits 477 # number of demand (read+write) MSHR hits
+system.cpu.dcache.demand_mshr_hits_0 477 # number of demand (read+write) MSHR hits
system.cpu.dcache.demand_mshr_hits_1 0 # number of demand (read+write) MSHR hits
-system.cpu.dcache.demand_mshr_miss_latency 3228968 # number of demand (read+write) MSHR miss cycles
-system.cpu.dcache.demand_mshr_miss_latency_0 3228968 # number of demand (read+write) MSHR miss cycles
+system.cpu.dcache.demand_mshr_miss_latency 3209943 # number of demand (read+write) MSHR miss cycles
+system.cpu.dcache.demand_mshr_miss_latency_0 3209943 # number of demand (read+write) MSHR miss cycles
system.cpu.dcache.demand_mshr_miss_latency_1 0 # number of demand (read+write) MSHR miss cycles
-system.cpu.dcache.demand_mshr_miss_rate 0.071726 # mshr miss rate for demand accesses
-system.cpu.dcache.demand_mshr_miss_rate_0 0.071726 # mshr miss rate for demand accesses
+system.cpu.dcache.demand_mshr_miss_rate 0.071875 # mshr miss rate for demand accesses
+system.cpu.dcache.demand_mshr_miss_rate_0 0.071875 # mshr miss rate for demand accesses
system.cpu.dcache.demand_mshr_miss_rate_1 <err: div-0> # mshr miss rate for demand accesses
system.cpu.dcache.demand_mshr_misses 345 # number of demand (read+write) MSHR misses
system.cpu.dcache.demand_mshr_misses_0 345 # number of demand (read+write) MSHR misses
@@ -161,38 +161,38 @@ system.cpu.dcache.mshr_cap_events 0 # nu
system.cpu.dcache.mshr_cap_events_0 0 # number of times MSHR cap was activated
system.cpu.dcache.mshr_cap_events_1 0 # number of times MSHR cap was activated
system.cpu.dcache.no_allocate_misses 0 # Number of misses that were no-allocate
-system.cpu.dcache.overall_accesses 4810 # number of overall (read+write) accesses
-system.cpu.dcache.overall_accesses_0 4810 # number of overall (read+write) accesses
+system.cpu.dcache.overall_accesses 4800 # number of overall (read+write) accesses
+system.cpu.dcache.overall_accesses_0 4800 # number of overall (read+write) accesses
system.cpu.dcache.overall_accesses_1 0 # number of overall (read+write) accesses
-system.cpu.dcache.overall_avg_miss_latency 7867.503023 # average overall miss latency
-system.cpu.dcache.overall_avg_miss_latency_0 7867.503023 # average overall miss latency
+system.cpu.dcache.overall_avg_miss_latency 7840.065693 # average overall miss latency
+system.cpu.dcache.overall_avg_miss_latency_0 7840.065693 # average overall miss latency
system.cpu.dcache.overall_avg_miss_latency_1 <err: div-0> # average overall miss latency
-system.cpu.dcache.overall_avg_mshr_miss_latency 9359.327536 # average overall mshr miss latency
-system.cpu.dcache.overall_avg_mshr_miss_latency_0 9359.327536 # average overall mshr miss latency
+system.cpu.dcache.overall_avg_mshr_miss_latency 9304.182609 # average overall mshr miss latency
+system.cpu.dcache.overall_avg_mshr_miss_latency_0 9304.182609 # average overall mshr miss latency
system.cpu.dcache.overall_avg_mshr_miss_latency_1 <err: div-0> # average overall mshr miss latency
system.cpu.dcache.overall_avg_mshr_uncacheable_latency <err: div-0> # average overall mshr uncacheable latency
system.cpu.dcache.overall_avg_mshr_uncacheable_latency_0 <err: div-0> # average overall mshr uncacheable latency
system.cpu.dcache.overall_avg_mshr_uncacheable_latency_1 <err: div-0> # average overall mshr uncacheable latency
-system.cpu.dcache.overall_hits 3983 # number of overall hits
-system.cpu.dcache.overall_hits_0 3983 # number of overall hits
+system.cpu.dcache.overall_hits 3978 # number of overall hits
+system.cpu.dcache.overall_hits_0 3978 # number of overall hits
system.cpu.dcache.overall_hits_1 0 # number of overall hits
-system.cpu.dcache.overall_miss_latency 6506425 # number of overall miss cycles
-system.cpu.dcache.overall_miss_latency_0 6506425 # number of overall miss cycles
+system.cpu.dcache.overall_miss_latency 6444534 # number of overall miss cycles
+system.cpu.dcache.overall_miss_latency_0 6444534 # number of overall miss cycles
system.cpu.dcache.overall_miss_latency_1 0 # number of overall miss cycles
-system.cpu.dcache.overall_miss_rate 0.171933 # miss rate for overall accesses
-system.cpu.dcache.overall_miss_rate_0 0.171933 # miss rate for overall accesses
+system.cpu.dcache.overall_miss_rate 0.171250 # miss rate for overall accesses
+system.cpu.dcache.overall_miss_rate_0 0.171250 # miss rate for overall accesses
system.cpu.dcache.overall_miss_rate_1 <err: div-0> # miss rate for overall accesses
-system.cpu.dcache.overall_misses 827 # number of overall misses
-system.cpu.dcache.overall_misses_0 827 # number of overall misses
+system.cpu.dcache.overall_misses 822 # number of overall misses
+system.cpu.dcache.overall_misses_0 822 # number of overall misses
system.cpu.dcache.overall_misses_1 0 # number of overall misses
-system.cpu.dcache.overall_mshr_hits 482 # number of overall MSHR hits
-system.cpu.dcache.overall_mshr_hits_0 482 # number of overall MSHR hits
+system.cpu.dcache.overall_mshr_hits 477 # number of overall MSHR hits
+system.cpu.dcache.overall_mshr_hits_0 477 # number of overall MSHR hits
system.cpu.dcache.overall_mshr_hits_1 0 # number of overall MSHR hits
-system.cpu.dcache.overall_mshr_miss_latency 3228968 # number of overall MSHR miss cycles
-system.cpu.dcache.overall_mshr_miss_latency_0 3228968 # number of overall MSHR miss cycles
+system.cpu.dcache.overall_mshr_miss_latency 3209943 # number of overall MSHR miss cycles
+system.cpu.dcache.overall_mshr_miss_latency_0 3209943 # number of overall MSHR miss cycles
system.cpu.dcache.overall_mshr_miss_latency_1 0 # number of overall MSHR miss cycles
-system.cpu.dcache.overall_mshr_miss_rate 0.071726 # mshr miss rate for overall accesses
-system.cpu.dcache.overall_mshr_miss_rate_0 0.071726 # mshr miss rate for overall accesses
+system.cpu.dcache.overall_mshr_miss_rate 0.071875 # mshr miss rate for overall accesses
+system.cpu.dcache.overall_mshr_miss_rate_0 0.071875 # mshr miss rate for overall accesses
system.cpu.dcache.overall_mshr_miss_rate_1 <err: div-0> # mshr miss rate for overall accesses
system.cpu.dcache.overall_mshr_misses 345 # number of overall MSHR misses
system.cpu.dcache.overall_mshr_misses_0 345 # number of overall MSHR misses
@@ -215,153 +215,153 @@ system.cpu.dcache.prefetcher.num_hwpf_squashed_from_miss 0
system.cpu.dcache.replacements 0 # number of replacements
system.cpu.dcache.replacements_0 0 # number of replacements
system.cpu.dcache.replacements_1 0 # number of replacements
-system.cpu.dcache.sampled_refs 345 # Sample count of references to valid blocks.
+system.cpu.dcache.sampled_refs 344 # Sample count of references to valid blocks.
system.cpu.dcache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions
system.cpu.dcache.soft_prefetch_mshr_full_0 0 # number of mshr full events for SW prefetching instrutions
system.cpu.dcache.soft_prefetch_mshr_full_1 0 # number of mshr full events for SW prefetching instrutions
-system.cpu.dcache.tagsinuse 198.670475 # Cycle average of tags in use
-system.cpu.dcache.total_refs 3983 # Total number of references to valid blocks.
+system.cpu.dcache.tagsinuse 198.340517 # Cycle average of tags in use
+system.cpu.dcache.total_refs 3978 # Total number of references to valid blocks.
system.cpu.dcache.warmup_cycle 0 # Cycle when the warmup percentage was hit.
system.cpu.dcache.writebacks 0 # number of writebacks
system.cpu.dcache.writebacks_0 0 # number of writebacks
system.cpu.dcache.writebacks_1 0 # number of writebacks
-system.cpu.decode.DECODE:BlockedCycles 97618 # Number of cycles decode is blocked
-system.cpu.decode.DECODE:BranchMispred 267 # Number of times decode detected a branch misprediction
-system.cpu.decode.DECODE:BranchResolved 390 # Number of times decode resolved a branch
-system.cpu.decode.DECODE:DecodedInsts 67048 # Number of instructions handled by decode
-system.cpu.decode.DECODE:IdleCycles 262280 # Number of cycles decode is idle
-system.cpu.decode.DECODE:RunCycles 12122 # Number of cycles decode is running
-system.cpu.decode.DECODE:SquashCycles 5552 # Number of cycles decode is squashing
-system.cpu.decode.DECODE:SquashedInsts 680 # Number of squashed instructions handled by decode
-system.cpu.decode.DECODE:UnblockCycles 155 # Number of cycles decode is unblocking
-system.cpu.fetch.Branches 12370 # Number of branches that fetch encountered
-system.cpu.fetch.CacheLines 13012 # Number of cache lines fetched
-system.cpu.fetch.Cycles 27804 # Number of cycles fetch has run and was not squashing or blocked
-system.cpu.fetch.IcacheSquashes 800 # Number of outstanding Icache misses that were squashed
-system.cpu.fetch.Insts 79582 # Number of instructions fetch has processed
-system.cpu.fetch.SquashCycles 4833 # Number of cycles fetch has spent squashing
-system.cpu.fetch.branchRate 0.065467 # Number of branch fetches per cycle
-system.cpu.fetch.icacheStallCycles 52787 # Number of cycles fetch is stalled on an Icache miss
-system.cpu.fetch.predictedBranches 7671 # Number of branches that fetch has predicted taken
-system.cpu.fetch.rate 0.421180 # Number of inst fetches per cycle
+system.cpu.decode.DECODE:BlockedCycles 95932 # Number of cycles decode is blocked
+system.cpu.decode.DECODE:BranchMispred 257 # Number of times decode detected a branch misprediction
+system.cpu.decode.DECODE:BranchResolved 378 # Number of times decode resolved a branch
+system.cpu.decode.DECODE:DecodedInsts 68233 # Number of instructions handled by decode
+system.cpu.decode.DECODE:IdleCycles 264032 # Number of cycles decode is idle
+system.cpu.decode.DECODE:RunCycles 12255 # Number of cycles decode is running
+system.cpu.decode.DECODE:SquashCycles 5733 # Number of cycles decode is squashing
+system.cpu.decode.DECODE:SquashedInsts 618 # Number of squashed instructions handled by decode
+system.cpu.decode.DECODE:UnblockCycles 167 # Number of cycles decode is unblocking
+system.cpu.fetch.Branches 12535 # Number of branches that fetch encountered
+system.cpu.fetch.CacheLines 13184 # Number of cache lines fetched
+system.cpu.fetch.Cycles 28123 # Number of cycles fetch has run and was not squashing or blocked
+system.cpu.fetch.IcacheSquashes 886 # Number of outstanding Icache misses that were squashed
+system.cpu.fetch.Insts 80687 # Number of instructions fetch has processed
+system.cpu.fetch.SquashCycles 4911 # Number of cycles fetch has spent squashing
+system.cpu.fetch.branchRate 0.066271 # Number of branch fetches per cycle
+system.cpu.fetch.icacheStallCycles 53960 # Number of cycles fetch is stalled on an Icache miss
+system.cpu.fetch.predictedBranches 7653 # Number of branches that fetch has predicted taken
+system.cpu.fetch.rate 0.426584 # Number of inst fetches per cycle
system.cpu.fetch.rateDist.start_dist # Number of instructions fetched each cycle (Total)
-system.cpu.fetch.rateDist.samples 188950
+system.cpu.fetch.rateDist.samples 189147
system.cpu.fetch.rateDist.min_value 0
- 0 174142 9216.30%
- 1 378 20.01%
- 2 298 15.77%
- 3 3656 193.49%
- 4 2200 116.43%
- 5 1017 53.82%
- 6 974 51.55%
- 7 2369 125.38%
- 8 3916 207.25%
+ 0 174193 9209.40%
+ 1 369 19.51%
+ 2 281 14.86%
+ 3 3638 192.34%
+ 4 2283 120.70%
+ 5 1005 53.13%
+ 6 984 52.02%
+ 7 2371 125.35%
+ 8 4023 212.69%
system.cpu.fetch.rateDist.max_value 8
system.cpu.fetch.rateDist.end_dist
-system.cpu.icache.ReadReq_accesses 13010 # number of ReadReq accesses(hits+misses)
-system.cpu.icache.ReadReq_accesses_0 13010 # number of ReadReq accesses(hits+misses)
-system.cpu.icache.ReadReq_avg_miss_latency 7746.912281 # average ReadReq miss latency
-system.cpu.icache.ReadReq_avg_miss_latency_0 7746.912281 # average ReadReq miss latency
-system.cpu.icache.ReadReq_avg_mshr_miss_latency 7155.055556 # average ReadReq mshr miss latency
-system.cpu.icache.ReadReq_avg_mshr_miss_latency_0 7155.055556 # average ReadReq mshr miss latency
-system.cpu.icache.ReadReq_hits 12098 # number of ReadReq hits
-system.cpu.icache.ReadReq_hits_0 12098 # number of ReadReq hits
-system.cpu.icache.ReadReq_miss_latency 7065184 # number of ReadReq miss cycles
-system.cpu.icache.ReadReq_miss_latency_0 7065184 # number of ReadReq miss cycles
-system.cpu.icache.ReadReq_miss_rate 0.070100 # miss rate for ReadReq accesses
-system.cpu.icache.ReadReq_miss_rate_0 0.070100 # miss rate for ReadReq accesses
+system.cpu.icache.ReadReq_accesses 13182 # number of ReadReq accesses(hits+misses)
+system.cpu.icache.ReadReq_accesses_0 13182 # number of ReadReq accesses(hits+misses)
+system.cpu.icache.ReadReq_avg_miss_latency 7732.322368 # average ReadReq miss latency
+system.cpu.icache.ReadReq_avg_miss_latency_0 7732.322368 # average ReadReq miss latency
+system.cpu.icache.ReadReq_avg_mshr_miss_latency 7128.205742 # average ReadReq mshr miss latency
+system.cpu.icache.ReadReq_avg_mshr_miss_latency_0 7128.205742 # average ReadReq mshr miss latency
+system.cpu.icache.ReadReq_hits 12270 # number of ReadReq hits
+system.cpu.icache.ReadReq_hits_0 12270 # number of ReadReq hits
+system.cpu.icache.ReadReq_miss_latency 7051878 # number of ReadReq miss cycles
+system.cpu.icache.ReadReq_miss_latency_0 7051878 # number of ReadReq miss cycles
+system.cpu.icache.ReadReq_miss_rate 0.069185 # miss rate for ReadReq accesses
+system.cpu.icache.ReadReq_miss_rate_0 0.069185 # miss rate for ReadReq accesses
system.cpu.icache.ReadReq_misses 912 # number of ReadReq misses
system.cpu.icache.ReadReq_misses_0 912 # number of ReadReq misses
-system.cpu.icache.ReadReq_mshr_hits 282 # number of ReadReq MSHR hits
-system.cpu.icache.ReadReq_mshr_hits_0 282 # number of ReadReq MSHR hits
-system.cpu.icache.ReadReq_mshr_miss_latency 4507685 # number of ReadReq MSHR miss cycles
-system.cpu.icache.ReadReq_mshr_miss_latency_0 4507685 # number of ReadReq MSHR miss cycles
-system.cpu.icache.ReadReq_mshr_miss_rate 0.048424 # mshr miss rate for ReadReq accesses
-system.cpu.icache.ReadReq_mshr_miss_rate_0 0.048424 # mshr miss rate for ReadReq accesses
-system.cpu.icache.ReadReq_mshr_misses 630 # number of ReadReq MSHR misses
-system.cpu.icache.ReadReq_mshr_misses_0 630 # number of ReadReq MSHR misses
+system.cpu.icache.ReadReq_mshr_hits 285 # number of ReadReq MSHR hits
+system.cpu.icache.ReadReq_mshr_hits_0 285 # number of ReadReq MSHR hits
+system.cpu.icache.ReadReq_mshr_miss_latency 4469385 # number of ReadReq MSHR miss cycles
+system.cpu.icache.ReadReq_mshr_miss_latency_0 4469385 # number of ReadReq MSHR miss cycles
+system.cpu.icache.ReadReq_mshr_miss_rate 0.047565 # mshr miss rate for ReadReq accesses
+system.cpu.icache.ReadReq_mshr_miss_rate_0 0.047565 # mshr miss rate for ReadReq accesses
+system.cpu.icache.ReadReq_mshr_misses 627 # number of ReadReq MSHR misses
+system.cpu.icache.ReadReq_mshr_misses_0 627 # number of ReadReq MSHR misses
system.cpu.icache.avg_blocked_cycles_no_mshrs <err: div-0> # average number of cycles each access was blocked
-system.cpu.icache.avg_blocked_cycles_no_targets 5648.647059 # average number of cycles each access was blocked
-system.cpu.icache.avg_refs 19.203175 # Average number of references to valid blocks.
+system.cpu.icache.avg_blocked_cycles_no_targets 5603.944444 # average number of cycles each access was blocked
+system.cpu.icache.avg_refs 19.569378 # Average number of references to valid blocks.
system.cpu.icache.blocked_no_mshrs 0 # number of cycles access was blocked
-system.cpu.icache.blocked_no_targets 17 # number of cycles access was blocked
+system.cpu.icache.blocked_no_targets 18 # number of cycles access was blocked
system.cpu.icache.blocked_cycles_no_mshrs 0 # number of cycles access was blocked
-system.cpu.icache.blocked_cycles_no_targets 96027 # number of cycles access was blocked
+system.cpu.icache.blocked_cycles_no_targets 100871 # number of cycles access was blocked
system.cpu.icache.cache_copies 0 # number of cache copies performed
-system.cpu.icache.demand_accesses 13010 # number of demand (read+write) accesses
-system.cpu.icache.demand_accesses_0 13010 # number of demand (read+write) accesses
+system.cpu.icache.demand_accesses 13182 # number of demand (read+write) accesses
+system.cpu.icache.demand_accesses_0 13182 # number of demand (read+write) accesses
system.cpu.icache.demand_accesses_1 0 # number of demand (read+write) accesses
-system.cpu.icache.demand_avg_miss_latency 7746.912281 # average overall miss latency
-system.cpu.icache.demand_avg_miss_latency_0 7746.912281 # average overall miss latency
+system.cpu.icache.demand_avg_miss_latency 7732.322368 # average overall miss latency
+system.cpu.icache.demand_avg_miss_latency_0 7732.322368 # average overall miss latency
system.cpu.icache.demand_avg_miss_latency_1 <err: div-0> # average overall miss latency
-system.cpu.icache.demand_avg_mshr_miss_latency 7155.055556 # average overall mshr miss latency
-system.cpu.icache.demand_avg_mshr_miss_latency_0 7155.055556 # average overall mshr miss latency
+system.cpu.icache.demand_avg_mshr_miss_latency 7128.205742 # average overall mshr miss latency
+system.cpu.icache.demand_avg_mshr_miss_latency_0 7128.205742 # average overall mshr miss latency
system.cpu.icache.demand_avg_mshr_miss_latency_1 <err: div-0> # average overall mshr miss latency
-system.cpu.icache.demand_hits 12098 # number of demand (read+write) hits
-system.cpu.icache.demand_hits_0 12098 # number of demand (read+write) hits
+system.cpu.icache.demand_hits 12270 # number of demand (read+write) hits
+system.cpu.icache.demand_hits_0 12270 # number of demand (read+write) hits
system.cpu.icache.demand_hits_1 0 # number of demand (read+write) hits
-system.cpu.icache.demand_miss_latency 7065184 # number of demand (read+write) miss cycles
-system.cpu.icache.demand_miss_latency_0 7065184 # number of demand (read+write) miss cycles
+system.cpu.icache.demand_miss_latency 7051878 # number of demand (read+write) miss cycles
+system.cpu.icache.demand_miss_latency_0 7051878 # number of demand (read+write) miss cycles
system.cpu.icache.demand_miss_latency_1 0 # number of demand (read+write) miss cycles
-system.cpu.icache.demand_miss_rate 0.070100 # miss rate for demand accesses
-system.cpu.icache.demand_miss_rate_0 0.070100 # miss rate for demand accesses
+system.cpu.icache.demand_miss_rate 0.069185 # miss rate for demand accesses
+system.cpu.icache.demand_miss_rate_0 0.069185 # miss rate for demand accesses
system.cpu.icache.demand_miss_rate_1 <err: div-0> # miss rate for demand accesses
system.cpu.icache.demand_misses 912 # number of demand (read+write) misses
system.cpu.icache.demand_misses_0 912 # number of demand (read+write) misses
system.cpu.icache.demand_misses_1 0 # number of demand (read+write) misses
-system.cpu.icache.demand_mshr_hits 282 # number of demand (read+write) MSHR hits
-system.cpu.icache.demand_mshr_hits_0 282 # number of demand (read+write) MSHR hits
+system.cpu.icache.demand_mshr_hits 285 # number of demand (read+write) MSHR hits
+system.cpu.icache.demand_mshr_hits_0 285 # number of demand (read+write) MSHR hits
system.cpu.icache.demand_mshr_hits_1 0 # number of demand (read+write) MSHR hits
-system.cpu.icache.demand_mshr_miss_latency 4507685 # number of demand (read+write) MSHR miss cycles
-system.cpu.icache.demand_mshr_miss_latency_0 4507685 # number of demand (read+write) MSHR miss cycles
+system.cpu.icache.demand_mshr_miss_latency 4469385 # number of demand (read+write) MSHR miss cycles
+system.cpu.icache.demand_mshr_miss_latency_0 4469385 # number of demand (read+write) MSHR miss cycles
system.cpu.icache.demand_mshr_miss_latency_1 0 # number of demand (read+write) MSHR miss cycles
-system.cpu.icache.demand_mshr_miss_rate 0.048424 # mshr miss rate for demand accesses
-system.cpu.icache.demand_mshr_miss_rate_0 0.048424 # mshr miss rate for demand accesses
+system.cpu.icache.demand_mshr_miss_rate 0.047565 # mshr miss rate for demand accesses
+system.cpu.icache.demand_mshr_miss_rate_0 0.047565 # mshr miss rate for demand accesses
system.cpu.icache.demand_mshr_miss_rate_1 <err: div-0> # mshr miss rate for demand accesses
-system.cpu.icache.demand_mshr_misses 630 # number of demand (read+write) MSHR misses
-system.cpu.icache.demand_mshr_misses_0 630 # number of demand (read+write) MSHR misses
+system.cpu.icache.demand_mshr_misses 627 # number of demand (read+write) MSHR misses
+system.cpu.icache.demand_mshr_misses_0 627 # number of demand (read+write) MSHR misses
system.cpu.icache.demand_mshr_misses_1 0 # number of demand (read+write) MSHR misses
system.cpu.icache.fast_writes 0 # number of fast writes performed
system.cpu.icache.mshr_cap_events 0 # number of times MSHR cap was activated
system.cpu.icache.mshr_cap_events_0 0 # number of times MSHR cap was activated
system.cpu.icache.mshr_cap_events_1 0 # number of times MSHR cap was activated
system.cpu.icache.no_allocate_misses 0 # Number of misses that were no-allocate
-system.cpu.icache.overall_accesses 13010 # number of overall (read+write) accesses
-system.cpu.icache.overall_accesses_0 13010 # number of overall (read+write) accesses
+system.cpu.icache.overall_accesses 13182 # number of overall (read+write) accesses
+system.cpu.icache.overall_accesses_0 13182 # number of overall (read+write) accesses
system.cpu.icache.overall_accesses_1 0 # number of overall (read+write) accesses
-system.cpu.icache.overall_avg_miss_latency 7746.912281 # average overall miss latency
-system.cpu.icache.overall_avg_miss_latency_0 7746.912281 # average overall miss latency
+system.cpu.icache.overall_avg_miss_latency 7732.322368 # average overall miss latency
+system.cpu.icache.overall_avg_miss_latency_0 7732.322368 # average overall miss latency
system.cpu.icache.overall_avg_miss_latency_1 <err: div-0> # average overall miss latency
-system.cpu.icache.overall_avg_mshr_miss_latency 7155.055556 # average overall mshr miss latency
-system.cpu.icache.overall_avg_mshr_miss_latency_0 7155.055556 # average overall mshr miss latency
+system.cpu.icache.overall_avg_mshr_miss_latency 7128.205742 # average overall mshr miss latency
+system.cpu.icache.overall_avg_mshr_miss_latency_0 7128.205742 # average overall mshr miss latency
system.cpu.icache.overall_avg_mshr_miss_latency_1 <err: div-0> # average overall mshr miss latency
system.cpu.icache.overall_avg_mshr_uncacheable_latency <err: div-0> # average overall mshr uncacheable latency
system.cpu.icache.overall_avg_mshr_uncacheable_latency_0 <err: div-0> # average overall mshr uncacheable latency
system.cpu.icache.overall_avg_mshr_uncacheable_latency_1 <err: div-0> # average overall mshr uncacheable latency
-system.cpu.icache.overall_hits 12098 # number of overall hits
-system.cpu.icache.overall_hits_0 12098 # number of overall hits
+system.cpu.icache.overall_hits 12270 # number of overall hits
+system.cpu.icache.overall_hits_0 12270 # number of overall hits
system.cpu.icache.overall_hits_1 0 # number of overall hits
-system.cpu.icache.overall_miss_latency 7065184 # number of overall miss cycles
-system.cpu.icache.overall_miss_latency_0 7065184 # number of overall miss cycles
+system.cpu.icache.overall_miss_latency 7051878 # number of overall miss cycles
+system.cpu.icache.overall_miss_latency_0 7051878 # number of overall miss cycles
system.cpu.icache.overall_miss_latency_1 0 # number of overall miss cycles
-system.cpu.icache.overall_miss_rate 0.070100 # miss rate for overall accesses
-system.cpu.icache.overall_miss_rate_0 0.070100 # miss rate for overall accesses
+system.cpu.icache.overall_miss_rate 0.069185 # miss rate for overall accesses
+system.cpu.icache.overall_miss_rate_0 0.069185 # miss rate for overall accesses
system.cpu.icache.overall_miss_rate_1 <err: div-0> # miss rate for overall accesses
system.cpu.icache.overall_misses 912 # number of overall misses
system.cpu.icache.overall_misses_0 912 # number of overall misses
system.cpu.icache.overall_misses_1 0 # number of overall misses
-system.cpu.icache.overall_mshr_hits 282 # number of overall MSHR hits
-system.cpu.icache.overall_mshr_hits_0 282 # number of overall MSHR hits
+system.cpu.icache.overall_mshr_hits 285 # number of overall MSHR hits
+system.cpu.icache.overall_mshr_hits_0 285 # number of overall MSHR hits
system.cpu.icache.overall_mshr_hits_1 0 # number of overall MSHR hits
-system.cpu.icache.overall_mshr_miss_latency 4507685 # number of overall MSHR miss cycles
-system.cpu.icache.overall_mshr_miss_latency_0 4507685 # number of overall MSHR miss cycles
+system.cpu.icache.overall_mshr_miss_latency 4469385 # number of overall MSHR miss cycles
+system.cpu.icache.overall_mshr_miss_latency_0 4469385 # number of overall MSHR miss cycles
system.cpu.icache.overall_mshr_miss_latency_1 0 # number of overall MSHR miss cycles
-system.cpu.icache.overall_mshr_miss_rate 0.048424 # mshr miss rate for overall accesses
-system.cpu.icache.overall_mshr_miss_rate_0 0.048424 # mshr miss rate for overall accesses
+system.cpu.icache.overall_mshr_miss_rate 0.047565 # mshr miss rate for overall accesses
+system.cpu.icache.overall_mshr_miss_rate_0 0.047565 # mshr miss rate for overall accesses
system.cpu.icache.overall_mshr_miss_rate_1 <err: div-0> # mshr miss rate for overall accesses
-system.cpu.icache.overall_mshr_misses 630 # number of overall MSHR misses
-system.cpu.icache.overall_mshr_misses_0 630 # number of overall MSHR misses
+system.cpu.icache.overall_mshr_misses 627 # number of overall MSHR misses
+system.cpu.icache.overall_mshr_misses_0 627 # number of overall MSHR misses
system.cpu.icache.overall_mshr_misses_1 0 # number of overall MSHR misses
system.cpu.icache.overall_mshr_uncacheable_latency 0 # number of overall MSHR uncacheable cycles
system.cpu.icache.overall_mshr_uncacheable_latency_0 0 # number of overall MSHR uncacheable cycles
@@ -381,138 +381,138 @@ system.cpu.icache.prefetcher.num_hwpf_squashed_from_miss 0
system.cpu.icache.replacements 6 # number of replacements
system.cpu.icache.replacements_0 6 # number of replacements
system.cpu.icache.replacements_1 0 # number of replacements
-system.cpu.icache.sampled_refs 630 # Sample count of references to valid blocks.
+system.cpu.icache.sampled_refs 627 # Sample count of references to valid blocks.
system.cpu.icache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions
system.cpu.icache.soft_prefetch_mshr_full_0 0 # number of mshr full events for SW prefetching instrutions
system.cpu.icache.soft_prefetch_mshr_full_1 0 # number of mshr full events for SW prefetching instrutions
-system.cpu.icache.tagsinuse 289.377534 # Cycle average of tags in use
-system.cpu.icache.total_refs 12098 # Total number of references to valid blocks.
+system.cpu.icache.tagsinuse 288.361956 # Cycle average of tags in use
+system.cpu.icache.total_refs 12270 # Total number of references to valid blocks.
system.cpu.icache.warmup_cycle 0 # Cycle when the warmup percentage was hit.
system.cpu.icache.writebacks 0 # number of writebacks
system.cpu.icache.writebacks_0 0 # number of writebacks
system.cpu.icache.writebacks_1 0 # number of writebacks
-system.cpu.idleCycles 2048213 # Total number of cycles that the CPU has spent unscheduled due to idling
-system.cpu.iew.EXEC:branches 4035 # Number of branches executed
-system.cpu.iew.EXEC:branches_0 2458 # Number of branches executed
-system.cpu.iew.EXEC:branches_1 1577 # Number of branches executed
+system.cpu.idleCycles 2043018 # Total number of cycles that the CPU has spent unscheduled due to idling
+system.cpu.iew.EXEC:branches 4024 # Number of branches executed
+system.cpu.iew.EXEC:branches_0 1569 # Number of branches executed
+system.cpu.iew.EXEC:branches_1 2455 # Number of branches executed
system.cpu.iew.EXEC:nop 84 # number of nop insts executed
system.cpu.iew.EXEC:nop_0 42 # number of nop insts executed
system.cpu.iew.EXEC:nop_1 42 # number of nop insts executed
-system.cpu.iew.EXEC:rate 0.142196 # Inst execution rate
-system.cpu.iew.EXEC:refs 10960 # number of memory reference insts executed
-system.cpu.iew.EXEC:refs_0 7253 # number of memory reference insts executed
-system.cpu.iew.EXEC:refs_1 3707 # number of memory reference insts executed
-system.cpu.iew.EXEC:stores 3812 # Number of stores executed
-system.cpu.iew.EXEC:stores_0 2509 # Number of stores executed
-system.cpu.iew.EXEC:stores_1 1303 # Number of stores executed
+system.cpu.iew.EXEC:rate 0.144523 # Inst execution rate
+system.cpu.iew.EXEC:refs 11361 # number of memory reference insts executed
+system.cpu.iew.EXEC:refs_0 4575 # number of memory reference insts executed
+system.cpu.iew.EXEC:refs_1 6786 # number of memory reference insts executed
+system.cpu.iew.EXEC:stores 3833 # Number of stores executed
+system.cpu.iew.EXEC:stores_0 1337 # Number of stores executed
+system.cpu.iew.EXEC:stores_1 2496 # Number of stores executed
system.cpu.iew.EXEC:swp 0 # number of swp insts executed
system.cpu.iew.EXEC:swp_0 0 # number of swp insts executed
system.cpu.iew.EXEC:swp_1 0 # number of swp insts executed
-system.cpu.iew.WB:consumers 12377 # num instructions consuming a value
-system.cpu.iew.WB:consumers_0 6652 # num instructions consuming a value
-system.cpu.iew.WB:consumers_1 5725 # num instructions consuming a value
-system.cpu.iew.WB:count 22520 # cumulative count of insts written-back
-system.cpu.iew.WB:count_0 12790 # cumulative count of insts written-back
-system.cpu.iew.WB:count_1 9730 # cumulative count of insts written-back
-system.cpu.iew.WB:fanout 0.808516 # average fanout of values written-back
-system.cpu.iew.WB:fanout_0 0.819753 # average fanout of values written-back
-system.cpu.iew.WB:fanout_1 0.795459 # average fanout of values written-back
+system.cpu.iew.WB:consumers 12385 # num instructions consuming a value
+system.cpu.iew.WB:consumers_0 5750 # num instructions consuming a value
+system.cpu.iew.WB:consumers_1 6635 # num instructions consuming a value
+system.cpu.iew.WB:count 22604 # cumulative count of insts written-back
+system.cpu.iew.WB:count_0 10240 # cumulative count of insts written-back
+system.cpu.iew.WB:count_1 12364 # cumulative count of insts written-back
+system.cpu.iew.WB:fanout 0.811385 # average fanout of values written-back
+system.cpu.iew.WB:fanout_0 0.800522 # average fanout of values written-back
+system.cpu.iew.WB:fanout_1 0.820799 # average fanout of values written-back
system.cpu.iew.WB:penalized 0 # number of instrctions required to write to 'other' IQ
system.cpu.iew.WB:penalized_0 0 # number of instrctions required to write to 'other' IQ
system.cpu.iew.WB:penalized_1 0 # number of instrctions required to write to 'other' IQ
system.cpu.iew.WB:penalized_rate 0 # fraction of instructions written-back that wrote to 'other' IQ
system.cpu.iew.WB:penalized_rate_0 0 # fraction of instructions written-back that wrote to 'other' IQ
system.cpu.iew.WB:penalized_rate_1 0 # fraction of instructions written-back that wrote to 'other' IQ
-system.cpu.iew.WB:producers 10007 # num instructions producing a value
-system.cpu.iew.WB:producers_0 5453 # num instructions producing a value
-system.cpu.iew.WB:producers_1 4554 # num instructions producing a value
-system.cpu.iew.WB:rate 0.119185 # insts written-back per cycle
-system.cpu.iew.WB:rate_0 0.067690 # insts written-back per cycle
-system.cpu.iew.WB:rate_1 0.051495 # insts written-back per cycle
-system.cpu.iew.WB:sent 22674 # cumulative count of insts sent to commit
-system.cpu.iew.WB:sent_0 12874 # cumulative count of insts sent to commit
-system.cpu.iew.WB:sent_1 9800 # cumulative count of insts sent to commit
-system.cpu.iew.branchMispredicts 1030 # Number of branch mispredicts detected at execute
-system.cpu.iew.iewBlockCycles 62040 # Number of cycles IEW is blocking
-system.cpu.iew.iewDispLoadInsts 8571 # Number of dispatched load instructions
-system.cpu.iew.iewDispNonSpecInsts 42 # Number of dispatched non-speculative instructions
-system.cpu.iew.iewDispSquashedInsts 5358 # Number of squashed instructions skipped by dispatch
-system.cpu.iew.iewDispStoreInsts 6237 # Number of dispatched store instructions
-system.cpu.iew.iewDispatchedInsts 39780 # Number of instructions dispatched to IQ
-system.cpu.iew.iewExecLoadInsts 7148 # Number of load instructions executed
-system.cpu.iew.iewExecLoadInsts_0 4744 # Number of load instructions executed
-system.cpu.iew.iewExecLoadInsts_1 2404 # Number of load instructions executed
-system.cpu.iew.iewExecSquashedInsts 903 # Number of squashed instructions skipped in execute
-system.cpu.iew.iewExecutedInsts 26868 # Number of executed instructions
-system.cpu.iew.iewIQFullEvents 44 # Number of times the IQ has become full, causing a stall
+system.cpu.iew.WB:producers 10049 # num instructions producing a value
+system.cpu.iew.WB:producers_0 4603 # num instructions producing a value
+system.cpu.iew.WB:producers_1 5446 # num instructions producing a value
+system.cpu.iew.WB:rate 0.119505 # insts written-back per cycle
+system.cpu.iew.WB:rate_0 0.054138 # insts written-back per cycle
+system.cpu.iew.WB:rate_1 0.065367 # insts written-back per cycle
+system.cpu.iew.WB:sent 22763 # cumulative count of insts sent to commit
+system.cpu.iew.WB:sent_0 10322 # cumulative count of insts sent to commit
+system.cpu.iew.WB:sent_1 12441 # cumulative count of insts sent to commit
+system.cpu.iew.branchMispredicts 1027 # Number of branch mispredicts detected at execute
+system.cpu.iew.iewBlockCycles 60103 # Number of cycles IEW is blocking
+system.cpu.iew.iewDispLoadInsts 8942 # Number of dispatched load instructions
+system.cpu.iew.iewDispNonSpecInsts 41 # Number of dispatched non-speculative instructions
+system.cpu.iew.iewDispSquashedInsts 5344 # Number of squashed instructions skipped by dispatch
+system.cpu.iew.iewDispStoreInsts 6219 # Number of dispatched store instructions
+system.cpu.iew.iewDispatchedInsts 40858 # Number of instructions dispatched to IQ
+system.cpu.iew.iewExecLoadInsts 7528 # Number of load instructions executed
+system.cpu.iew.iewExecLoadInsts_0 3238 # Number of load instructions executed
+system.cpu.iew.iewExecLoadInsts_1 4290 # Number of load instructions executed
+system.cpu.iew.iewExecSquashedInsts 872 # Number of squashed instructions skipped in execute
+system.cpu.iew.iewExecutedInsts 27336 # Number of executed instructions
+system.cpu.iew.iewIQFullEvents 45 # Number of times the IQ has become full, causing a stall
system.cpu.iew.iewIdleCycles 0 # Number of cycles IEW is idle
-system.cpu.iew.iewLSQFullEvents 2 # Number of times the LSQ has become full, causing a stall
-system.cpu.iew.iewSquashCycles 5552 # Number of cycles IEW is squashing
-system.cpu.iew.iewUnblockCycles 117 # Number of cycles IEW is unblocking
-system.cpu.iew.lsq.thread.0.blockedLoads 1 # Number of blocked loads due to partial load-store forwarding
-system.cpu.iew.lsq.thread.0.cacheBlocked 3088 # Number of times an access to memory failed due to the cache being blocked
-system.cpu.iew.lsq.thread.0.forwLoads 64 # Number of loads that had data forwarded from stores
-system.cpu.iew.lsq.thread.0.ignoredResponses 6 # Number of memory responses ignored because the instruction is squashed
+system.cpu.iew.iewLSQFullEvents 4 # Number of times the LSQ has become full, causing a stall
+system.cpu.iew.iewSquashCycles 5733 # Number of cycles IEW is squashing
+system.cpu.iew.iewUnblockCycles 122 # Number of cycles IEW is unblocking
+system.cpu.iew.lsq.thread.0.blockedLoads 0 # Number of blocked loads due to partial load-store forwarding
+system.cpu.iew.lsq.thread.0.cacheBlocked 1584 # Number of times an access to memory failed due to the cache being blocked
+system.cpu.iew.lsq.thread.0.forwLoads 65 # Number of loads that had data forwarded from stores
+system.cpu.iew.lsq.thread.0.ignoredResponses 10 # Number of memory responses ignored because the instruction is squashed
system.cpu.iew.lsq.thread.0.invAddrLoads 0 # Number of loads ignored due to an invalid address
system.cpu.iew.lsq.thread.0.invAddrSwpfs 0 # Number of software prefetches ignored due to an invalid address
-system.cpu.iew.lsq.thread.0.memOrderViolation 34 # Number of memory ordering violations
+system.cpu.iew.lsq.thread.0.memOrderViolation 56 # Number of memory ordering violations
system.cpu.iew.lsq.thread.0.rescheduledLoads 1 # Number of loads that were rescheduled
-system.cpu.iew.lsq.thread.0.squashedLoads 4770 # Number of loads squashed
-system.cpu.iew.lsq.thread.0.squashedStores 3678 # Number of stores squashed
-system.cpu.iew.lsq.thread.1.blockedLoads 1 # Number of blocked loads due to partial load-store forwarding
-system.cpu.iew.lsq.thread.1.cacheBlocked 756 # Number of times an access to memory failed due to the cache being blocked
-system.cpu.iew.lsq.thread.1.forwLoads 64 # Number of loads that had data forwarded from stores
-system.cpu.iew.lsq.thread.1.ignoredResponses 10 # Number of memory responses ignored because the instruction is squashed
+system.cpu.iew.lsq.thread.0.squashedLoads 2678 # Number of loads squashed
+system.cpu.iew.lsq.thread.0.squashedStores 968 # Number of stores squashed
+system.cpu.iew.lsq.thread.1.blockedLoads 0 # Number of blocked loads due to partial load-store forwarding
+system.cpu.iew.lsq.thread.1.cacheBlocked 2643 # Number of times an access to memory failed due to the cache being blocked
+system.cpu.iew.lsq.thread.1.forwLoads 67 # Number of loads that had data forwarded from stores
+system.cpu.iew.lsq.thread.1.ignoredResponses 7 # Number of memory responses ignored because the instruction is squashed
system.cpu.iew.lsq.thread.1.invAddrLoads 0 # Number of loads ignored due to an invalid address
system.cpu.iew.lsq.thread.1.invAddrSwpfs 0 # Number of software prefetches ignored due to an invalid address
-system.cpu.iew.lsq.thread.1.memOrderViolation 29 # Number of memory ordering violations
+system.cpu.iew.lsq.thread.1.memOrderViolation 54 # Number of memory ordering violations
system.cpu.iew.lsq.thread.1.rescheduledLoads 1 # Number of loads that were rescheduled
-system.cpu.iew.lsq.thread.1.squashedLoads 1843 # Number of loads squashed
-system.cpu.iew.lsq.thread.1.squashedStores 935 # Number of stores squashed
-system.cpu.iew.memOrderViolationEvents 63 # Number of memory order violations
-system.cpu.iew.predictedNotTakenIncorrect 798 # Number of branches that were predicted not taken incorrectly
-system.cpu.iew.predictedTakenIncorrect 232 # Number of branches that were predicted taken incorrectly
-system.cpu.ipc_0 0.002514 # IPC: Instructions Per Cycle
-system.cpu.ipc_1 0.002513 # IPC: Instructions Per Cycle
-system.cpu.ipc_total 0.005027 # IPC: Total IPC of All Threads
-system.cpu.iq.ISSUE:FU_type_0 16536 # Type of FU issued
+system.cpu.iew.lsq.thread.1.squashedLoads 4306 # Number of loads squashed
+system.cpu.iew.lsq.thread.1.squashedStores 3627 # Number of stores squashed
+system.cpu.iew.memOrderViolationEvents 110 # Number of memory order violations
+system.cpu.iew.predictedNotTakenIncorrect 796 # Number of branches that were predicted not taken incorrectly
+system.cpu.iew.predictedTakenIncorrect 231 # Number of branches that were predicted taken incorrectly
+system.cpu.ipc_0 0.002520 # IPC: Instructions Per Cycle
+system.cpu.ipc_1 0.002519 # IPC: Instructions Per Cycle
+system.cpu.ipc_total 0.005039 # IPC: Total IPC of All Threads
+system.cpu.iq.ISSUE:FU_type_0 12578 # Type of FU issued
system.cpu.iq.ISSUE:FU_type_0.start_dist
- (null) 2 0.01% # Type of FU issued
- IntAlu 9136 55.25% # Type of FU issued
+ (null) 2 0.02% # Type of FU issued
+ IntAlu 7865 62.53% # Type of FU issued
IntMult 1 0.01% # Type of FU issued
IntDiv 0 0.00% # Type of FU issued
- FloatAdd 2 0.01% # Type of FU issued
+ FloatAdd 2 0.02% # Type of FU issued
FloatCmp 0 0.00% # Type of FU issued
FloatCvt 0 0.00% # Type of FU issued
FloatMult 0 0.00% # Type of FU issued
FloatDiv 0 0.00% # Type of FU issued
FloatSqrt 0 0.00% # Type of FU issued
- MemRead 4850 29.33% # Type of FU issued
- MemWrite 2545 15.39% # Type of FU issued
+ MemRead 3344 26.59% # Type of FU issued
+ MemWrite 1364 10.84% # Type of FU issued
IprAccess 0 0.00% # Type of FU issued
InstPrefetch 0 0.00% # Type of FU issued
system.cpu.iq.ISSUE:FU_type_0.end_dist
-system.cpu.iq.ISSUE:FU_type_1 11235 # Type of FU issued
+system.cpu.iq.ISSUE:FU_type_1 15630 # Type of FU issued
system.cpu.iq.ISSUE:FU_type_1.start_dist
- (null) 2 0.02% # Type of FU issued
- IntAlu 7383 65.71% # Type of FU issued
+ (null) 2 0.01% # Type of FU issued
+ IntAlu 8707 55.71% # Type of FU issued
IntMult 1 0.01% # Type of FU issued
IntDiv 0 0.00% # Type of FU issued
- FloatAdd 2 0.02% # Type of FU issued
+ FloatAdd 2 0.01% # Type of FU issued
FloatCmp 0 0.00% # Type of FU issued
FloatCvt 0 0.00% # Type of FU issued
FloatMult 0 0.00% # Type of FU issued
FloatDiv 0 0.00% # Type of FU issued
FloatSqrt 0 0.00% # Type of FU issued
- MemRead 2518 22.41% # Type of FU issued
- MemWrite 1329 11.83% # Type of FU issued
+ MemRead 4394 28.11% # Type of FU issued
+ MemWrite 2524 16.15% # Type of FU issued
IprAccess 0 0.00% # Type of FU issued
InstPrefetch 0 0.00% # Type of FU issued
system.cpu.iq.ISSUE:FU_type_1.end_dist
-system.cpu.iq.ISSUE:FU_type 27771 # Type of FU issued
+system.cpu.iq.ISSUE:FU_type 28208 # Type of FU issued
system.cpu.iq.ISSUE:FU_type.start_dist
(null) 4 0.01% # Type of FU issued
- IntAlu 16519 59.48% # Type of FU issued
+ IntAlu 16572 58.75% # Type of FU issued
IntMult 2 0.01% # Type of FU issued
IntDiv 0 0.00% # Type of FU issued
FloatAdd 4 0.01% # Type of FU issued
@@ -521,20 +521,20 @@ system.cpu.iq.ISSUE:FU_type.start_dist
FloatMult 0 0.00% # Type of FU issued
FloatDiv 0 0.00% # Type of FU issued
FloatSqrt 0 0.00% # Type of FU issued
- MemRead 7368 26.53% # Type of FU issued
- MemWrite 3874 13.95% # Type of FU issued
+ MemRead 7738 27.43% # Type of FU issued
+ MemWrite 3888 13.78% # Type of FU issued
IprAccess 0 0.00% # Type of FU issued
InstPrefetch 0 0.00% # Type of FU issued
system.cpu.iq.ISSUE:FU_type.end_dist
-system.cpu.iq.ISSUE:fu_busy_cnt 146 # FU busy when requested
-system.cpu.iq.ISSUE:fu_busy_cnt_0 73 # FU busy when requested
-system.cpu.iq.ISSUE:fu_busy_cnt_1 73 # FU busy when requested
-system.cpu.iq.ISSUE:fu_busy_rate 0.005257 # FU busy rate (busy events/executed inst)
-system.cpu.iq.ISSUE:fu_busy_rate_0 0.002629 # FU busy rate (busy events/executed inst)
-system.cpu.iq.ISSUE:fu_busy_rate_1 0.002629 # FU busy rate (busy events/executed inst)
+system.cpu.iq.ISSUE:fu_busy_cnt 149 # FU busy when requested
+system.cpu.iq.ISSUE:fu_busy_cnt_0 72 # FU busy when requested
+system.cpu.iq.ISSUE:fu_busy_cnt_1 77 # FU busy when requested
+system.cpu.iq.ISSUE:fu_busy_rate 0.005282 # FU busy rate (busy events/executed inst)
+system.cpu.iq.ISSUE:fu_busy_rate_0 0.002552 # FU busy rate (busy events/executed inst)
+system.cpu.iq.ISSUE:fu_busy_rate_1 0.002730 # FU busy rate (busy events/executed inst)
system.cpu.iq.ISSUE:fu_full.start_dist
(null) 0 0.00% # attempts to use FU when none available
- IntAlu 0 0.00% # attempts to use FU when none available
+ IntAlu 1 0.67% # attempts to use FU when none available
IntMult 0 0.00% # attempts to use FU when none available
IntDiv 0 0.00% # attempts to use FU when none available
FloatAdd 0 0.00% # attempts to use FU when none available
@@ -543,52 +543,52 @@ system.cpu.iq.ISSUE:fu_full.start_dist
FloatMult 0 0.00% # attempts to use FU when none available
FloatDiv 0 0.00% # attempts to use FU when none available
FloatSqrt 0 0.00% # attempts to use FU when none available
- MemRead 83 56.85% # attempts to use FU when none available
- MemWrite 63 43.15% # attempts to use FU when none available
+ MemRead 83 55.70% # attempts to use FU when none available
+ MemWrite 65 43.62% # attempts to use FU when none available
IprAccess 0 0.00% # attempts to use FU when none available
InstPrefetch 0 0.00% # attempts to use FU when none available
system.cpu.iq.ISSUE:fu_full.end_dist
system.cpu.iq.ISSUE:issued_per_cycle.start_dist # Number of insts issued each cycle
-system.cpu.iq.ISSUE:issued_per_cycle.samples 188950
+system.cpu.iq.ISSUE:issued_per_cycle.samples 189147
system.cpu.iq.ISSUE:issued_per_cycle.min_value 0
- 0 174613 9241.23%
- 1 6958 368.25%
- 2 3428 181.42%
- 3 2696 142.68%
- 4 636 33.66%
- 5 439 23.23%
- 6 143 7.57%
- 7 24 1.27%
- 8 13 0.69%
+ 0 174626 9232.29%
+ 1 7072 373.89%
+ 2 3403 179.91%
+ 3 2709 143.22%
+ 4 713 37.70%
+ 5 443 23.42%
+ 6 143 7.56%
+ 7 26 1.37%
+ 8 12 0.63%
system.cpu.iq.ISSUE:issued_per_cycle.max_value 8
system.cpu.iq.ISSUE:issued_per_cycle.end_dist
-system.cpu.iq.ISSUE:rate 0.146975 # Inst issue rate
-system.cpu.iq.iqInstsAdded 39654 # Number of instructions added to the IQ (excludes non-spec)
-system.cpu.iq.iqInstsIssued 27771 # Number of instructions issued
-system.cpu.iq.iqNonSpecInstsAdded 42 # Number of non-speculative instructions added to the IQ
-system.cpu.iq.iqSquashedInstsExamined 27426 # Number of squashed instructions iterated over during squash; mainly for profiling
-system.cpu.iq.iqSquashedInstsIssued 185 # Number of squashed instructions issued
-system.cpu.iq.iqSquashedNonSpecRemoved 8 # Number of squashed non-spec instructions that were removed
-system.cpu.iq.iqSquashedOperandsExamined 20011 # Number of squashed operands that are examined and possibly removed from graph
-system.cpu.l2cache.ReadReq_accesses 973 # number of ReadReq accesses(hits+misses)
-system.cpu.l2cache.ReadReq_accesses_0 973 # number of ReadReq accesses(hits+misses)
-system.cpu.l2cache.ReadReq_avg_miss_latency 6750.932169 # average ReadReq miss latency
-system.cpu.l2cache.ReadReq_avg_miss_latency_0 6750.932169 # average ReadReq miss latency
-system.cpu.l2cache.ReadReq_avg_mshr_miss_latency 3603.773895 # average ReadReq mshr miss latency
-system.cpu.l2cache.ReadReq_avg_mshr_miss_latency_0 3603.773895 # average ReadReq mshr miss latency
-system.cpu.l2cache.ReadReq_miss_latency 6568657 # number of ReadReq miss cycles
-system.cpu.l2cache.ReadReq_miss_latency_0 6568657 # number of ReadReq miss cycles
+system.cpu.iq.ISSUE:rate 0.149133 # Inst issue rate
+system.cpu.iq.iqInstsAdded 40733 # Number of instructions added to the IQ (excludes non-spec)
+system.cpu.iq.iqInstsIssued 28208 # Number of instructions issued
+system.cpu.iq.iqNonSpecInstsAdded 41 # Number of non-speculative instructions added to the IQ
+system.cpu.iq.iqSquashedInstsExamined 28495 # Number of squashed instructions iterated over during squash; mainly for profiling
+system.cpu.iq.iqSquashedInstsIssued 192 # Number of squashed instructions issued
+system.cpu.iq.iqSquashedNonSpecRemoved 7 # Number of squashed non-spec instructions that were removed
+system.cpu.iq.iqSquashedOperandsExamined 21369 # Number of squashed operands that are examined and possibly removed from graph
+system.cpu.l2cache.ReadReq_accesses 970 # number of ReadReq accesses(hits+misses)
+system.cpu.l2cache.ReadReq_accesses_0 970 # number of ReadReq accesses(hits+misses)
+system.cpu.l2cache.ReadReq_avg_miss_latency 6748.795876 # average ReadReq miss latency
+system.cpu.l2cache.ReadReq_avg_miss_latency_0 6748.795876 # average ReadReq miss latency
+system.cpu.l2cache.ReadReq_avg_mshr_miss_latency 3604.818557 # average ReadReq mshr miss latency
+system.cpu.l2cache.ReadReq_avg_mshr_miss_latency_0 3604.818557 # average ReadReq mshr miss latency
+system.cpu.l2cache.ReadReq_miss_latency 6546332 # number of ReadReq miss cycles
+system.cpu.l2cache.ReadReq_miss_latency_0 6546332 # number of ReadReq miss cycles
system.cpu.l2cache.ReadReq_miss_rate 1 # miss rate for ReadReq accesses
system.cpu.l2cache.ReadReq_miss_rate_0 1 # miss rate for ReadReq accesses
-system.cpu.l2cache.ReadReq_misses 973 # number of ReadReq misses
-system.cpu.l2cache.ReadReq_misses_0 973 # number of ReadReq misses
-system.cpu.l2cache.ReadReq_mshr_miss_latency 3506472 # number of ReadReq MSHR miss cycles
-system.cpu.l2cache.ReadReq_mshr_miss_latency_0 3506472 # number of ReadReq MSHR miss cycles
+system.cpu.l2cache.ReadReq_misses 970 # number of ReadReq misses
+system.cpu.l2cache.ReadReq_misses_0 970 # number of ReadReq misses
+system.cpu.l2cache.ReadReq_mshr_miss_latency 3496674 # number of ReadReq MSHR miss cycles
+system.cpu.l2cache.ReadReq_mshr_miss_latency_0 3496674 # number of ReadReq MSHR miss cycles
system.cpu.l2cache.ReadReq_mshr_miss_rate 1 # mshr miss rate for ReadReq accesses
system.cpu.l2cache.ReadReq_mshr_miss_rate_0 1 # mshr miss rate for ReadReq accesses
-system.cpu.l2cache.ReadReq_mshr_misses 973 # number of ReadReq MSHR misses
-system.cpu.l2cache.ReadReq_mshr_misses_0 973 # number of ReadReq MSHR misses
+system.cpu.l2cache.ReadReq_mshr_misses 970 # number of ReadReq MSHR misses
+system.cpu.l2cache.ReadReq_mshr_misses_0 970 # number of ReadReq MSHR misses
system.cpu.l2cache.avg_blocked_cycles_no_mshrs <err: div-0> # average number of cycles each access was blocked
system.cpu.l2cache.avg_blocked_cycles_no_targets <err: div-0> # average number of cycles each access was blocked
system.cpu.l2cache.avg_refs 0 # Average number of references to valid blocks.
@@ -597,52 +597,52 @@ system.cpu.l2cache.blocked_no_targets 0 # nu
system.cpu.l2cache.blocked_cycles_no_mshrs 0 # number of cycles access was blocked
system.cpu.l2cache.blocked_cycles_no_targets 0 # number of cycles access was blocked
system.cpu.l2cache.cache_copies 0 # number of cache copies performed
-system.cpu.l2cache.demand_accesses 973 # number of demand (read+write) accesses
-system.cpu.l2cache.demand_accesses_0 973 # number of demand (read+write) accesses
+system.cpu.l2cache.demand_accesses 970 # number of demand (read+write) accesses
+system.cpu.l2cache.demand_accesses_0 970 # number of demand (read+write) accesses
system.cpu.l2cache.demand_accesses_1 0 # number of demand (read+write) accesses
-system.cpu.l2cache.demand_avg_miss_latency 6750.932169 # average overall miss latency
-system.cpu.l2cache.demand_avg_miss_latency_0 6750.932169 # average overall miss latency
+system.cpu.l2cache.demand_avg_miss_latency 6748.795876 # average overall miss latency
+system.cpu.l2cache.demand_avg_miss_latency_0 6748.795876 # average overall miss latency
system.cpu.l2cache.demand_avg_miss_latency_1 <err: div-0> # average overall miss latency
-system.cpu.l2cache.demand_avg_mshr_miss_latency 3603.773895 # average overall mshr miss latency
-system.cpu.l2cache.demand_avg_mshr_miss_latency_0 3603.773895 # average overall mshr miss latency
+system.cpu.l2cache.demand_avg_mshr_miss_latency 3604.818557 # average overall mshr miss latency
+system.cpu.l2cache.demand_avg_mshr_miss_latency_0 3604.818557 # average overall mshr miss latency
system.cpu.l2cache.demand_avg_mshr_miss_latency_1 <err: div-0> # average overall mshr miss latency
system.cpu.l2cache.demand_hits 0 # number of demand (read+write) hits
system.cpu.l2cache.demand_hits_0 0 # number of demand (read+write) hits
system.cpu.l2cache.demand_hits_1 0 # number of demand (read+write) hits
-system.cpu.l2cache.demand_miss_latency 6568657 # number of demand (read+write) miss cycles
-system.cpu.l2cache.demand_miss_latency_0 6568657 # number of demand (read+write) miss cycles
+system.cpu.l2cache.demand_miss_latency 6546332 # number of demand (read+write) miss cycles
+system.cpu.l2cache.demand_miss_latency_0 6546332 # number of demand (read+write) miss cycles
system.cpu.l2cache.demand_miss_latency_1 0 # number of demand (read+write) miss cycles
system.cpu.l2cache.demand_miss_rate 1 # miss rate for demand accesses
system.cpu.l2cache.demand_miss_rate_0 1 # miss rate for demand accesses
system.cpu.l2cache.demand_miss_rate_1 <err: div-0> # miss rate for demand accesses
-system.cpu.l2cache.demand_misses 973 # number of demand (read+write) misses
-system.cpu.l2cache.demand_misses_0 973 # number of demand (read+write) misses
+system.cpu.l2cache.demand_misses 970 # number of demand (read+write) misses
+system.cpu.l2cache.demand_misses_0 970 # number of demand (read+write) misses
system.cpu.l2cache.demand_misses_1 0 # number of demand (read+write) misses
system.cpu.l2cache.demand_mshr_hits 0 # number of demand (read+write) MSHR hits
system.cpu.l2cache.demand_mshr_hits_0 0 # number of demand (read+write) MSHR hits
system.cpu.l2cache.demand_mshr_hits_1 0 # number of demand (read+write) MSHR hits
-system.cpu.l2cache.demand_mshr_miss_latency 3506472 # number of demand (read+write) MSHR miss cycles
-system.cpu.l2cache.demand_mshr_miss_latency_0 3506472 # number of demand (read+write) MSHR miss cycles
+system.cpu.l2cache.demand_mshr_miss_latency 3496674 # number of demand (read+write) MSHR miss cycles
+system.cpu.l2cache.demand_mshr_miss_latency_0 3496674 # number of demand (read+write) MSHR miss cycles
system.cpu.l2cache.demand_mshr_miss_latency_1 0 # number of demand (read+write) MSHR miss cycles
system.cpu.l2cache.demand_mshr_miss_rate 1 # mshr miss rate for demand accesses
system.cpu.l2cache.demand_mshr_miss_rate_0 1 # mshr miss rate for demand accesses
system.cpu.l2cache.demand_mshr_miss_rate_1 <err: div-0> # mshr miss rate for demand accesses
-system.cpu.l2cache.demand_mshr_misses 973 # number of demand (read+write) MSHR misses
-system.cpu.l2cache.demand_mshr_misses_0 973 # number of demand (read+write) MSHR misses
+system.cpu.l2cache.demand_mshr_misses 970 # number of demand (read+write) MSHR misses
+system.cpu.l2cache.demand_mshr_misses_0 970 # number of demand (read+write) MSHR misses
system.cpu.l2cache.demand_mshr_misses_1 0 # number of demand (read+write) MSHR misses
system.cpu.l2cache.fast_writes 0 # number of fast writes performed
system.cpu.l2cache.mshr_cap_events 0 # number of times MSHR cap was activated
system.cpu.l2cache.mshr_cap_events_0 0 # number of times MSHR cap was activated
system.cpu.l2cache.mshr_cap_events_1 0 # number of times MSHR cap was activated
system.cpu.l2cache.no_allocate_misses 0 # Number of misses that were no-allocate
-system.cpu.l2cache.overall_accesses 973 # number of overall (read+write) accesses
-system.cpu.l2cache.overall_accesses_0 973 # number of overall (read+write) accesses
+system.cpu.l2cache.overall_accesses 970 # number of overall (read+write) accesses
+system.cpu.l2cache.overall_accesses_0 970 # number of overall (read+write) accesses
system.cpu.l2cache.overall_accesses_1 0 # number of overall (read+write) accesses
-system.cpu.l2cache.overall_avg_miss_latency 6750.932169 # average overall miss latency
-system.cpu.l2cache.overall_avg_miss_latency_0 6750.932169 # average overall miss latency
+system.cpu.l2cache.overall_avg_miss_latency 6748.795876 # average overall miss latency
+system.cpu.l2cache.overall_avg_miss_latency_0 6748.795876 # average overall miss latency
system.cpu.l2cache.overall_avg_miss_latency_1 <err: div-0> # average overall miss latency
-system.cpu.l2cache.overall_avg_mshr_miss_latency 3603.773895 # average overall mshr miss latency
-system.cpu.l2cache.overall_avg_mshr_miss_latency_0 3603.773895 # average overall mshr miss latency
+system.cpu.l2cache.overall_avg_mshr_miss_latency 3604.818557 # average overall mshr miss latency
+system.cpu.l2cache.overall_avg_mshr_miss_latency_0 3604.818557 # average overall mshr miss latency
system.cpu.l2cache.overall_avg_mshr_miss_latency_1 <err: div-0> # average overall mshr miss latency
system.cpu.l2cache.overall_avg_mshr_uncacheable_latency <err: div-0> # average overall mshr uncacheable latency
system.cpu.l2cache.overall_avg_mshr_uncacheable_latency_0 <err: div-0> # average overall mshr uncacheable latency
@@ -650,26 +650,26 @@ system.cpu.l2cache.overall_avg_mshr_uncacheable_latency_1 <err: div-0>
system.cpu.l2cache.overall_hits 0 # number of overall hits
system.cpu.l2cache.overall_hits_0 0 # number of overall hits
system.cpu.l2cache.overall_hits_1 0 # number of overall hits
-system.cpu.l2cache.overall_miss_latency 6568657 # number of overall miss cycles
-system.cpu.l2cache.overall_miss_latency_0 6568657 # number of overall miss cycles
+system.cpu.l2cache.overall_miss_latency 6546332 # number of overall miss cycles
+system.cpu.l2cache.overall_miss_latency_0 6546332 # number of overall miss cycles
system.cpu.l2cache.overall_miss_latency_1 0 # number of overall miss cycles
system.cpu.l2cache.overall_miss_rate 1 # miss rate for overall accesses
system.cpu.l2cache.overall_miss_rate_0 1 # miss rate for overall accesses
system.cpu.l2cache.overall_miss_rate_1 <err: div-0> # miss rate for overall accesses
-system.cpu.l2cache.overall_misses 973 # number of overall misses
-system.cpu.l2cache.overall_misses_0 973 # number of overall misses
+system.cpu.l2cache.overall_misses 970 # number of overall misses
+system.cpu.l2cache.overall_misses_0 970 # number of overall misses
system.cpu.l2cache.overall_misses_1 0 # number of overall misses
system.cpu.l2cache.overall_mshr_hits 0 # number of overall MSHR hits
system.cpu.l2cache.overall_mshr_hits_0 0 # number of overall MSHR hits
system.cpu.l2cache.overall_mshr_hits_1 0 # number of overall MSHR hits
-system.cpu.l2cache.overall_mshr_miss_latency 3506472 # number of overall MSHR miss cycles
-system.cpu.l2cache.overall_mshr_miss_latency_0 3506472 # number of overall MSHR miss cycles
+system.cpu.l2cache.overall_mshr_miss_latency 3496674 # number of overall MSHR miss cycles
+system.cpu.l2cache.overall_mshr_miss_latency_0 3496674 # number of overall MSHR miss cycles
system.cpu.l2cache.overall_mshr_miss_latency_1 0 # number of overall MSHR miss cycles
system.cpu.l2cache.overall_mshr_miss_rate 1 # mshr miss rate for overall accesses
system.cpu.l2cache.overall_mshr_miss_rate_0 1 # mshr miss rate for overall accesses
system.cpu.l2cache.overall_mshr_miss_rate_1 <err: div-0> # mshr miss rate for overall accesses
-system.cpu.l2cache.overall_mshr_misses 973 # number of overall MSHR misses
-system.cpu.l2cache.overall_mshr_misses_0 973 # number of overall MSHR misses
+system.cpu.l2cache.overall_mshr_misses 970 # number of overall MSHR misses
+system.cpu.l2cache.overall_mshr_misses_0 970 # number of overall MSHR misses
system.cpu.l2cache.overall_mshr_misses_1 0 # number of overall MSHR misses
system.cpu.l2cache.overall_mshr_uncacheable_latency 0 # number of overall MSHR uncacheable cycles
system.cpu.l2cache.overall_mshr_uncacheable_latency_0 0 # number of overall MSHR uncacheable cycles
@@ -689,35 +689,35 @@ system.cpu.l2cache.prefetcher.num_hwpf_squashed_from_miss 0
system.cpu.l2cache.replacements 0 # number of replacements
system.cpu.l2cache.replacements_0 0 # number of replacements
system.cpu.l2cache.replacements_1 0 # number of replacements
-system.cpu.l2cache.sampled_refs 973 # Sample count of references to valid blocks.
+system.cpu.l2cache.sampled_refs 969 # Sample count of references to valid blocks.
system.cpu.l2cache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions
system.cpu.l2cache.soft_prefetch_mshr_full_0 0 # number of mshr full events for SW prefetching instrutions
system.cpu.l2cache.soft_prefetch_mshr_full_1 0 # number of mshr full events for SW prefetching instrutions
-system.cpu.l2cache.tagsinuse 489.113488 # Cycle average of tags in use
+system.cpu.l2cache.tagsinuse 487.752870 # Cycle average of tags in use
system.cpu.l2cache.total_refs 0 # Total number of references to valid blocks.
system.cpu.l2cache.warmup_cycle 0 # Cycle when the warmup percentage was hit.
system.cpu.l2cache.writebacks 0 # number of writebacks
system.cpu.l2cache.writebacks_0 0 # number of writebacks
system.cpu.l2cache.writebacks_1 0 # number of writebacks
-system.cpu.numCycles 188950 # number of cpu cycles simulated
-system.cpu.rename.RENAME:BlockCycles 74870 # Number of cycles rename is blocking
+system.cpu.numCycles 189147 # number of cpu cycles simulated
+system.cpu.rename.RENAME:BlockCycles 73147 # Number of cycles rename is blocking
system.cpu.rename.RENAME:CommittedMaps 8102 # Number of HB maps that are committed
-system.cpu.rename.RENAME:IQFullEvents 21 # Number of times rename has blocked due to IQ full
-system.cpu.rename.RENAME:IdleCycles 263382 # Number of cycles rename is idle
-system.cpu.rename.RENAME:LSQFullEvents 2455 # Number of times rename has blocked due to LSQ full
+system.cpu.rename.RENAME:IQFullEvents 24 # Number of times rename has blocked due to IQ full
+system.cpu.rename.RENAME:IdleCycles 265134 # Number of cycles rename is idle
+system.cpu.rename.RENAME:LSQFullEvents 2520 # Number of times rename has blocked due to LSQ full
system.cpu.rename.RENAME:ROBFullEvents 31 # Number of times rename has blocked due to ROB full
-system.cpu.rename.RENAME:RenameLookups 72755 # Number of register rename lookups that rename has made
-system.cpu.rename.RENAME:RenamedInsts 60875 # Number of instructions processed by rename
-system.cpu.rename.RENAME:RenamedOperands 44048 # Number of destination operands rename has renamed
-system.cpu.rename.RENAME:RunCycles 11047 # Number of cycles rename is running
-system.cpu.rename.RENAME:SquashCycles 5552 # Number of cycles rename is squashing
-system.cpu.rename.RENAME:UnblockCycles 2536 # Number of cycles rename is unblocking
-system.cpu.rename.RENAME:UndoneMaps 35946 # Number of HB maps that are undone due to squashing
-system.cpu.rename.RENAME:serializeStallCycles 20340 # count of cycles rename stalled for serializing inst
-system.cpu.rename.RENAME:serializingInsts 51 # count of serializing insts renamed
-system.cpu.rename.RENAME:skidInsts 4990 # count of insts added to the skid buffer
-system.cpu.rename.RENAME:tempSerializingInsts 38 # count of temporary serializing insts renamed
-system.cpu.timesIdled 690 # Number of times that the entire CPU went into an idle state and unscheduled itself
+system.cpu.rename.RENAME:RenameLookups 74254 # Number of register rename lookups that rename has made
+system.cpu.rename.RENAME:RenamedInsts 61970 # Number of instructions processed by rename
+system.cpu.rename.RENAME:RenamedOperands 45003 # Number of destination operands rename has renamed
+system.cpu.rename.RENAME:RunCycles 11202 # Number of cycles rename is running
+system.cpu.rename.RENAME:SquashCycles 5733 # Number of cycles rename is squashing
+system.cpu.rename.RENAME:UnblockCycles 2584 # Number of cycles rename is unblocking
+system.cpu.rename.RENAME:UndoneMaps 36901 # Number of HB maps that are undone due to squashing
+system.cpu.rename.RENAME:serializeStallCycles 20319 # count of cycles rename stalled for serializing inst
+system.cpu.rename.RENAME:serializingInsts 49 # count of serializing insts renamed
+system.cpu.rename.RENAME:skidInsts 5114 # count of insts added to the skid buffer
+system.cpu.rename.RENAME:tempSerializingInsts 37 # count of temporary serializing insts renamed
+system.cpu.timesIdled 691 # Number of times that the entire CPU went into an idle state and unscheduled itself
system.cpu.workload0.PROG:num_syscalls 17 # Number of system calls
system.cpu.workload1.PROG:num_syscalls 17 # Number of system calls
diff --git a/tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/stderr b/tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/stderr
index c36de0b79..d8ccd6207 100644
--- a/tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/stderr
+++ b/tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/stderr
@@ -1,3 +1,5 @@
-0: system.remote_gdb.listener: listening for remote gdb on port 7000
-0: system.remote_gdb.listener: listening for remote gdb on port 7001
+0: system.remote_gdb.listener: listening for remote gdb #0 on port 7000
+0: system.remote_gdb.listener: listening for remote gdb #1 on port 7001
warn: Entering event queue @ 0. Starting simulation...
+warn: Increasing stack size by one page.
+warn: Increasing stack size by one page.
diff --git a/tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/stdout b/tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/stdout
index f07a960f8..30a45522d 100644
--- a/tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/stdout
+++ b/tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/stdout
@@ -7,8 +7,9 @@ The Regents of The University of Michigan
All Rights Reserved
-M5 compiled Jan 22 2007 23:06:52
-M5 started Mon Jan 22 23:07:23 2007
-M5 executing on ewok
-command line: build/ALPHA_SE/m5.fast -d build/ALPHA_SE/tests/fast/quick/01.hello-2T-smt/alpha/linux/o3-timing tests/run.py quick/01.hello-2T-smt/alpha/linux/o3-timing
-Exiting @ tick 2237162 because target called exit()
+M5 compiled Mar 24 2007 13:51:02
+M5 started Sat Mar 24 13:51:16 2007
+M5 executing on zizzer.eecs.umich.edu
+command line: build/ALPHA_SE/m5.opt -d build/ALPHA_SE/tests/opt/quick/01.hello-2T-smt/alpha/linux/o3-timing tests/run.py quick/01.hello-2T-smt/alpha/linux/o3-timing
+Global frequency set at 1000000000000 ticks per second
+Exiting @ tick 2232164 because target called exit()