42 files changed, 1209 insertions, 1102 deletions
diff --git a/src/arch/alpha/isa/decoder.isa b/src/arch/alpha/isa/decoder.isa
index b62372f66..af1a91a62 100644
--- a/src/arch/alpha/isa/decoder.isa
+++ b/src/arch/alpha/isa/decoder.isa
@@ -728,8 +728,10 @@ decode OPCODE default Unknown::unknown() {
         0: OpcdecFault::hw_st_quad();
         1: decode HW_LDST_QUAD {
             format HwLoad {
-                0: hw_ld({{ EA = (Rb + disp) & ~3; }}, {{ Ra = Mem.ul; }}, L);
-                1: hw_ld({{ EA = (Rb + disp) & ~7; }}, {{ Ra = Mem.uq; }}, Q);
+                0: hw_ld({{ EA = (Rb + disp) & ~3; }}, {{ Ra = Mem.ul; }},
+                         L, IsSerializing, IsSerializeBefore);
+                1: hw_ld({{ EA = (Rb + disp) & ~7; }}, {{ Ra = Mem.uq; }},
+                         Q, IsSerializing, IsSerializeBefore);
             }
         }
     }
@@ -740,9 +742,9 @@ decode OPCODE default Unknown::unknown() {
             1: decode HW_LDST_COND {
                 0: decode HW_LDST_QUAD {
                     0: hw_st({{ EA = (Rb + disp) & ~3; }},
-                {{ Mem.ul = Ra<31:0>; }}, L);
+                {{ Mem.ul = Ra<31:0>; }}, L, IsSerializing, IsSerializeBefore);
                     1: hw_st({{ EA = (Rb + disp) & ~7; }},
-                {{ Mem.uq = Ra.uq; }}, Q);
+                {{ Mem.uq = Ra.uq; }}, Q, IsSerializing, IsSerializeBefore);
                 }
 
                 1: FailUnimpl::hw_st_cond();
diff --git a/src/arch/sparc/isa/decoder.isa b/src/arch/sparc/isa/decoder.isa
index 556bb4bca..68b2183ad 100644
--- a/src/arch/sparc/isa/decoder.isa
+++ b/src/arch/sparc/isa/decoder.isa
@@ -1324,8 +1324,14 @@ decode OP default Unknown::unknown()
             0x05: stb({{Mem.ub = Rd.sb;}});
             0x06: sth({{Mem.uhw = Rd.shw;}});
             0x07: sttw({{
-                      (Mem.tuw).a = RdLow<31:0>;
-                      (Mem.tuw).b = RdHigh<31:0>;
+                      //This temporary needs to be here so that the parser
+                      //will correctly identify this instruction as a store.
+                      //It's probably either the parentheses or referencing
+                      //the member variable that confuses it.
+                      Twin32_t temp;
+                      temp.a = RdLow<31:0>;
+                      temp.b = RdHigh<31:0>;
+                      Mem.tuw = temp;
                   }});
         }
         format Load {
@@ -1417,8 +1423,14 @@ decode OP default Unknown::unknown()
             0x15: stba({{Mem.ub = Rd;}}, {{EXT_ASI}});
             0x16: stha({{Mem.uhw = Rd;}}, {{EXT_ASI}});
             0x17: sttwa({{
-                      (Mem.tuw).a = RdLow<31:0>;
-                      (Mem.tuw).b = RdHigh<31:0>;
+                      //This temporary needs to be here so that the parser
+                      //will correctly identify this instruction as a store.
+                      //It's probably either the parentheses or referencing
+                      //the member variable that confuses it.
+                      Twin32_t temp;
+                      temp.a = RdLow<31:0>;
+                      temp.b = RdHigh<31:0>;
+                      Mem.tuw = temp;
                   }}, {{EXT_ASI}});
         }
         format LoadAlt {
diff --git a/src/cpu/base.cc b/src/cpu/base.cc
index 3e0be6ad8..4dccee0d3 100644
--- a/src/cpu/base.cc
+++ b/src/cpu/base.cc
@@ -226,7 +226,8 @@ BaseCPU::startup()
 #endif
 
     if (params->progress_interval) {
-        new CPUProgressEvent(&mainEventQueue, params->progress_interval,
+        new CPUProgressEvent(&mainEventQueue,
+                             cycles(params->progress_interval),
                              this);
     }
 }
diff --git a/src/cpu/base_dyn_inst.hh b/src/cpu/base_dyn_inst.hh
index 9ccdcdccc..6c6d90076 100644
--- a/src/cpu/base_dyn_inst.hh
+++ b/src/cpu/base_dyn_inst.hh
@@ -171,15 +171,15 @@ class BaseDynInst : public FastAlloc, public RefCounted
     /** The kind of fault this instruction has generated. */
     Fault fault;
 
-    /** The memory request. */
-    Request *req;
-
     /** Pointer to the data for the memory access. */
     uint8_t *memData;
 
     /** The effective virtual address (lds & stores only). */
     Addr effAddr;
 
+    /** Is the effective virtual address valid. */
+    bool effAddrValid;
+
     /** The effective physical address. */
     Addr physEffAddr;
 
@@ -601,12 +601,18 @@ class BaseDynInst : public FastAlloc, public RefCounted
     /** Returns whether or not this instruction is ready to issue. */
     bool readyToIssue() const { return status[CanIssue]; }
 
+    /** Clears this instruction's ability to issue. */
+    void clearCanIssue() { status.reset(CanIssue); }
+
     /** Sets this instruction as issued from the IQ. */
     void setIssued() { status.set(Issued); }
 
     /** Returns whether or not this instruction has issued. */
     bool isIssued() const { return status[Issued]; }
 
+    /** Clears this instruction as being issued. */
+    void clearIssued() { status.reset(Issued); }
+
     /** Sets this instruction as executed. */
     void setExecuted() { status.set(Executed); }
 
@@ -729,6 +735,12 @@ class BaseDynInst : public FastAlloc, public RefCounted
      */
     bool eaCalcDone;
 
+    /** Is this instruction's memory access uncacheable. */
+    bool isUncacheable;
+
+    /** Has this instruction generated a memory request. */
+    bool reqMade;
+
   public:
     /** Sets the effective address. */
     void setEA(Addr &ea) { instEffAddr = ea; eaCalcDone = true; }
@@ -745,6 +757,12 @@ class BaseDynInst : public FastAlloc, public RefCounted
     /** Whether or not the memory operation is done. */
     bool memOpDone;
 
+    /** Is this instruction's memory access uncacheable. */
+    bool uncacheable() { return isUncacheable; }
+
+    /** Has this instruction generated a memory request. */
+    bool hasRequest() { return reqMade; }
+
   public:
     /** Load queue index. */
     int16_t lqIdx;
@@ -776,25 +794,25 @@ template<class T>
 inline Fault
 BaseDynInst<Impl>::read(Addr addr, T &data, unsigned flags)
 {
-    // Sometimes reads will get retried, so they may come through here
-    // twice.
-    if (!req) {
-        req = new Request();
-        req->setVirt(asid, addr, sizeof(T), flags, this->PC);
-        req->setThreadContext(thread->readCpuId(), threadNumber);
-    } else {
-        assert(addr == req->getVaddr());
-    }
+    reqMade = true;
+    Request *req = new Request();
+    req->setVirt(asid, addr, sizeof(T), flags, this->PC);
+    req->setThreadContext(thread->readCpuId(), threadNumber);
 
     if ((req->getVaddr() & (TheISA::VMPageSize - 1)) + req->getSize() >
         TheISA::VMPageSize) {
+        delete req;
         return TheISA::genAlignmentFault();
     }
 
     fault = cpu->translateDataReadReq(req, thread);
 
+    if (req->isUncacheable())
+        isUncacheable = true;
+
     if (fault == NoFault) {
         effAddr = req->getVaddr();
+        effAddrValid = true;
         physEffAddr = req->getPaddr();
         memReqFlags = req->getFlags();
 
@@ -817,6 +835,7 @@ BaseDynInst<Impl>::read(Addr addr, T &data, unsigned flags)
         // Commit will have to clean up whatever happened.  Set this
         // instruction as executed.
         this->setExecuted();
+        delete req;
     }
 
     if (traceData) {
@@ -837,21 +856,25 @@ BaseDynInst<Impl>::write(T data, Addr addr, unsigned flags, uint64_t *res)
         traceData->setData(data);
     }
 
-    assert(req == NULL);
-
-    req = new Request();
+    reqMade = true;
+    Request *req = new Request();
     req->setVirt(asid, addr, sizeof(T), flags, this->PC);
     req->setThreadContext(thread->readCpuId(), threadNumber);
 
     if ((req->getVaddr() & (TheISA::VMPageSize - 1)) + req->getSize() >
         TheISA::VMPageSize) {
+        delete req;
         return TheISA::genAlignmentFault();
     }
 
     fault = cpu->translateDataWriteReq(req, thread);
 
+    if (req->isUncacheable())
+        isUncacheable = true;
+
     if (fault == NoFault) {
         effAddr = req->getVaddr();
+        effAddrValid = true;
         physEffAddr = req->getPaddr();
         memReqFlags = req->getFlags();
 #if 0
@@ -863,12 +886,8 @@ BaseDynInst<Impl>::write(T data, Addr addr, unsigned flags, uint64_t *res)
 #else
         fault = cpu->write(req, data, sqIdx);
 #endif
-    }
-
-    if (res) {
-        // always return some result to keep misspeculated paths
-        // (which will ignore faults) deterministic
-        *res = (fault == NoFault) ? req->getExtraData() : 0;
+    } else {
+        delete req;
     }
 
     return fault;
diff --git a/src/cpu/base_dyn_inst_impl.hh b/src/cpu/base_dyn_inst_impl.hh
index c3d71e428..a1c866336 100644
--- a/src/cpu/base_dyn_inst_impl.hh
+++ b/src/cpu/base_dyn_inst_impl.hh
@@ -92,11 +92,13 @@ template <class Impl>
 void
 BaseDynInst<Impl>::initVars()
 {
-    req = NULL;
     memData = NULL;
     effAddr = 0;
+    effAddrValid = false;
     physEffAddr = 0;
 
+    isUncacheable = false;
+    reqMade = false;
     readyRegs = 0;
 
     instResult.integer = 0;
@@ -140,10 +142,6 @@ BaseDynInst<Impl>::initVars()
 template <class Impl>
 BaseDynInst<Impl>::~BaseDynInst()
 {
-    if (req) {
-        delete req;
-    }
-
     if (memData) {
         delete [] memData;
     }
@@ -271,7 +269,7 @@ void
 BaseDynInst<Impl>::markSrcRegReady()
 {
     if (++readyRegs == numSrcRegs()) {
-        status.set(CanIssue);
+        setCanIssue();
     }
 }
 
diff --git a/src/cpu/o3/alpha/cpu_builder.cc b/src/cpu/o3/alpha/cpu_builder.cc
index 5a375a4b8..34754d3c5 100644
--- a/src/cpu/o3/alpha/cpu_builder.cc
+++ b/src/cpu/o3/alpha/cpu_builder.cc
@@ -50,11 +50,11 @@ BEGIN_DECLARE_SIM_OBJECT_PARAMS(DerivO3CPU)
     Param<int> clock;
     Param<int> phase;
     Param<int> numThreads;
+Param<int> cpu_id;
 Param<int> activity;
 
 #if FULL_SYSTEM
 SimObjectParam<System *> system;
-Param<int> cpu_id;
 SimObjectParam<AlphaISA::ITB *> itb;
 SimObjectParam<AlphaISA::DTB *> dtb;
 Param<Tick> profile;
@@ -161,11 +161,11 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(DerivO3CPU)
     INIT_PARAM(clock, "clock speed"),
     INIT_PARAM_DFLT(phase, "clock phase", 0),
     INIT_PARAM(numThreads, "number of HW thread contexts"),
+    INIT_PARAM(cpu_id, "processor ID"),
     INIT_PARAM_DFLT(activity, "Initial activity count", 0),
 
 #if FULL_SYSTEM
     INIT_PARAM(system, "System object"),
-    INIT_PARAM(cpu_id, "processor ID"),
     INIT_PARAM(itb, "Instruction translation buffer"),
     INIT_PARAM(dtb, "Data translation buffer"),
     INIT_PARAM(profile, ""),
@@ -305,14 +305,15 @@ CREATE_SIM_OBJECT(DerivO3CPU)
     AlphaSimpleParams *params = new AlphaSimpleParams;
 
     params->clock = clock;
+    params->phase = phase;
 
     params->name = getInstanceName();
     params->numberOfThreads = actual_num_threads;
+    params->cpu_id = cpu_id;
     params->activity = activity;
 
 #if FULL_SYSTEM
     params->system = system;
-    params->cpu_id = cpu_id;
     params->itb = itb;
     params->dtb = dtb;
     params->profile = profile;
diff --git a/src/cpu/o3/alpha/cpu_impl.hh b/src/cpu/o3/alpha/cpu_impl.hh
index b91972704..304ee6c38 100644
--- a/src/cpu/o3/alpha/cpu_impl.hh
+++ b/src/cpu/o3/alpha/cpu_impl.hh
@@ -114,6 +114,7 @@ AlphaO3CPU<Impl>::AlphaO3CPU(Params *params) : FullO3CPU<Impl>(params)
 #endif
         // Give the thread the TC.
         this->thread[i]->tc = tc;
+        this->thread[i]->setCpuId(params->cpu_id);
 
         // Add the TC to the CPU's list of TC's.
         this->threadContexts.push_back(tc);
diff --git a/src/cpu/o3/commit.hh b/src/cpu/o3/commit.hh
index 0d7d82529..e2ad23954 100644
--- a/src/cpu/o3/commit.hh
+++ b/src/cpu/o3/commit.hh
@@ -247,6 +247,11 @@ class DefaultCommit
     /** Handles squashing due to an TC write. */
     void squashFromTC(unsigned tid);
 
+#if FULL_SYSTEM
+    /** Handles processing an interrupt. */
+    void handleInterrupt();
+#endif // FULL_SYSTEM
+
     /** Commits as many instructions as possible. */
     void commitInsts();
 
@@ -409,6 +414,16 @@ class DefaultCommit
     /** The sequence number of the youngest valid instruction in the ROB. */
     InstSeqNum youngestSeqNum[Impl::MaxThreads];
 
+    /** Records if there is a trap currently in flight. */
+    bool trapInFlight[Impl::MaxThreads];
+
+    /** Records if there were any stores committed this cycle. */
+    bool committedStores[Impl::MaxThreads];
+
+    /** Records if commit should check if the ROB is truly empty (see
+        commit_impl.hh). */
+    bool checkEmptyROB[Impl::MaxThreads];
+
     /** Pointer to the list of active threads. */
     std::list<unsigned> *activeThreads;
 
diff --git a/src/cpu/o3/commit_impl.hh b/src/cpu/o3/commit_impl.hh
index 18fb2aaa3..3fd85595f 100644
--- a/src/cpu/o3/commit_impl.hh
+++ b/src/cpu/o3/commit_impl.hh
@@ -118,6 +118,9 @@ DefaultCommit<Impl>::DefaultCommit(Params *params)
     for (int i=0; i < numThreads; i++) {
         commitStatus[i] = Idle;
         changedROBNumEntries[i] = false;
+        checkEmptyROB[i] = false;
+        trapInFlight[i] = false;
+        committedStores[i] = false;
         trapSquash[i] = false;
         tcSquash[i] = false;
         PC[i] = nextPC[i] = nextNPC[i] = 0;
@@ -335,6 +338,7 @@ DefaultCommit<Impl>::initStage()
     for (int i=0; i < numThreads; i++) {
         toIEW->commitInfo[i].usedROB = true;
         toIEW->commitInfo[i].freeROBEntries = rob->numFreeEntries(i);
+        toIEW->commitInfo[i].emptyROB = true;
     }
 
     cpu->activityThisCycle();
@@ -473,14 +477,14 @@ DefaultCommit<Impl>::generateTrapEvent(unsigned tid)
     TrapEvent *trap = new TrapEvent(this, tid);
 
     trap->schedule(curTick + trapLatency);
-
-    thread[tid]->trapPending = true;
+    trapInFlight[tid] = true;
 }
 
 template <class Impl>
 void
 DefaultCommit<Impl>::generateTCEvent(unsigned tid)
 {
+    assert(!trapInFlight[tid]);
     DPRINTF(Commit, "Generating TC squash event for [tid:%i]\n", tid);
 
     tcSquash[tid] = true;
@@ -495,7 +499,7 @@ DefaultCommit<Impl>::squashAll(unsigned tid)
     // Hopefully this doesn't mess things up.  Basically I want to squash
     // all instructions of this thread.
     InstSeqNum squashed_inst = rob->isEmpty() ?
-        0 : rob->readHeadInst(tid)->seqNum - 1;;
+        0 : rob->readHeadInst(tid)->seqNum - 1;
 
     // All younger instructions will be squashed. Set the sequence
     // number as the youngest instruction in the ROB (0 in this case.
@@ -532,6 +536,7 @@ DefaultCommit<Impl>::squashFromTrap(unsigned tid)
 
     thread[tid]->trapPending = false;
     thread[tid]->inSyscall = false;
+    trapInFlight[tid] = false;
 
     trapSquash[tid] = false;
 
@@ -580,6 +585,10 @@ DefaultCommit<Impl>::tick()
     while (threads != end) {
         unsigned tid = *threads++;
 
+        // Clear the bit saying if the thread has committed stores
+        // this cycle.
+        committedStores[tid] = false;
+
         if (commitStatus[tid] == ROBSquashing) {
 
             if (rob->isDoneSquashing(tid)) {
@@ -635,16 +644,11 @@ DefaultCommit<Impl>::tick()
     updateStatus();
 }
 
+#if FULL_SYSTEM
 template <class Impl>
 void
-DefaultCommit<Impl>::commit()
+DefaultCommit<Impl>::handleInterrupt()
 {
-
-    //////////////////////////////////////
-    // Check for interrupts
-    //////////////////////////////////////
-
-#if FULL_SYSTEM
     if (interrupt != NoFault) {
         // Wait until the ROB is empty and all stores have drained in
         // order to enter the interrupt.
@@ -653,6 +657,12 @@ DefaultCommit<Impl>::commit()
             // an interrupt needed to be handled.
             DPRINTF(Commit, "Interrupt detected.\n");
 
+            Fault new_interrupt = cpu->getInterrupts();
+            assert(new_interrupt != NoFault);
+
+            // Clear the interrupt now that it's going to be handled
+            toIEW->commitInfo[0].clearInterrupt = true;
+
             assert(!thread[0]->inSyscall);
             thread[0]->inSyscall = true;
 
@@ -666,16 +676,14 @@ DefaultCommit<Impl>::commit()
             // Generate trap squash event.
             generateTrapEvent(0);
 
-            // Clear the interrupt now that it's been handled
-            toIEW->commitInfo[0].clearInterrupt = true;
             interrupt = NoFault;
         } else {
             DPRINTF(Commit, "Interrupt pending, waiting for ROB to empty.\n");
         }
-    } else if (cpu->check_interrupts(cpu->tcBase(0)) &&
-        commitStatus[0] != TrapPending &&
-        !trapSquash[0] &&
-        !tcSquash[0]) {
+    } else if (commitStatus[0] != TrapPending &&
+               cpu->check_interrupts(cpu->tcBase(0)) &&
+               !trapSquash[0] &&
+               !tcSquash[0]) {
         // Process interrupts if interrupts are enabled, not in PAL
         // mode, and no other traps or external squashes are currently
         // pending.
@@ -691,7 +699,21 @@ DefaultCommit<Impl>::commit()
             toIEW->commitInfo[0].interruptPending = true;
         }
     }
+}
+#endif // FULL_SYSTEM
+
+template <class Impl>
+void
+DefaultCommit<Impl>::commit()
+{
 
+#if FULL_SYSTEM
+    // Check for any interrupt, and start processing it.  Or if we
+    // have an outstanding interrupt and are at a point when it is
+    // valid to take an interrupt, process it.
+    if (cpu->check_interrupts(cpu->tcBase(0))) {
+        handleInterrupt();
+    }
 #endif // FULL_SYSTEM
 
     ////////////////////////////////////
@@ -709,6 +731,7 @@ DefaultCommit<Impl>::commit()
             assert(!tcSquash[tid]);
             squashFromTrap(tid);
         } else if (tcSquash[tid] == true) {
+            assert(commitStatus[tid] != TrapPending);
             squashFromTC(tid);
         }
 
@@ -753,6 +776,7 @@ DefaultCommit<Impl>::commit()
                 bdelay_done_seq_num--;
 #endif
             }
+
             // All younger instructions will be squashed. Set the sequence
             // number as the youngest instruction in the ROB.
             youngestSeqNum[tid] = squashed_inst;
@@ -817,13 +841,29 @@ DefaultCommit<Impl>::commit()
             toIEW->commitInfo[tid].usedROB = true;
             toIEW->commitInfo[tid].freeROBEntries = rob->numFreeEntries(tid);
 
-            if (rob->isEmpty(tid)) {
-                toIEW->commitInfo[tid].emptyROB = true;
-            }
-
             wroteToTimeBuffer = true;
             changedROBNumEntries[tid] = false;
+            if (rob->isEmpty(tid))
+                checkEmptyROB[tid] = true;
         }
+
+        // ROB is only considered "empty" for previous stages if: a)
+        // ROB is empty, b) there are no outstanding stores, c) IEW
+        // stage has received any information regarding stores that
+        // committed.
+        // c) is checked by making sure to not consider the ROB empty
+        // on the same cycle as when stores have been committed.
+        // @todo: Make this handle multi-cycle communication between
+        // commit and IEW.
+        if (checkEmptyROB[tid] && rob->isEmpty(tid) &&
+            !iewStage->hasStoresToWB() && !committedStores[tid]) {
+            checkEmptyROB[tid] = false;
+            toIEW->commitInfo[tid].usedROB = true;
+            toIEW->commitInfo[tid].emptyROB = true;
+            toIEW->commitInfo[tid].freeROBEntries = rob->numFreeEntries(tid);
+            wroteToTimeBuffer = true;
+        }
+
     }
 }
 
@@ -966,8 +1006,6 @@ DefaultCommit<Impl>::commitHead(DynInstPtr &head_inst, unsigned inst_num)
         // and committed this instruction.
         thread[tid]->funcExeInst--;
 
-        head_inst->setAtCommit();
-
         if (head_inst->isNonSpeculative() ||
             head_inst->isStoreConditional() ||
             head_inst->isMemBarrier() ||
@@ -977,19 +1015,9 @@ DefaultCommit<Impl>::commitHead(DynInstPtr &head_inst, unsigned inst_num)
                     "instruction [sn:%lli] at the head of the ROB, PC %#x.\n",
                     head_inst->seqNum, head_inst->readPC());
 
-            // Hack to make sure syscalls/memory barriers/quiesces
-            // aren't executed until all stores write back their data.
-            // This direct communication shouldn't be used for
-            // anything other than this.
-            if ((head_inst->isMemBarrier() || head_inst->isWriteBarrier() ||
-                    head_inst->isQuiesce()) &&
-                iewStage->hasStoresToWB())
-            {
+            if (inst_num > 0 || iewStage->hasStoresToWB()) {
                 DPRINTF(Commit, "Waiting for all stores to writeback.\n");
                 return false;
-            } else if (inst_num > 0 || iewStage->hasStoresToWB()) {
-                DPRINTF(Commit, "Waiting to become head of commit.\n");
-                return false;
             }
 
             toIEW->commitInfo[tid].nonSpecSeqNum = head_inst->seqNum;
@@ -1002,6 +1030,12 @@ DefaultCommit<Impl>::commitHead(DynInstPtr &head_inst, unsigned inst_num)
 
             return false;
         } else if (head_inst->isLoad()) {
+            if (inst_num > 0 || iewStage->hasStoresToWB()) {
+                DPRINTF(Commit, "Waiting for all stores to writeback.\n");
+                return false;
+            }
+
+            assert(head_inst->uncacheable());
             DPRINTF(Commit, "[sn:%lli]: Uncached load, PC %#x.\n",
                     head_inst->seqNum, head_inst->readPC());
 
@@ -1025,8 +1059,11 @@ DefaultCommit<Impl>::commitHead(DynInstPtr &head_inst, unsigned inst_num)
         panic("Thread sync instructions are not handled yet.\n");
     }
 
+    // Check if the instruction caused a fault.  If so, trap.
+    Fault inst_fault = head_inst->getFault();
+
     // Stores mark themselves as completed.
-    if (!head_inst->isStore()) {
+    if (!head_inst->isStore() && inst_fault == NoFault) {
         head_inst->setCompleted();
     }
 
@@ -1038,9 +1075,6 @@ DefaultCommit<Impl>::commitHead(DynInstPtr &head_inst, unsigned inst_num)
     }
 #endif
 
-    // Check if the instruction caused a fault.  If so, trap.
-    Fault inst_fault = head_inst->getFault();
-
     // DTB will sometimes need the machine instruction for when
     // faults happen.  So we will set it here, prior to the DTB
     // possibly needing it for its fault.
@@ -1048,7 +1082,6 @@ DefaultCommit<Impl>::commitHead(DynInstPtr &head_inst, unsigned inst_num)
         static_cast<TheISA::MachInst>(head_inst->staticInst->machInst));
 
     if (inst_fault != NoFault) {
-        head_inst->setCompleted();
         DPRINTF(Commit, "Inst [sn:%lli] PC %#x has a fault\n",
                 head_inst->seqNum, head_inst->readPC());
 
@@ -1057,6 +1090,8 @@ DefaultCommit<Impl>::commitHead(DynInstPtr &head_inst, unsigned inst_num)
             return false;
         }
 
+        head_inst->setCompleted();
+
 #if USE_CHECKER
         if (cpu->checker && head_inst->isStore()) {
             cpu->checker->verify(head_inst);
@@ -1082,6 +1117,14 @@ DefaultCommit<Impl>::commitHead(DynInstPtr &head_inst, unsigned inst_num)
 
         commitStatus[tid] = TrapPending;
 
+        if (head_inst->traceData) {
+            head_inst->traceData->setFetchSeq(head_inst->seqNum);
+            head_inst->traceData->setCPSeq(thread[tid]->numInst);
+            head_inst->traceData->dump();
+            delete head_inst->traceData;
+            head_inst->traceData = NULL;
+        }
+
         // Generate trap squash event.
         generateTrapEvent(tid);
 //        warn("%lli fault (%d) handled @ PC %08p", curTick, inst_fault->name(), head_inst->readPC());
@@ -1123,6 +1166,10 @@ DefaultCommit<Impl>::commitHead(DynInstPtr &head_inst, unsigned inst_num)
     // Finally clear the head ROB entry.
     rob->retireHead(tid);
 
+    // If this was a store, record it for this cycle.
+    if (head_inst->isStore())
+        committedStores[tid] = true;
+
     // Return true to indicate that we have committed an instruction.
     return true;
 }
@@ -1167,7 +1214,8 @@ DefaultCommit<Impl>::getInsts()
         int tid = inst->threadNumber;
 
         if (!inst->isSquashed() &&
-            commitStatus[tid] != ROBSquashing) {
+            commitStatus[tid] != ROBSquashing &&
+            commitStatus[tid] != TrapPending) {
             changedROBNumEntries[tid] = true;
 
             DPRINTF(Commit, "Inserting PC %#x [sn:%i] [tid:%i] into ROB.\n",
diff --git a/src/cpu/o3/cpu.cc b/src/cpu/o3/cpu.cc
index 38e6a0b5b..354e3c490 100644
--- a/src/cpu/o3/cpu.cc
+++ b/src/cpu/o3/cpu.cc
@@ -466,7 +466,7 @@ FullO3CPU<Impl>::tick()
             lastRunningCycle = curTick;
             timesIdled++;
         } else {
-            tickEvent.schedule(curTick + cycles(1));
+            tickEvent.schedule(nextCycle(curTick + cycles(1)));
             DPRINTF(O3CPU, "Scheduling next tick!\n");
         }
     }
@@ -886,7 +886,7 @@ FullO3CPU<Impl>::resume()
 #endif
 
     if (!tickEvent.scheduled())
-        tickEvent.schedule(curTick);
+        tickEvent.schedule(nextCycle());
     _status = Running;
 }
 
@@ -979,11 +979,11 @@ FullO3CPU<Impl>::takeOverFrom(BaseCPU *oldCPU)
         ThreadContext *tc = threadContexts[i];
         if (tc->status() == ThreadContext::Active && _status != Running) {
             _status = Running;
-            tickEvent.schedule(curTick);
+            tickEvent.schedule(nextCycle());
         }
     }
     if (!tickEvent.scheduled())
-        tickEvent.schedule(curTick);
+        tickEvent.schedule(nextCycle());
 }
 
 template <class Impl>
@@ -1393,7 +1393,7 @@ FullO3CPU<Impl>::wakeCPU()
 
     idleCycles += (curTick - 1) - lastRunningCycle;
 
-    tickEvent.schedule(curTick);
+    tickEvent.schedule(nextCycle());
 }
 
 template <class Impl>
diff --git a/src/cpu/o3/cpu.hh b/src/cpu/o3/cpu.hh
index ea374dd57..0ab20ba2a 100644
--- a/src/cpu/o3/cpu.hh
+++ b/src/cpu/o3/cpu.hh
@@ -146,9 +146,9 @@ class FullO3CPU : public BaseO3CPU
     void scheduleTickEvent(int delay)
     {
         if (tickEvent.squashed())
-            tickEvent.reschedule(curTick + cycles(delay));
+            tickEvent.reschedule(nextCycle(curTick + cycles(delay)));
         else if (!tickEvent.scheduled())
-            tickEvent.schedule(curTick + cycles(delay));
+            tickEvent.schedule(nextCycle(curTick + cycles(delay)));
     }
 
     /** Unschedule tick event, regardless of its current state. */
@@ -186,9 +186,11 @@ class FullO3CPU : public BaseO3CPU
     {
         // Schedule thread to activate, regardless of its current state.
         if (activateThreadEvent[tid].squashed())
-            activateThreadEvent[tid].reschedule(curTick + cycles(delay));
+            activateThreadEvent[tid].
+                reschedule(nextCycle(curTick + cycles(delay)));
         else if (!activateThreadEvent[tid].scheduled())
-            activateThreadEvent[tid].schedule(curTick + cycles(delay));
+            activateThreadEvent[tid].
+                schedule(nextCycle(curTick + cycles(delay)));
     }
 
     /** Unschedule actiavte thread event, regardless of its current state. */
@@ -235,9 +237,11 @@ class FullO3CPU : public BaseO3CPU
     {
         // Schedule thread to activate, regardless of its current state.
         if (deallocateContextEvent[tid].squashed())
-            deallocateContextEvent[tid].reschedule(curTick + cycles(delay));
+            deallocateContextEvent[tid].
+                reschedule(nextCycle(curTick + cycles(delay)));
         else if (!deallocateContextEvent[tid].scheduled())
-            deallocateContextEvent[tid].schedule(curTick + cycles(delay));
+            deallocateContextEvent[tid].
+                schedule(nextCycle(curTick + cycles(delay)));
     }
 
     /** Unschedule thread deallocation in CPU */
diff --git a/src/cpu/o3/fetch_impl.hh b/src/cpu/o3/fetch_impl.hh
index 1256dd233..663cd3142 100644
--- a/src/cpu/o3/fetch_impl.hh
+++ b/src/cpu/o3/fetch_impl.hh
@@ -620,6 +620,7 @@ DefaultFetch<Impl>::fetchCacheLine(Addr fetch_PC, Fault &ret_fault, unsigned tid
                 fault = TheISA::genMachineCheckFault();
                 delete mem_req;
                 memReq[tid] = NULL;
+                warn("Bad address!\n");
             }
             assert(retryPkt == NULL);
             assert(retryTid == -1);
@@ -670,11 +671,12 @@ DefaultFetch<Impl>::doSquash(const Addr &new_PC,
     // Get rid of the retrying packet if it was from this thread.
     if (retryTid == tid) {
         assert(cacheBlocked);
-        cacheBlocked = false;
-        retryTid = -1;
-        delete retryPkt->req;
-        delete retryPkt;
+        if (retryPkt) {
+            delete retryPkt->req;
+            delete retryPkt;
+        }
         retryPkt = NULL;
+        retryTid = -1;
     }
 
     fetchStatus[tid] = Squashing;
@@ -1150,7 +1152,7 @@ DefaultFetch<Impl>::fetch(bool &status_change)
 
             ///FIXME This needs to be more robust in dealing with delay slots
 #if !ISA_HAS_DELAY_SLOT
-            predicted_branch |=
+//	    predicted_branch |=
 #endif
             lookupAndUpdateNextPC(instruction, next_PC, next_NPC);
             predicted_branch |= (next_PC != fetch_NPC);
@@ -1221,7 +1223,7 @@ DefaultFetch<Impl>::fetch(bool &status_change)
         // until commit handles the fault.  The only other way it can
         // wake up is if a squash comes along and changes the PC.
 #if FULL_SYSTEM
-        assert(numInst != fetchWidth);
+        assert(numInst < fetchWidth);
         // Get a sequence number.
         inst_seq = cpu->getAndIncrementInstSeq();
         // We will use a nop in order to carry the fault.
diff --git a/src/cpu/o3/iew_impl.hh b/src/cpu/o3/iew_impl.hh
index f24eaf2c4..4883e5a5c 100644
--- a/src/cpu/o3/iew_impl.hh
+++ b/src/cpu/o3/iew_impl.hh
@@ -1153,19 +1153,6 @@ DefaultIEW<Impl>::dispatchInsts(unsigned tid)
             inst->setCanCommit();
             instQueue.insertBarrier(inst);
             add_to_iq = false;
-        } else if (inst->isNonSpeculative()) {
-            DPRINTF(IEW, "[tid:%i]: Issue: Nonspeculative instruction "
-                    "encountered, skipping.\n", tid);
-
-            // Same as non-speculative stores.
-            inst->setCanCommit();
-
-            // Specifically insert it as nonspeculative.
-            instQueue.insertNonSpec(inst);
-
-            ++iewDispNonSpecInsts;
-
-            add_to_iq = false;
         } else if (inst->isNop()) {
             DPRINTF(IEW, "[tid:%i]: Issue: Nop instruction encountered, "
                     "skipping.\n", tid);
@@ -1193,6 +1180,20 @@ DefaultIEW<Impl>::dispatchInsts(unsigned tid)
         } else {
             add_to_iq = true;
         }
+        if (inst->isNonSpeculative()) {
+            DPRINTF(IEW, "[tid:%i]: Issue: Nonspeculative instruction "
+                    "encountered, skipping.\n", tid);
+
+            // Same as non-speculative stores.
+            inst->setCanCommit();
+
+            // Specifically insert it as nonspeculative.
+            instQueue.insertNonSpec(inst);
+
+            ++iewDispNonSpecInsts;
+
+            add_to_iq = false;
+        }
 
         // If the instruction queue is not full, then add the
         // instruction.
@@ -1379,6 +1380,7 @@ DefaultIEW<Impl>::executeInsts()
                     predictedNotTakenIncorrect++;
                 }
             } else if (ldstQueue.violation(tid)) {
+                assert(inst->isMemRef());
                 // If there was an ordering violation, then get the
                 // DynInst that caused the violation.  Note that this
                 // clears the violation signal.
@@ -1391,10 +1393,10 @@ DefaultIEW<Impl>::executeInsts()
 
                 // Ensure the violating instruction is older than
                 // current squash
-                if (fetchRedirect[tid] &&
-                    violator->seqNum >= toCommit->squashedSeqNum[tid])
+/*                if (fetchRedirect[tid] &&
+                    violator->seqNum >= toCommit->squashedSeqNum[tid] + 1)
                     continue;
-
+*/
                 fetchRedirect[tid] = true;
 
                 // Tell the instruction queue that a violation has occured.
@@ -1414,6 +1416,33 @@ DefaultIEW<Impl>::executeInsts()
 
                 squashDueToMemBlocked(inst, tid);
             }
+        } else {
+            // Reset any state associated with redirects that will not
+            // be used.
+            if (ldstQueue.violation(tid)) {
+                assert(inst->isMemRef());
+
+                DynInstPtr violator = ldstQueue.getMemDepViolator(tid);
+
+                DPRINTF(IEW, "LDSTQ detected a violation.  Violator PC: "
+                        "%#x, inst PC: %#x.  Addr is: %#x.\n",
+                        violator->readPC(), inst->readPC(), inst->physEffAddr);
+                DPRINTF(IEW, "Violation will not be handled because "
+                        "already squashing\n");
+
+                ++memOrderViolationEvents;
+            }
+            if (ldstQueue.loadBlocked(tid) &&
+                !ldstQueue.isLoadBlockedHandled(tid)) {
+                DPRINTF(IEW, "Load operation couldn't execute because the "
+                        "memory system is blocked.  PC: %#x [sn:%lli]\n",
+                        inst->readPC(), inst->seqNum);
+                DPRINTF(IEW, "Blocked load will not be handled because "
+                        "already squashing\n");
+
+                ldstQueue.setLoadBlockedHandled(tid);
+            }
+
         }
     }
 
@@ -1563,6 +1592,7 @@ DefaultIEW<Impl>::tick()
             //DPRINTF(IEW,"NonspecInst from thread %i",tid);
             if (fromCommit->commitInfo[tid].uncached) {
                 instQueue.replayMemInst(fromCommit->commitInfo[tid].uncachedLoad);
+                fromCommit->commitInfo[tid].uncachedLoad->setAtCommit();
             } else {
                 instQueue.scheduleNonSpec(
                     fromCommit->commitInfo[tid].nonSpecSeqNum);
diff --git a/src/cpu/o3/inst_queue_impl.hh b/src/cpu/o3/inst_queue_impl.hh
index d5781d89d..79e03d4bf 100644
--- a/src/cpu/o3/inst_queue_impl.hh
+++ b/src/cpu/o3/inst_queue_impl.hh
@@ -829,6 +829,8 @@ InstructionQueue<Impl>::scheduleNonSpec(const InstSeqNum &inst)
 
     unsigned tid = (*inst_it).second->threadNumber;
 
+    (*inst_it).second->setAtCommit();
+
     (*inst_it).second->setCanIssue();
 
     if (!(*inst_it).second->isMemRef()) {
@@ -960,6 +962,8 @@ template <class Impl>
 void
 InstructionQueue<Impl>::rescheduleMemInst(DynInstPtr &resched_inst)
 {
+    DPRINTF(IQ, "Rescheduling mem inst [sn:%lli]\n", resched_inst->seqNum);
+    resched_inst->clearCanIssue();
     memDepUnit[resched_inst->threadNumber].reschedule(resched_inst);
 }
 
@@ -984,7 +988,6 @@ InstructionQueue<Impl>::completeMemInst(DynInstPtr &completed_inst)
     completed_inst->memOpDone = true;
 
     memDepUnit[tid].completed(completed_inst);
-
     count[tid]--;
 }
 
@@ -1084,16 +1087,21 @@ InstructionQueue<Impl>::doSquash(unsigned tid)
 
                     ++iqSquashedOperandsExamined;
                 }
-            } else if (!squashed_inst->isStoreConditional() || !squashed_inst->isCompleted()) {
+            } else if (!squashed_inst->isStoreConditional() ||
+                       !squashed_inst->isCompleted()) {
                 NonSpecMapIt ns_inst_it =
                     nonSpecInsts.find(squashed_inst->seqNum);
                 assert(ns_inst_it != nonSpecInsts.end());
+                if (ns_inst_it == nonSpecInsts.end()) {
+                    assert(squashed_inst->getFault() != NoFault);
+                } else {
 
-                (*ns_inst_it).second = NULL;
+                    (*ns_inst_it).second = NULL;
 
-                nonSpecInsts.erase(ns_inst_it);
+                    nonSpecInsts.erase(ns_inst_it);
 
-                ++iqSquashedNonSpecRemoved;
+                    ++iqSquashedNonSpecRemoved;
+                }
             }
 
             // Might want to also clear out the head of the dependency graph.
diff --git a/src/cpu/o3/lsq_unit.hh b/src/cpu/o3/lsq_unit.hh
index 2419afe29..1b10843f5 100644
--- a/src/cpu/o3/lsq_unit.hh
+++ b/src/cpu/o3/lsq_unit.hh
@@ -497,6 +497,11 @@ LSQUnit<Impl>::read(Request *req, T &data, int load_idx)
         (load_idx != loadHead || !load_inst->isAtCommit())) {
         iewStage->rescheduleMemInst(load_inst);
         ++lsqRescheduledLoads;
+
+        // Must delete request now that it wasn't handed off to
+        // memory.  This is quite ugly.  @todo: Figure out the proper
+        // place to really handle request deletes.
+        delete req;
         return TheISA::genMachineCheckFault();
     }
 
@@ -534,6 +539,10 @@ LSQUnit<Impl>::read(Request *req, T &data, int load_idx)
 
         if (store_size == 0)
             continue;
+        else if (storeQueue[store_idx].inst->uncacheable())
+            continue;
+
+        assert(storeQueue[store_idx].inst->effAddrValid);
 
         // Check if the store data is within the lower and upper bounds of
         // addresses that the request needs.
@@ -550,7 +559,7 @@ LSQUnit<Impl>::read(Request *req, T &data, int load_idx)
             storeQueue[store_idx].inst->effAddr;
 
         // If the store's data has all of the data needed, we can forward.
-        if (store_has_lower_limit && store_has_upper_limit) {
+        if ((store_has_lower_limit && store_has_upper_limit)) {
             // Get shift amount for offset into the store's data.
             int shift_amt = req->getVaddr() & (store_size - 1);
             // @todo: Magic number, assumes byte addressing
@@ -596,6 +605,7 @@ LSQUnit<Impl>::read(Request *req, T &data, int load_idx)
             // If it's already been written back, then don't worry about
             // stalling on it.
             if (storeQueue[store_idx].completed) {
+                panic("Should not check one of these");
                 continue;
             }
 
@@ -614,6 +624,7 @@ LSQUnit<Impl>::read(Request *req, T &data, int load_idx)
             // rescheduled eventually
             iewStage->rescheduleMemInst(load_inst);
             iewStage->decrWb(load_inst->seqNum);
+            load_inst->clearIssued();
             ++lsqRescheduledLoads;
 
             // Do not generate a writeback event as this instruction is not
@@ -622,7 +633,11 @@ LSQUnit<Impl>::read(Request *req, T &data, int load_idx)
                     "Store idx %i to load addr %#x\n",
                     store_idx, req->getVaddr());
 
-            ++lsqBlockedLoads;
+            // Must delete request now that it wasn't handed off to
+            // memory.  This is quite ugly.  @todo: Figure out the
+            // proper place to really handle request deletes.
+            delete req;
+
             return NoFault;
         }
     }
@@ -654,8 +669,11 @@ LSQUnit<Impl>::read(Request *req, T &data, int load_idx)
             // Delete state and data packet because a load retry
             // initiates a pipeline restart; it does not retry.
             delete state;
+            delete data_pkt->req;
             delete data_pkt;
 
+            req = NULL;
+
             if (result == Packet::BadAddress) {
                 return TheISA::genMachineCheckFault();
             }
@@ -669,6 +687,9 @@ LSQUnit<Impl>::read(Request *req, T &data, int load_idx)
     // If the cache was blocked, or has become blocked due to the access,
     // handle it.
     if (lsq->cacheBlocked()) {
+        if (req)
+            delete req;
+
         ++lsqCacheBlocked;
 
         iewStage->decrWb(load_inst->seqNum);
diff --git a/src/cpu/o3/lsq_unit_impl.hh b/src/cpu/o3/lsq_unit_impl.hh
index 3ba22a530..e70c960b3 100644
--- a/src/cpu/o3/lsq_unit_impl.hh
+++ b/src/cpu/o3/lsq_unit_impl.hh
@@ -81,6 +81,7 @@ LSQUnit<Impl>::completeDataAccess(PacketPtr pkt)
     if (isSwitchedOut() || inst->isSquashed()) {
         iewStage->decrWb(inst->seqNum);
         delete state;
+        delete pkt->req;
         delete pkt;
         return;
     } else {
@@ -94,6 +95,7 @@ LSQUnit<Impl>::completeDataAccess(PacketPtr pkt)
     }
 
     delete state;
+    delete pkt->req;
     delete pkt;
 }
 
@@ -403,12 +405,15 @@ template <class Impl>
 Fault
 LSQUnit<Impl>::executeLoad(DynInstPtr &inst)
 {
+    using namespace TheISA;
     // Execute a specific load.
     Fault load_fault = NoFault;
 
     DPRINTF(LSQUnit, "Executing load PC %#x, [sn:%lli]\n",
             inst->readPC(),inst->seqNum);
 
+    assert(!inst->isSquashed());
+
     load_fault = inst->initiateAcc();
 
     // If the instruction faulted, then we need to send it along to commit
@@ -418,12 +423,44 @@ LSQUnit<Impl>::executeLoad(DynInstPtr &inst)
         // realizes there is activity.
         // Mark it as executed unless it is an uncached load that
         // needs to hit the head of commit.
-        if (!(inst->req && inst->req->isUncacheable()) ||
+        if (!(inst->hasRequest() && inst->uncacheable()) ||
             inst->isAtCommit()) {
             inst->setExecuted();
         }
         iewStage->instToCommit(inst);
         iewStage->activityThisCycle();
+    } else if (!loadBlocked()) {
+        assert(inst->effAddrValid);
+        int load_idx = inst->lqIdx;
+        incrLdIdx(load_idx);
+        while (load_idx != loadTail) {
+            // Really only need to check loads that have actually executed
+
+            // @todo: For now this is extra conservative, detecting a
+            // violation if the addresses match assuming all accesses
+            // are quad word accesses.
+
+            // @todo: Fix this, magic number being used here
+            if (loadQueue[load_idx]->effAddrValid &&
+                (loadQueue[load_idx]->effAddr >> 8) ==
+                (inst->effAddr >> 8)) {
+                // A load incorrectly passed this load.  Squash and refetch.
+                // For now return a fault to show that it was unsuccessful.
+                DynInstPtr violator = loadQueue[load_idx];
+                if (!memDepViolator ||
+                    (violator->seqNum < memDepViolator->seqNum)) {
+                    memDepViolator = violator;
+                } else {
+                    break;
+                }
+
+                ++lsqMemOrderViolation;
+
+                return genMachineCheckFault();
+            }
+
+            incrLdIdx(load_idx);
+        }
     }
 
     return load_fault;
@@ -442,6 +479,8 @@ LSQUnit<Impl>::executeStore(DynInstPtr &store_inst)
     DPRINTF(LSQUnit, "Executing store PC %#x [sn:%lli]\n",
             store_inst->readPC(), store_inst->seqNum);
 
+    assert(!store_inst->isSquashed());
+
     // Check the recently completed loads to see if any match this store's
     // address.  If so, then we have a memory ordering violation.
     int load_idx = store_inst->lqIdx;
@@ -465,32 +504,36 @@ LSQUnit<Impl>::executeStore(DynInstPtr &store_inst)
         ++storesToWB;
     }
 
-    if (!memDepViolator) {
-        while (load_idx != loadTail) {
-            // Really only need to check loads that have actually executed
-            // It's safe to check all loads because effAddr is set to
-            // InvalAddr when the dyn inst is created.
-
-            // @todo: For now this is extra conservative, detecting a
-            // violation if the addresses match assuming all accesses
-            // are quad word accesses.
-
-            // @todo: Fix this, magic number being used here
-            if ((loadQueue[load_idx]->effAddr >> 8) ==
-                (store_inst->effAddr >> 8)) {
-                // A load incorrectly passed this store.  Squash and refetch.
-                // For now return a fault to show that it was unsuccessful.
-                memDepViolator = loadQueue[load_idx];
-                ++lsqMemOrderViolation;
-
-                return genMachineCheckFault();
+    assert(store_inst->effAddrValid);
+    while (load_idx != loadTail) {
+        // Really only need to check loads that have actually executed
+        // It's safe to check all loads because effAddr is set to
+        // InvalAddr when the dyn inst is created.
+
+        // @todo: For now this is extra conservative, detecting a
+        // violation if the addresses match assuming all accesses
+        // are quad word accesses.
+
+        // @todo: Fix this, magic number being used here
+        if (loadQueue[load_idx]->effAddrValid &&
+            (loadQueue[load_idx]->effAddr >> 8) ==
+            (store_inst->effAddr >> 8)) {
+            // A load incorrectly passed this store.  Squash and refetch.
+            // For now return a fault to show that it was unsuccessful.
+            DynInstPtr violator = loadQueue[load_idx];
+            if (!memDepViolator ||
+                (violator->seqNum < memDepViolator->seqNum)) {
+                memDepViolator = violator;
+            } else {
+                break;
             }
 
-            incrLdIdx(load_idx);
+            ++lsqMemOrderViolation;
+
+            return genMachineCheckFault();
         }
 
-        // If we've reached this point, there was no violation.
-        memDepViolator = NULL;
+        incrLdIdx(load_idx);
     }
 
     return store_fault;
@@ -660,7 +703,7 @@ LSQUnit<Impl>::writebackStores()
                 panic("LSQ sent out a bad address for a completed store!");
             }
             // Need to handle becoming blocked on a store.
-            DPRINTF(IEW, "D-Cache became blcoked when writing [sn:%lli], will"
+            DPRINTF(IEW, "D-Cache became blocked when writing [sn:%lli], will"
                     "retry later\n",
                     inst->seqNum);
             isStoreBlocked = true;
@@ -735,6 +778,10 @@ LSQUnit<Impl>::squash(const InstSeqNum &squashed_num)
         }
     }
 
+    if (memDepViolator && squashed_num < memDepViolator->seqNum) {
+        memDepViolator = NULL;
+    }
+
     int store_idx = storeTail;
     decrStIdx(store_idx);
 
@@ -764,6 +811,11 @@ LSQUnit<Impl>::squash(const InstSeqNum &squashed_num)
         storeQueue[store_idx].inst = NULL;
         storeQueue[store_idx].canWB = 0;
 
+        // Must delete request now that it wasn't handed off to
+        // memory.  This is quite ugly.  @todo: Figure out the proper
+        // place to really handle request deletes.
+        delete storeQueue[store_idx].req;
+
         storeQueue[store_idx].req = NULL;
         --stores;
 
diff --git a/src/cpu/o3/mem_dep_unit_impl.hh b/src/cpu/o3/mem_dep_unit_impl.hh
index f19980fd5..64558efaa 100644
--- a/src/cpu/o3/mem_dep_unit_impl.hh
+++ b/src/cpu/o3/mem_dep_unit_impl.hh
@@ -214,6 +214,9 @@ MemDepUnit<MemDepPred, Impl>::insert(DynInstPtr &inst)
             inst_entry->regsReady = true;
         }
 
+        // Clear the bit saying this instruction can issue.
+        inst->clearCanIssue();
+
         // Add this instruction to the list of dependents.
         store_entry->dependInsts.push_back(inst_entry);
 
@@ -357,7 +360,6 @@ void
 MemDepUnit<MemDepPred, Impl>::replay(DynInstPtr &inst)
 {
     DynInstPtr temp_inst;
-    bool found_inst = false;
 
     // For now this replay function replays all waiting memory ops.
     while (!instsToReplay.empty()) {
@@ -371,14 +373,8 @@ MemDepUnit<MemDepPred, Impl>::replay(DynInstPtr &inst)
 
         moveToReady(inst_entry);
 
-        if (temp_inst == inst) {
-            found_inst = true;
-        }
-
         instsToReplay.pop_front();
     }
-
-    assert(found_inst);
 }
 
 template <class MemDepPred, class Impl>
diff --git a/src/cpu/o3/rename_map.cc b/src/cpu/o3/rename_map.cc
index 620daf691..b436ec1c3 100644
--- a/src/cpu/o3/rename_map.cc
+++ b/src/cpu/o3/rename_map.cc
@@ -192,8 +192,6 @@ SimpleRenameMap::rename(RegIndex arch_reg)
         // known that the prev reg was outside the range of normal registers
         // so the free list can avoid adding it.
         prev_reg = renamed_reg;
-
-        assert(renamed_reg < numPhysicalRegs + numMiscRegs);
     }
 
     DPRINTF(Rename, "Renamed reg %d to physical reg %d old mapping was %d\n",
diff --git a/src/mem/bus.cc b/src/mem/bus.cc
index 4988df3c5..6e6ba2380 100644
--- a/src/mem/bus.cc
+++ b/src/mem/bus.cc
@@ -171,8 +171,12 @@ Bus::recvTiming(PacketPtr pkt)
     }
 
     short dest = pkt->getDest();
+
+    // Make sure to clear the snoop commit flag so it doesn't think an
+    // access has been handled twice.
     if (dest == Packet::Broadcast) {
         port = findPort(pkt->getAddr(), pkt->getSrc());
+        pkt->flags &= ~SNOOP_COMMIT;
         if (timingSnoop(pkt, port ? port : interfaces[pkt->getSrc()])) {
             bool success;
 
diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh
index 5c6ab0950..fc4660269 100644
--- a/src/mem/cache/cache_impl.hh
+++ b/src/mem/cache/cache_impl.hh
@@ -545,8 +545,13 @@ Cache<TagStore,Coherence>::access(PacketPtr &pkt)
         //We are determining prefetches on access stream, call prefetcher
         prefetcher->handleMiss(pkt, curTick);
     }
+
+    Addr blk_addr = pkt->getAddr() & ~(Addr(blkSize-1));
+
     if (!pkt->req->isUncacheable()) {
-        blk = handleAccess(pkt, lat, writebacks);
+        if (!missQueue->findMSHR(blk_addr)) {
+            blk = handleAccess(pkt, lat, writebacks);
+        }
     } else {
         size = pkt->getSize();
     }
diff --git a/src/mem/cache/miss/miss_queue.cc b/src/mem/cache/miss/miss_queue.cc
index 25b8fcbeb..24ca9cfa2 100644
--- a/src/mem/cache/miss/miss_queue.cc
+++ b/src/mem/cache/miss/miss_queue.cc
@@ -599,6 +599,7 @@ MissQueue::handleResponse(PacketPtr &pkt, Tick time)
             MemCmd cmd = mshr->getTarget()->cmd;
             mshr->pkt->setDest(Packet::Broadcast);
             mshr->pkt->result = Packet::Unknown;
+            mshr->pkt->req = mshr->getTarget()->req;
             mq.markPending(mshr, cmd);
             mshr->order = order++;
             cache->setMasterRequest(Request_MSHR, time);
diff --git a/tests/configs/o3-timing.py b/tests/configs/o3-timing.py
index a66cd436e..5600d9f22 100644
--- a/tests/configs/o3-timing.py
+++ b/tests/configs/o3-timing.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2006 The Regents of The University of Michigan
+# Copyright (c) 2006-2007 The Regents of The University of Michigan
 # All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
@@ -37,7 +37,7 @@ class MyCache(BaseCache):
     mshrs = 10
     tgts_per_mshr = 5
 
-cpu = DerivO3CPU()
+cpu = DerivO3CPU(cpu_id=0)
 cpu.addTwoLevelCacheHierarchy(MyCache(size = '128kB'), MyCache(size = '256kB'),
                               MyCache(size = '2MB'))
 
diff --git a/tests/long/10.mcf/ref/sparc/linux/simple-atomic/config.ini b/tests/long/10.mcf/ref/sparc/linux/simple-atomic/config.ini
new file mode 100644
index 000000000..73a28200e
--- /dev/null
+++ b/tests/long/10.mcf/ref/sparc/linux/simple-atomic/config.ini
@@ -0,0 +1,64 @@
+[root]
+type=Root
+children=system
+dummy=0
+
+[system]
+type=System
+children=cpu membus physmem
+mem_mode=atomic
+physmem=system.physmem
+
+[system.cpu]
+type=AtomicSimpleCPU
+children=workload
+clock=1
+cpu_id=0
+defer_registration=false
+function_trace=false
+function_trace_start=0
+max_insts_all_threads=0
+max_insts_any_thread=0
+max_loads_all_threads=0
+max_loads_any_thread=0
+phase=0
+progress_interval=0
+simulate_stalls=false
+system=system
+width=1
+workload=system.cpu.workload
+dcache_port=system.membus.port[2]
+icache_port=system.membus.port[1]
+
+[system.cpu.workload]
+type=LiveProcess
+cmd=mcf mcf.in
+cwd=build/SPARC_SE/tests/fast/long/10.mcf/sparc/linux/simple-atomic
+egid=100
+env=
+euid=100
+executable=/dist/m5/cpu2000/binaries/sparc/linux/mcf
+gid=100
+input=/dist/m5/cpu2000/data/mcf/lgred/input/mcf.in
+output=cout
+pid=100
+ppid=99
+system=system
+uid=100
+
+[system.membus]
+type=Bus
+bus_id=0
+clock=1000
+responder_set=false
+width=64
+port=system.physmem.port system.cpu.icache_port system.cpu.dcache_port
+
+[system.physmem]
+type=PhysicalMemory
+file=
+latency=1
+range=0:134217727
+zero=false
+port=system.membus.port[0]
+
diff --git a/tests/long/10.mcf/ref/sparc/linux/simple-atomic/config.out b/tests/long/10.mcf/ref/sparc/linux/simple-atomic/config.out
new file mode 100644
index 000000000..2b86e6bfb
--- /dev/null
+++ b/tests/long/10.mcf/ref/sparc/linux/simple-atomic/config.out
@@ -0,0 +1,57 @@
+[root]
+type=Root
+dummy=0
+
+[system.physmem]
+type=PhysicalMemory
+file=
+range=[0,134217727]
+latency=1
+zero=false
+
+[system]
+type=System
+physmem=system.physmem
+mem_mode=atomic
+
+[system.membus]
+type=Bus
+bus_id=0
+clock=1000
+width=64
+responder_set=false
+
+[system.cpu.workload]
+type=LiveProcess
+cmd=mcf mcf.in
+executable=/dist/m5/cpu2000/binaries/sparc/linux/mcf
+input=/dist/m5/cpu2000/data/mcf/lgred/input/mcf.in
+output=cout
+env=
+cwd=build/SPARC_SE/tests/fast/long/10.mcf/sparc/linux/simple-atomic
+system=system
+uid=100
+euid=100
+gid=100
+egid=100
+pid=100
+ppid=99
+
+[system.cpu]
+type=AtomicSimpleCPU
+max_insts_any_thread=0
+max_insts_all_threads=0
+max_loads_any_thread=0
+max_loads_all_threads=0
+progress_interval=0
+system=system
+cpu_id=0
+workload=system.cpu.workload
+clock=1
+phase=0
+defer_registration=false
+width=1
+function_trace=false
+function_trace_start=0
+simulate_stalls=false
+
diff --git a/tests/long/10.mcf/ref/sparc/linux/simple-atomic/m5stats.txt b/tests/long/10.mcf/ref/sparc/linux/simple-atomic/m5stats.txt
new file mode 100644
index 000000000..41e6bfc52
--- /dev/null
+++ b/tests/long/10.mcf/ref/sparc/linux/simple-atomic/m5stats.txt
@@ -0,0 +1,18 @@
+
+---------- Begin Simulation Statistics ----------
+host_inst_rate                                 624449                       # Simulator instruction rate (inst/s)
+host_mem_usage                                 148644                       # Number of bytes of host memory used
+host_seconds                                  2753.78                       # Real time elapsed on the host
+host_tick_rate                                 624449                       # Simulator tick rate (ticks/s)
+sim_freq                                 1000000000000                       # Frequency of simulated ticks
+sim_insts                                  1719594534                       # Number of instructions simulated
+sim_seconds                                  0.001720                       # Number of seconds simulated
+sim_ticks                                  1719594533                       # Number of ticks simulated
+system.cpu.idle_fraction                            0                       # Percentage of idle cycles
+system.cpu.not_idle_fraction                        1                       # Percentage of non-idle cycles
+system.cpu.numCycles                       1719594534                       # number of cpu cycles simulated
+system.cpu.num_insts                       1719594534                       # Number of instructions executed
+system.cpu.num_refs                         774793634                       # Number of memory references
+system.cpu.workload.PROG:num_syscalls             632                       # Number of system calls
+
+---------- End Simulation Statistics   ----------
diff --git a/tests/long/10.mcf/ref/sparc/linux/simple-atomic/stderr b/tests/long/10.mcf/ref/sparc/linux/simple-atomic/stderr
new file mode 100644
index 000000000..9c09fd847
--- /dev/null
+++ b/tests/long/10.mcf/ref/sparc/linux/simple-atomic/stderr
@@ -0,0 +1,7 @@
+warn: More than two loadable segments in ELF object.
+warn: Ignoring segment @ 0xa2000 length 0x10.
+warn: More than two loadable segments in ELF object.
+warn: Ignoring segment @ 0x0 length 0x0.
+0: system.remote_gdb.listener: listening for remote gdb on port 7000
+warn: Entering event queue @ 0.  Starting simulation...
+warn: Ignoring request to flush register windows.
diff --git a/tests/long/10.mcf/ref/sparc/linux/simple-atomic/stdout b/tests/long/10.mcf/ref/sparc/linux/simple-atomic/stdout
new file mode 100644
index 000000000..6711761e8
--- /dev/null
+++ b/tests/long/10.mcf/ref/sparc/linux/simple-atomic/stdout
@@ -0,0 +1,33 @@
+
+MCF SPEC version 1.6.I
+by  Andreas Loebel
+Copyright (c) 1998,1999   ZIB Berlin
+All Rights Reserved.
+
+nodes                      : 1800
+active arcs                : 8190
+simplex iterations         : 6837
+flow value                 : 12860044181
+new implicit arcs          : 300000
+active arcs                : 308190
+simplex iterations         : 11843
+flow value                 : 9360043604
+new implicit arcs          : 22787
+active arcs                : 330977
+simplex iterations         : 11931
+flow value                 : 9360043512
+checksum                   : 798014
+optimal
+M5 Simulator System
+
+Copyright (c) 2001-2006
+The Regents of The University of Michigan
+All Rights Reserved
+
+
+M5 compiled Mar 23 2007 22:37:06
+M5 started Fri Mar 23 22:37:22 2007
+M5 executing on zizzer.eecs.umich.edu
+command line: build/SPARC_SE/m5.fast -d build/SPARC_SE/tests/fast/long/10.mcf/sparc/linux/simple-atomic tests/run.py long/10.mcf/sparc/linux/simple-atomic
+Global frequency set at 1000000000000 ticks per second
+Exiting @ tick 1719594533 because target called exit()
diff --git a/tests/quick/00.hello/ref/alpha/linux/o3-timing/config.ini b/tests/quick/00.hello/ref/alpha/linux/o3-timing/config.ini
index 2296e2545..cc4477d68 100644
--- a/tests/quick/00.hello/ref/alpha/linux/o3-timing/config.ini
+++ b/tests/quick/00.hello/ref/alpha/linux/o3-timing/config.ini
@@ -1,48 +1,7 @@
 [root]
 type=Root
 children=system
-checkpoint=
-clock=1000000000000
-max_tick=0
-output_file=cout
-progress_interval=0
-
-[exetrace]
-intel_format=false
-legion_lockstep=false
-pc_symbol=true
-print_cpseq=false
-print_cycle=true
-print_data=true
-print_effaddr=true
-print_fetchseq=false
-print_iregs=false
-print_opclass=true
-print_thread=true
-speculative=true
-trace_system=client
-
-[serialize]
-count=10
-cycle=0
-dir=cpt.%012d
-period=0
-
-[stats]
-descriptions=true
-dump_cycle=0
-dump_period=0
-dump_reset=false
-ignore_events=
-mysql_db=
-mysql_host=
-mysql_password=
-mysql_user=
-project_name=test
-simulation_name=test
-simulation_sample=0
-text_compat=true
-text_file=m5stats.txt
+dummy=0
 
 [system]
 type=System
@@ -70,6 +29,7 @@ commitToFetchDelay=1
 commitToIEWDelay=1
 commitToRenameDelay=1
 commitWidth=8
+cpu_id=0
 decodeToFetchDelay=1
 decodeToRenameDelay=1
 decodeWidth=8
@@ -417,12 +377,3 @@ range=0:134217727
 zero=false
 port=system.membus.port[0]
 
-[trace]
-bufsize=0
-cycle=0
-dump_on_exit=false
-file=cout
-flags=
-ignore=
-start=0
-
diff --git a/tests/quick/00.hello/ref/alpha/linux/o3-timing/config.out b/tests/quick/00.hello/ref/alpha/linux/o3-timing/config.out
index 1b1b58f1b..f50559125 100644
--- a/tests/quick/00.hello/ref/alpha/linux/o3-timing/config.out
+++ b/tests/quick/00.hello/ref/alpha/linux/o3-timing/config.out
@@ -1,9 +1,6 @@
 [root]
 type=Root
-clock=1000000000000
-max_tick=0
-progress_interval=0
-output_file=cout
+dummy=0
 
 [system.physmem]
 type=PhysicalMemory
@@ -173,6 +170,7 @@ type=DerivO3CPU
 clock=1
 phase=0
 numThreads=1
+cpu_id=0
 activity=0
 workload=system.cpu.workload
 checker=null
@@ -367,51 +365,3 @@ clock=1000
 width=64
 responder_set=false
 
-[trace]
-flags=
-start=0
-cycle=0
-bufsize=0
-file=cout
-dump_on_exit=false
-ignore=
-
-[stats]
-descriptions=true
-project_name=test
-simulation_name=test
-simulation_sample=0
-text_file=m5stats.txt
-text_compat=true
-mysql_db=
-mysql_user=
-mysql_password=
-mysql_host=
-events_start=-1
-dump_reset=false
-dump_cycle=0
-dump_period=0
-ignore_events=
-
-[random]
-seed=1
-
-[exetrace]
-speculative=true
-print_cycle=true
-print_opclass=true
-print_thread=true
-print_effaddr=true
-print_data=true
-print_iregs=false
-print_fetchseq=false
-print_cpseq=false
-print_reg_delta=false
-pc_symbol=true
-intel_format=false
-legion_lockstep=false
-trace_system=client
-
-[statsreset]
-reset_cycle=0
-
diff --git a/tests/quick/00.hello/ref/alpha/linux/o3-timing/m5stats.txt b/tests/quick/00.hello/ref/alpha/linux/o3-timing/m5stats.txt
index 4e3fdbcd2..4b323618c 100644
--- a/tests/quick/00.hello/ref/alpha/linux/o3-timing/m5stats.txt
+++ b/tests/quick/00.hello/ref/alpha/linux/o3-timing/m5stats.txt
@@ -1,40 +1,40 @@
 
 ---------- Begin Simulation Statistics ----------
 global.BPredUnit.BTBCorrect                         0                       # Number of correct BTB predictions (this stat may not work properly.
-global.BPredUnit.BTBHits                          675                       # Number of BTB hits
-global.BPredUnit.BTBLookups                      2343                       # Number of BTB lookups
+global.BPredUnit.BTBHits                          669                       # Number of BTB hits
+global.BPredUnit.BTBLookups                      2338                       # Number of BTB lookups
 global.BPredUnit.RASInCorrect                      76                       # Number of incorrect RAS predictions.
 global.BPredUnit.condIncorrect                    437                       # Number of conditional branches incorrect
-global.BPredUnit.condPredicted                   1563                       # Number of conditional branches predicted
-global.BPredUnit.lookups                         5229                       # Number of BP lookups
+global.BPredUnit.condPredicted                   1559                       # Number of conditional branches predicted
+global.BPredUnit.lookups                         5224                       # Number of BP lookups
 global.BPredUnit.usedRAS                         2821                       # Number of times the RAS was used to get a target.
-host_inst_rate                                  11609                       # Simulator instruction rate (inst/s)
-host_mem_usage                                 177052                       # Number of bytes of host memory used
-host_seconds                                     0.48                       # Real time elapsed on the host
-host_tick_rate                                2887871                       # Simulator tick rate (ticks/s)
-memdepunit.memDep.conflictingLoads                 23                       # Number of conflicting loads.
-memdepunit.memDep.conflictingStores               117                       # Number of conflicting stores.
-memdepunit.memDep.insertedLoads                  3775                       # Number of loads inserted to the mem dependence unit.
-memdepunit.memDep.insertedStores                 3734                       # Number of stores inserted to the mem dependence unit.
+host_inst_rate                                  12539                       # Simulator instruction rate (inst/s)
+host_mem_usage                                 156028                       # Number of bytes of host memory used
+host_seconds                                     0.45                       # Real time elapsed on the host
+host_tick_rate                                3120138                       # Simulator tick rate (ticks/s)
+memdepunit.memDep.conflictingLoads                 24                       # Number of conflicting loads.
+memdepunit.memDep.conflictingStores                12                       # Number of conflicting stores.
+memdepunit.memDep.insertedLoads                  3770                       # Number of loads inserted to the mem dependence unit.
+memdepunit.memDep.insertedStores                 3723                       # Number of stores inserted to the mem dependence unit.
 sim_freq                                 1000000000000                       # Frequency of simulated ticks
 sim_insts                                        5623                       # Number of instructions simulated
 sim_seconds                                  0.000001                       # Number of seconds simulated
-sim_ticks                                     1400135                       # Number of ticks simulated
+sim_ticks                                     1400134                       # Number of ticks simulated
 system.cpu.commit.COM:branches                    862                       # Number of branches committed
-system.cpu.commit.COM:bw_lim_events                97                       # number cycles where commit BW limit reached
+system.cpu.commit.COM:bw_lim_events               101                       # number cycles where commit BW limit reached
 system.cpu.commit.COM:bw_limited                    0                       # number of insts not committed due to BW limits
 system.cpu.commit.COM:committed_per_cycle.start_dist                     # Number of insts commited each cycle
-system.cpu.commit.COM:committed_per_cycle.samples        51243                      
+system.cpu.commit.COM:committed_per_cycle.samples        52214                      
 system.cpu.commit.COM:committed_per_cycle.min_value            0                      
-                               0        48519   9468.42%           
-                               1         1590    310.29%           
-                               2          483     94.26%           
-                               3          227     44.30%           
-                               4          131     25.56%           
-                               5          104     20.30%           
-                               6           61     11.90%           
-                               7           31      6.05%           
-                               8           97     18.93%           
+                               0        49499   9480.02%           
+                               1         1576    301.83%           
+                               2          483     92.50%           
+                               3          233     44.62%           
+                               4          133     25.47%           
+                               5          102     19.53%           
+                               6           60     11.49%           
+                               7           27      5.17%           
+                               8          101     19.34%           
 system.cpu.commit.COM:committed_per_cycle.max_value            8                      
 system.cpu.commit.COM:committed_per_cycle.end_dist
 
@@ -46,66 +46,66 @@ system.cpu.commit.COM:swp_count                     0                       # Nu
 system.cpu.commit.branchMispredicts               368                       # The number of times a branch was mispredicted
 system.cpu.commit.commitCommittedInsts           5640                       # The number of committed instructions
 system.cpu.commit.commitNonSpecStalls              17                       # The number of times commit has been forced to stall to communicate backwards
-system.cpu.commit.commitSquashedInsts           13830                       # The number of squashed insts skipped by commit
+system.cpu.commit.commitSquashedInsts           13804                       # The number of squashed insts skipped by commit
 system.cpu.committedInsts                        5623                       # Number of Instructions Simulated
 system.cpu.committedInsts_total                  5623                       # Number of Instructions Simulated
-system.cpu.cpi                             249.001423                       # CPI: Cycles Per Instruction
-system.cpu.cpi_total                       249.001423                       # CPI: Total CPI of All Threads
-system.cpu.dcache.ReadReq_accesses               1600                       # number of ReadReq accesses(hits+misses)
+system.cpu.cpi                             249.001245                       # CPI: Cycles Per Instruction
+system.cpu.cpi_total                       249.001245                       # CPI: Total CPI of All Threads
+system.cpu.dcache.ReadReq_accesses               1596                       # number of ReadReq accesses(hits+misses)
 system.cpu.dcache.ReadReq_avg_miss_latency  6986.684848                       # average ReadReq miss latency
 system.cpu.dcache.ReadReq_avg_mshr_miss_latency  6882.626263                       # average ReadReq mshr miss latency
-system.cpu.dcache.ReadReq_hits                   1435                       # number of ReadReq hits
+system.cpu.dcache.ReadReq_hits                   1431                       # number of ReadReq hits
 system.cpu.dcache.ReadReq_miss_latency        1152803                       # number of ReadReq miss cycles
-system.cpu.dcache.ReadReq_miss_rate          0.103125                       # miss rate for ReadReq accesses
+system.cpu.dcache.ReadReq_miss_rate          0.103383                       # miss rate for ReadReq accesses
 system.cpu.dcache.ReadReq_misses                  165                       # number of ReadReq misses
 system.cpu.dcache.ReadReq_mshr_hits                66                       # number of ReadReq MSHR hits
 system.cpu.dcache.ReadReq_mshr_miss_latency       681380                       # number of ReadReq MSHR miss cycles
-system.cpu.dcache.ReadReq_mshr_miss_rate     0.061875                       # mshr miss rate for ReadReq accesses
+system.cpu.dcache.ReadReq_mshr_miss_rate     0.062030                       # mshr miss rate for ReadReq accesses
 system.cpu.dcache.ReadReq_mshr_misses              99                       # number of ReadReq MSHR misses
 system.cpu.dcache.WriteReq_accesses               812                       # number of WriteReq accesses(hits+misses)
-system.cpu.dcache.WriteReq_avg_miss_latency  5293.047244                       # average WriteReq miss latency
-system.cpu.dcache.WriteReq_avg_mshr_miss_latency  5141.082192                       # average WriteReq mshr miss latency
+system.cpu.dcache.WriteReq_avg_miss_latency  5293.200787                       # average WriteReq miss latency
+system.cpu.dcache.WriteReq_avg_mshr_miss_latency  5141.095890                       # average WriteReq mshr miss latency
 system.cpu.dcache.WriteReq_hits                   558                       # number of WriteReq hits
-system.cpu.dcache.WriteReq_miss_latency       1344434                       # number of WriteReq miss cycles
+system.cpu.dcache.WriteReq_miss_latency       1344473                       # number of WriteReq miss cycles
 system.cpu.dcache.WriteReq_miss_rate         0.312808                       # miss rate for WriteReq accesses
 system.cpu.dcache.WriteReq_misses                 254                       # number of WriteReq misses
 system.cpu.dcache.WriteReq_mshr_hits              181                       # number of WriteReq MSHR hits
-system.cpu.dcache.WriteReq_mshr_miss_latency       375299                       # number of WriteReq MSHR miss cycles
+system.cpu.dcache.WriteReq_mshr_miss_latency       375300                       # number of WriteReq MSHR miss cycles
 system.cpu.dcache.WriteReq_mshr_miss_rate     0.089901                       # mshr miss rate for WriteReq accesses
 system.cpu.dcache.WriteReq_mshr_misses             73                       # number of WriteReq MSHR misses
 system.cpu.dcache.avg_blocked_cycles_no_mshrs <err: div-0>                       # average number of cycles each access was blocked
-system.cpu.dcache.avg_blocked_cycles_no_targets  3366.651163                       # average number of cycles each access was blocked
-system.cpu.dcache.avg_refs                  11.587209                       # Average number of references to valid blocks.
+system.cpu.dcache.avg_blocked_cycles_no_targets  3366.930233                       # average number of cycles each access was blocked
+system.cpu.dcache.avg_refs                  11.563953                       # Average number of references to valid blocks.
 system.cpu.dcache.blocked_no_mshrs                  0                       # number of cycles access was blocked
 system.cpu.dcache.blocked_no_targets               43                       # number of cycles access was blocked
 system.cpu.dcache.blocked_cycles_no_mshrs            0                       # number of cycles access was blocked
-system.cpu.dcache.blocked_cycles_no_targets       144766                       # number of cycles access was blocked
+system.cpu.dcache.blocked_cycles_no_targets       144778                       # number of cycles access was blocked
 system.cpu.dcache.cache_copies                      0                       # number of cache copies performed
-system.cpu.dcache.demand_accesses                2412                       # number of demand (read+write) accesses
-system.cpu.dcache.demand_avg_miss_latency  5959.992840                       # average overall miss latency
-system.cpu.dcache.demand_avg_mshr_miss_latency  6143.482558                       # average overall mshr miss latency
-system.cpu.dcache.demand_hits                    1993                       # number of demand (read+write) hits
-system.cpu.dcache.demand_miss_latency         2497237                       # number of demand (read+write) miss cycles
-system.cpu.dcache.demand_miss_rate           0.173715                       # miss rate for demand accesses
+system.cpu.dcache.demand_accesses                2408                       # number of demand (read+write) accesses
+system.cpu.dcache.demand_avg_miss_latency  5960.085919                       # average overall miss latency
+system.cpu.dcache.demand_avg_mshr_miss_latency  6143.488372                       # average overall mshr miss latency
+system.cpu.dcache.demand_hits                    1989                       # number of demand (read+write) hits
+system.cpu.dcache.demand_miss_latency         2497276                       # number of demand (read+write) miss cycles
+system.cpu.dcache.demand_miss_rate           0.174003                       # miss rate for demand accesses
 system.cpu.dcache.demand_misses                   419                       # number of demand (read+write) misses
 system.cpu.dcache.demand_mshr_hits                247                       # number of demand (read+write) MSHR hits
-system.cpu.dcache.demand_mshr_miss_latency      1056679                       # number of demand (read+write) MSHR miss cycles
-system.cpu.dcache.demand_mshr_miss_rate      0.071310                       # mshr miss rate for demand accesses
+system.cpu.dcache.demand_mshr_miss_latency      1056680                       # number of demand (read+write) MSHR miss cycles
+system.cpu.dcache.demand_mshr_miss_rate      0.071429                       # mshr miss rate for demand accesses
 system.cpu.dcache.demand_mshr_misses              172                       # number of demand (read+write) MSHR misses
 system.cpu.dcache.fast_writes                       0                       # number of fast writes performed
 system.cpu.dcache.mshr_cap_events                   0                       # number of times MSHR cap was activated
 system.cpu.dcache.no_allocate_misses                0                       # Number of misses that were no-allocate
-system.cpu.dcache.overall_accesses               2412                       # number of overall (read+write) accesses
-system.cpu.dcache.overall_avg_miss_latency  5959.992840                       # average overall miss latency
-system.cpu.dcache.overall_avg_mshr_miss_latency  6143.482558                       # average overall mshr miss latency
+system.cpu.dcache.overall_accesses               2408                       # number of overall (read+write) accesses
+system.cpu.dcache.overall_avg_miss_latency  5960.085919                       # average overall miss latency
+system.cpu.dcache.overall_avg_mshr_miss_latency  6143.488372                       # average overall mshr miss latency
 system.cpu.dcache.overall_avg_mshr_uncacheable_latency <err: div-0>                       # average overall mshr uncacheable latency
-system.cpu.dcache.overall_hits                   1993                       # number of overall hits
-system.cpu.dcache.overall_miss_latency        2497237                       # number of overall miss cycles
-system.cpu.dcache.overall_miss_rate          0.173715                       # miss rate for overall accesses
+system.cpu.dcache.overall_hits                   1989                       # number of overall hits
+system.cpu.dcache.overall_miss_latency        2497276                       # number of overall miss cycles
+system.cpu.dcache.overall_miss_rate          0.174003                       # miss rate for overall accesses
 system.cpu.dcache.overall_misses                  419                       # number of overall misses
 system.cpu.dcache.overall_mshr_hits               247                       # number of overall MSHR hits
-system.cpu.dcache.overall_mshr_miss_latency      1056679                       # number of overall MSHR miss cycles
-system.cpu.dcache.overall_mshr_miss_rate     0.071310                       # mshr miss rate for overall accesses
+system.cpu.dcache.overall_mshr_miss_latency      1056680                       # number of overall MSHR miss cycles
+system.cpu.dcache.overall_mshr_miss_rate     0.071429                       # mshr miss rate for overall accesses
 system.cpu.dcache.overall_mshr_misses             172                       # number of overall MSHR misses
 system.cpu.dcache.overall_mshr_uncacheable_latency            0                       # number of overall MSHR uncacheable cycles
 system.cpu.dcache.overall_mshr_uncacheable_misses            0                       # number of overall MSHR uncacheable misses
@@ -121,88 +121,88 @@ system.cpu.dcache.prefetcher.num_hwpf_squashed_from_miss            0
 system.cpu.dcache.replacements                      0                       # number of replacements
 system.cpu.dcache.sampled_refs                    172                       # Sample count of references to valid blocks.
 system.cpu.dcache.soft_prefetch_mshr_full            0                       # number of mshr full events for SW prefetching instrutions
-system.cpu.dcache.tagsinuse                101.349720                       # Cycle average of tags in use
-system.cpu.dcache.total_refs                     1993                       # Total number of references to valid blocks.
+system.cpu.dcache.tagsinuse                101.349670                       # Cycle average of tags in use
+system.cpu.dcache.total_refs                     1989                       # Total number of references to valid blocks.
 system.cpu.dcache.warmup_cycle                      0                       # Cycle when the warmup percentage was hit.
 system.cpu.dcache.writebacks                        0                       # number of writebacks
 system.cpu.decode.DECODE:BlockedCycles          17501                       # Number of cycles decode is blocked
 system.cpu.decode.DECODE:BranchMispred             70                       # Number of times decode detected a branch misprediction
-system.cpu.decode.DECODE:BranchResolved           168                       # Number of times decode resolved a branch
-system.cpu.decode.DECODE:DecodedInsts           29666                       # Number of instructions handled by decode
-system.cpu.decode.DECODE:IdleCycles             28130                       # Number of cycles decode is idle
-system.cpu.decode.DECODE:RunCycles               5553                       # Number of cycles decode is running
-system.cpu.decode.DECODE:SquashCycles            2529                       # Number of cycles decode is squashing
+system.cpu.decode.DECODE:BranchResolved           167                       # Number of times decode resolved a branch
+system.cpu.decode.DECODE:DecodedInsts           29609                       # Number of instructions handled by decode
+system.cpu.decode.DECODE:IdleCycles             29114                       # Number of cycles decode is idle
+system.cpu.decode.DECODE:RunCycles               5540                       # Number of cycles decode is running
+system.cpu.decode.DECODE:SquashCycles            2527                       # Number of cycles decode is squashing
 system.cpu.decode.DECODE:SquashedInsts            200                       # Number of squashed instructions handled by decode
 system.cpu.decode.DECODE:UnblockCycles             60                       # Number of cycles decode is unblocking
-system.cpu.fetch.Branches                        5229                       # Number of branches that fetch encountered
-system.cpu.fetch.CacheLines                      6371                       # Number of cache lines fetched
-system.cpu.fetch.Cycles                         13322                       # Number of cycles fetch has run and was not squashing or blocked
-system.cpu.fetch.IcacheSquashes                   296                       # Number of outstanding Icache misses that were squashed
-system.cpu.fetch.Insts                          35572                       # Number of instructions fetch has processed
+system.cpu.fetch.Branches                        5224                       # Number of branches that fetch encountered
+system.cpu.fetch.CacheLines                      6367                       # Number of cache lines fetched
+system.cpu.fetch.Cycles                         13308                       # Number of cycles fetch has run and was not squashing or blocked
+system.cpu.fetch.IcacheSquashes                   295                       # Number of outstanding Icache misses that were squashed
+system.cpu.fetch.Insts                          35526                       # Number of instructions fetch has processed
 system.cpu.fetch.SquashCycles                    2057                       # Number of cycles fetch has spent squashing
-system.cpu.fetch.branchRate                  0.097242                       # Number of branch fetches per cycle
-system.cpu.fetch.icacheStallCycles               6371                       # Number of cycles fetch is stalled on an Icache miss
-system.cpu.fetch.predictedBranches               3496                       # Number of branches that fetch has predicted taken
-system.cpu.fetch.rate                        0.661522                       # Number of inst fetches per cycle
+system.cpu.fetch.branchRate                  0.095429                       # Number of branch fetches per cycle
+system.cpu.fetch.icacheStallCycles               7360                       # Number of cycles fetch is stalled on an Icache miss
+system.cpu.fetch.predictedBranches               3490                       # Number of branches that fetch has predicted taken
+system.cpu.fetch.rate                        0.648972                       # Number of inst fetches per cycle
 system.cpu.fetch.rateDist.start_dist                           # Number of instructions fetched each cycle (Total)
-system.cpu.fetch.rateDist.samples               53773                      
+system.cpu.fetch.rateDist.samples               54742                      
 system.cpu.fetch.rateDist.min_value                 0                      
-                               0        46825   8707.90%           
-                               1          199     37.01%           
-                               2          504     93.73%           
-                               3         1429    265.75%           
-                               4         1462    271.88%           
-                               5          245     45.56%           
-                               6          322     59.88%           
-                               7         1223    227.44%           
-                               8         1564    290.85%           
+                               0        47805   8732.78%           
+                               1          199     36.35%           
+                               2          500     91.34%           
+                               3         1426    260.49%           
+                               4         1459    266.52%           
+                               5          244     44.57%           
+                               6          327     59.73%           
+                               7         1225    223.78%           
+                               8         1557    284.43%           
 system.cpu.fetch.rateDist.max_value                 8                      
 system.cpu.fetch.rateDist.end_dist
 
-system.cpu.icache.ReadReq_accesses               6370                       # number of ReadReq accesses(hits+misses)
-system.cpu.icache.ReadReq_avg_miss_latency  5088.614350                       # average ReadReq miss latency
+system.cpu.icache.ReadReq_accesses               6366                       # number of ReadReq accesses(hits+misses)
+system.cpu.icache.ReadReq_avg_miss_latency  5085.923937                       # average ReadReq miss latency
 system.cpu.icache.ReadReq_avg_mshr_miss_latency  4278.032258                       # average ReadReq mshr miss latency
-system.cpu.icache.ReadReq_hits                   5924                       # number of ReadReq hits
-system.cpu.icache.ReadReq_miss_latency        2269522                       # number of ReadReq miss cycles
-system.cpu.icache.ReadReq_miss_rate          0.070016                       # miss rate for ReadReq accesses
-system.cpu.icache.ReadReq_misses                  446                       # number of ReadReq misses
-system.cpu.icache.ReadReq_mshr_hits               136                       # number of ReadReq MSHR hits
+system.cpu.icache.ReadReq_hits                   5919                       # number of ReadReq hits
+system.cpu.icache.ReadReq_miss_latency        2273408                       # number of ReadReq miss cycles
+system.cpu.icache.ReadReq_miss_rate          0.070217                       # miss rate for ReadReq accesses
+system.cpu.icache.ReadReq_misses                  447                       # number of ReadReq misses
+system.cpu.icache.ReadReq_mshr_hits               137                       # number of ReadReq MSHR hits
 system.cpu.icache.ReadReq_mshr_miss_latency      1326190                       # number of ReadReq MSHR miss cycles
-system.cpu.icache.ReadReq_mshr_miss_rate     0.048666                       # mshr miss rate for ReadReq accesses
+system.cpu.icache.ReadReq_mshr_miss_rate     0.048696                       # mshr miss rate for ReadReq accesses
 system.cpu.icache.ReadReq_mshr_misses             310                       # number of ReadReq MSHR misses
 system.cpu.icache.avg_blocked_cycles_no_mshrs <err: div-0>                       # average number of cycles each access was blocked
-system.cpu.icache.avg_blocked_cycles_no_targets  3444.375000                       # average number of cycles each access was blocked
-system.cpu.icache.avg_refs                  19.109677                       # Average number of references to valid blocks.
+system.cpu.icache.avg_blocked_cycles_no_targets  3443.500000                       # average number of cycles each access was blocked
+system.cpu.icache.avg_refs                  19.093548                       # Average number of references to valid blocks.
 system.cpu.icache.blocked_no_mshrs                  0                       # number of cycles access was blocked
 system.cpu.icache.blocked_no_targets                8                       # number of cycles access was blocked
 system.cpu.icache.blocked_cycles_no_mshrs            0                       # number of cycles access was blocked
-system.cpu.icache.blocked_cycles_no_targets        27555                       # number of cycles access was blocked
+system.cpu.icache.blocked_cycles_no_targets        27548                       # number of cycles access was blocked
 system.cpu.icache.cache_copies                      0                       # number of cache copies performed
-system.cpu.icache.demand_accesses                6370                       # number of demand (read+write) accesses
-system.cpu.icache.demand_avg_miss_latency  5088.614350                       # average overall miss latency
+system.cpu.icache.demand_accesses                6366                       # number of demand (read+write) accesses
+system.cpu.icache.demand_avg_miss_latency  5085.923937                       # average overall miss latency
 system.cpu.icache.demand_avg_mshr_miss_latency  4278.032258                       # average overall mshr miss latency
-system.cpu.icache.demand_hits                    5924                       # number of demand (read+write) hits
-system.cpu.icache.demand_miss_latency         2269522                       # number of demand (read+write) miss cycles
-system.cpu.icache.demand_miss_rate           0.070016                       # miss rate for demand accesses
-system.cpu.icache.demand_misses                   446                       # number of demand (read+write) misses
-system.cpu.icache.demand_mshr_hits                136                       # number of demand (read+write) MSHR hits
+system.cpu.icache.demand_hits                    5919                       # number of demand (read+write) hits
+system.cpu.icache.demand_miss_latency         2273408                       # number of demand (read+write) miss cycles
+system.cpu.icache.demand_miss_rate           0.070217                       # miss rate for demand accesses
+system.cpu.icache.demand_misses                   447                       # number of demand (read+write) misses
+system.cpu.icache.demand_mshr_hits                137                       # number of demand (read+write) MSHR hits
 system.cpu.icache.demand_mshr_miss_latency      1326190                       # number of demand (read+write) MSHR miss cycles
-system.cpu.icache.demand_mshr_miss_rate      0.048666                       # mshr miss rate for demand accesses
+system.cpu.icache.demand_mshr_miss_rate      0.048696                       # mshr miss rate for demand accesses
 system.cpu.icache.demand_mshr_misses              310                       # number of demand (read+write) MSHR misses
 system.cpu.icache.fast_writes                       0                       # number of fast writes performed
 system.cpu.icache.mshr_cap_events                   0                       # number of times MSHR cap was activated
 system.cpu.icache.no_allocate_misses                0                       # Number of misses that were no-allocate
-system.cpu.icache.overall_accesses               6370                       # number of overall (read+write) accesses
-system.cpu.icache.overall_avg_miss_latency  5088.614350                       # average overall miss latency
+system.cpu.icache.overall_accesses               6366                       # number of overall (read+write) accesses
+system.cpu.icache.overall_avg_miss_latency  5085.923937                       # average overall miss latency
 system.cpu.icache.overall_avg_mshr_miss_latency  4278.032258                       # average overall mshr miss latency
 system.cpu.icache.overall_avg_mshr_uncacheable_latency <err: div-0>                       # average overall mshr uncacheable latency
-system.cpu.icache.overall_hits                   5924                       # number of overall hits
-system.cpu.icache.overall_miss_latency        2269522                       # number of overall miss cycles
-system.cpu.icache.overall_miss_rate          0.070016                       # miss rate for overall accesses
-system.cpu.icache.overall_misses                  446                       # number of overall misses
-system.cpu.icache.overall_mshr_hits               136                       # number of overall MSHR hits
+system.cpu.icache.overall_hits                   5919                       # number of overall hits
+system.cpu.icache.overall_miss_latency        2273408                       # number of overall miss cycles
+system.cpu.icache.overall_miss_rate          0.070217                       # miss rate for overall accesses
+system.cpu.icache.overall_misses                  447                       # number of overall misses
+system.cpu.icache.overall_mshr_hits               137                       # number of overall MSHR hits
 system.cpu.icache.overall_mshr_miss_latency      1326190                       # number of overall MSHR miss cycles
-system.cpu.icache.overall_mshr_miss_rate     0.048666                       # mshr miss rate for overall accesses
+system.cpu.icache.overall_mshr_miss_rate     0.048696                       # mshr miss rate for overall accesses
 system.cpu.icache.overall_mshr_misses             310                       # number of overall MSHR misses
 system.cpu.icache.overall_mshr_uncacheable_latency            0                       # number of overall MSHR uncacheable cycles
 system.cpu.icache.overall_mshr_uncacheable_misses            0                       # number of overall MSHR uncacheable misses
@@ -218,59 +218,59 @@ system.cpu.icache.prefetcher.num_hwpf_squashed_from_miss            0
 system.cpu.icache.replacements                      0                       # number of replacements
 system.cpu.icache.sampled_refs                    310                       # Sample count of references to valid blocks.
 system.cpu.icache.soft_prefetch_mshr_full            0                       # number of mshr full events for SW prefetching instrutions
-system.cpu.icache.tagsinuse                147.070827                       # Cycle average of tags in use
-system.cpu.icache.total_refs                     5924                       # Total number of references to valid blocks.
+system.cpu.icache.tagsinuse                147.070711                       # Cycle average of tags in use
+system.cpu.icache.total_refs                     5919                       # Total number of references to valid blocks.
 system.cpu.icache.warmup_cycle                      0                       # Cycle when the warmup percentage was hit.
 system.cpu.icache.writebacks                        0                       # number of writebacks
-system.cpu.idleCycles                         1346363                       # Total number of cycles that the CPU has spent unscheduled due to idling
-system.cpu.iew.EXEC:branches                     2364                       # Number of branches executed
+system.cpu.idleCycles                         1345393                       # Total number of cycles that the CPU has spent unscheduled due to idling
+system.cpu.iew.EXEC:branches                     2362                       # Number of branches executed
 system.cpu.iew.EXEC:nop                            48                       # number of nop insts executed
-system.cpu.iew.EXEC:rate                     0.251650                       # Inst execution rate
-system.cpu.iew.EXEC:refs                         5460                       # number of memory reference insts executed
-system.cpu.iew.EXEC:stores                       2123                       # Number of stores executed
+system.cpu.iew.EXEC:rate                     0.247123                       # Inst execution rate
+system.cpu.iew.EXEC:refs                         5464                       # number of memory reference insts executed
+system.cpu.iew.EXEC:stores                       2131                       # Number of stores executed
 system.cpu.iew.EXEC:swp                             0                       # number of swp insts executed
 system.cpu.iew.WB:consumers                      6466                       # num instructions consuming a value
-system.cpu.iew.WB:count                         11620                       # cumulative count of insts written-back
-system.cpu.iew.WB:fanout                     0.798639                       # average fanout of values written-back
+system.cpu.iew.WB:count                         11625                       # cumulative count of insts written-back
+system.cpu.iew.WB:fanout                     0.798948                       # average fanout of values written-back
 system.cpu.iew.WB:penalized                         0                       # number of instrctions required to write to 'other' IQ
 system.cpu.iew.WB:penalized_rate                    0                       # fraction of instructions written-back that wrote to 'other' IQ
-system.cpu.iew.WB:producers                      5164                       # num instructions producing a value
-system.cpu.iew.WB:rate                       0.216094                       # insts written-back per cycle
-system.cpu.iew.WB:sent                          11692                       # cumulative count of insts sent to commit
+system.cpu.iew.WB:producers                      5166                       # num instructions producing a value
+system.cpu.iew.WB:rate                       0.212360                       # insts written-back per cycle
+system.cpu.iew.WB:sent                          11698                       # cumulative count of insts sent to commit
 system.cpu.iew.branchMispredicts                  401                       # Number of branch mispredicts detected at execute
 system.cpu.iew.iewBlockCycles                    7230                       # Number of cycles IEW is blocking
-system.cpu.iew.iewDispLoadInsts                  3775                       # Number of dispatched load instructions
+system.cpu.iew.iewDispLoadInsts                  3770                       # Number of dispatched load instructions
 system.cpu.iew.iewDispNonSpecInsts                 24                       # Number of dispatched non-speculative instructions
-system.cpu.iew.iewDispSquashedInsts              2557                       # Number of squashed instructions skipped by dispatch
-system.cpu.iew.iewDispStoreInsts                 3734                       # Number of dispatched store instructions
-system.cpu.iew.iewDispatchedInsts               19465                       # Number of instructions dispatched to IQ
-system.cpu.iew.iewExecLoadInsts                  3337                       # Number of load instructions executed
-system.cpu.iew.iewExecSquashedInsts               308                       # Number of squashed instructions skipped in execute
-system.cpu.iew.iewExecutedInsts                 13532                       # Number of executed instructions
+system.cpu.iew.iewDispSquashedInsts              2547                       # Number of squashed instructions skipped by dispatch
+system.cpu.iew.iewDispStoreInsts                 3723                       # Number of dispatched store instructions
+system.cpu.iew.iewDispatchedInsts               19439                       # Number of instructions dispatched to IQ
+system.cpu.iew.iewExecLoadInsts                  3333                       # Number of load instructions executed
+system.cpu.iew.iewExecSquashedInsts               305                       # Number of squashed instructions skipped in execute
+system.cpu.iew.iewExecutedInsts                 13528                       # Number of executed instructions
 system.cpu.iew.iewIQFullEvents                     10                       # Number of times the IQ has become full, causing a stall
 system.cpu.iew.iewIdleCycles                        0                       # Number of cycles IEW is idle
 system.cpu.iew.iewLSQFullEvents                     1                       # Number of times the LSQ has become full, causing a stall
-system.cpu.iew.iewSquashCycles                   2529                       # Number of cycles IEW is squashing
+system.cpu.iew.iewSquashCycles                   2527                       # Number of cycles IEW is squashing
 system.cpu.iew.iewUnblockCycles                    39                       # Number of cycles IEW is unblocking
-system.cpu.iew.lsq.thread.0.blockedLoads            1                       # Number of blocked loads due to partial load-store forwarding
+system.cpu.iew.lsq.thread.0.blockedLoads            0                       # Number of blocked loads due to partial load-store forwarding
 system.cpu.iew.lsq.thread.0.cacheBlocked         1656                       # Number of times an access to memory failed due to the cache being blocked
 system.cpu.iew.lsq.thread.0.forwLoads              81                       # Number of loads that had data forwarded from stores
 system.cpu.iew.lsq.thread.0.ignoredResponses            3                       # Number of memory responses ignored because the instruction is squashed
 system.cpu.iew.lsq.thread.0.invAddrLoads            0                       # Number of loads ignored due to an invalid address
 system.cpu.iew.lsq.thread.0.invAddrSwpfs            0                       # Number of software prefetches ignored due to an invalid address
-system.cpu.iew.lsq.thread.0.memOrderViolation           40                       # Number of memory ordering violations
+system.cpu.iew.lsq.thread.0.memOrderViolation           61                       # Number of memory ordering violations
 system.cpu.iew.lsq.thread.0.rescheduledLoads            1                       # Number of loads that were rescheduled
-system.cpu.iew.lsq.thread.0.squashedLoads         2796                       # Number of loads squashed
-system.cpu.iew.lsq.thread.0.squashedStores         2922                       # Number of stores squashed
-system.cpu.iew.memOrderViolationEvents             40                       # Number of memory order violations
+system.cpu.iew.lsq.thread.0.squashedLoads         2791                       # Number of loads squashed
+system.cpu.iew.lsq.thread.0.squashedStores         2911                       # Number of stores squashed
+system.cpu.iew.memOrderViolationEvents             61                       # Number of memory order violations
 system.cpu.iew.predictedNotTakenIncorrect          279                       # Number of branches that were predicted not taken incorrectly
 system.cpu.iew.predictedTakenIncorrect            122                       # Number of branches that were predicted taken incorrectly
 system.cpu.ipc                               0.004016                       # IPC: Instructions Per Cycle
 system.cpu.ipc_total                         0.004016                       # IPC: Total IPC of All Threads
-system.cpu.iq.ISSUE:FU_type_0                   13840                       # Type of FU issued
+system.cpu.iq.ISSUE:FU_type_0                   13833                       # Type of FU issued
 system.cpu.iq.ISSUE:FU_type_0.start_dist
                           (null)            2      0.01%            # Type of FU issued
-                          IntAlu         8249     59.60%            # Type of FU issued
+                          IntAlu         8240     59.57%            # Type of FU issued
                          IntMult            1      0.01%            # Type of FU issued
                           IntDiv            0      0.00%            # Type of FU issued
                         FloatAdd            2      0.01%            # Type of FU issued
@@ -279,16 +279,16 @@ system.cpu.iq.ISSUE:FU_type_0.start_dist
                        FloatMult            0      0.00%            # Type of FU issued
                         FloatDiv            0      0.00%            # Type of FU issued
                        FloatSqrt            0      0.00%            # Type of FU issued
-                         MemRead         3432     24.80%            # Type of FU issued
-                        MemWrite         2154     15.56%            # Type of FU issued
+                         MemRead         3428     24.78%            # Type of FU issued
+                        MemWrite         2160     15.61%            # Type of FU issued
                        IprAccess            0      0.00%            # Type of FU issued
                     InstPrefetch            0      0.00%            # Type of FU issued
 system.cpu.iq.ISSUE:FU_type_0.end_dist
-system.cpu.iq.ISSUE:fu_busy_cnt                    86                       # FU busy when requested
-system.cpu.iq.ISSUE:fu_busy_rate             0.006214                       # FU busy rate (busy events/executed inst)
+system.cpu.iq.ISSUE:fu_busy_cnt                    87                       # FU busy when requested
+system.cpu.iq.ISSUE:fu_busy_rate             0.006289                       # FU busy rate (busy events/executed inst)
 system.cpu.iq.ISSUE:fu_full.start_dist
                           (null)            0      0.00%            # attempts to use FU when none available
-                          IntAlu            1      1.16%            # attempts to use FU when none available
+                          IntAlu            1      1.15%            # attempts to use FU when none available
                          IntMult            0      0.00%            # attempts to use FU when none available
                           IntDiv            0      0.00%            # attempts to use FU when none available
                         FloatAdd            0      0.00%            # attempts to use FU when none available
@@ -297,38 +297,38 @@ system.cpu.iq.ISSUE:fu_full.start_dist
                        FloatMult            0      0.00%            # attempts to use FU when none available
                         FloatDiv            0      0.00%            # attempts to use FU when none available
                        FloatSqrt            0      0.00%            # attempts to use FU when none available
-                         MemRead           53     61.63%            # attempts to use FU when none available
-                        MemWrite           32     37.21%            # attempts to use FU when none available
+                         MemRead           54     62.07%            # attempts to use FU when none available
+                        MemWrite           32     36.78%            # attempts to use FU when none available
                        IprAccess            0      0.00%            # attempts to use FU when none available
                     InstPrefetch            0      0.00%            # attempts to use FU when none available
 system.cpu.iq.ISSUE:fu_full.end_dist
 system.cpu.iq.ISSUE:issued_per_cycle.start_dist                     # Number of insts issued each cycle
-system.cpu.iq.ISSUE:issued_per_cycle.samples        53773                      
+system.cpu.iq.ISSUE:issued_per_cycle.samples        54742                      
 system.cpu.iq.ISSUE:issued_per_cycle.min_value            0                      
-                               0        46903   8722.41%           
-                               1         3262    606.62%           
-                               2         1316    244.73%           
-                               3         1665    309.63%           
-                               4          333     61.93%           
-                               5          188     34.96%           
-                               6           73     13.58%           
-                               7           23      4.28%           
-                               8           10      1.86%           
+                               0        47874   8745.39%           
+                               1         3270    597.35%           
+                               2         1302    237.84%           
+                               3         1673    305.62%           
+                               4          327     59.73%           
+                               5          188     34.34%           
+                               6           75     13.70%           
+                               7           22      4.02%           
+                               8           11      2.01%           
 system.cpu.iq.ISSUE:issued_per_cycle.max_value            8                      
 system.cpu.iq.ISSUE:issued_per_cycle.end_dist
 
-system.cpu.iq.ISSUE:rate                     0.257378                       # Inst issue rate
-system.cpu.iq.iqInstsAdded                      19393                       # Number of instructions added to the IQ (excludes non-spec)
-system.cpu.iq.iqInstsIssued                     13840                       # Number of instructions issued
+system.cpu.iq.ISSUE:rate                     0.252694                       # Inst issue rate
+system.cpu.iq.iqInstsAdded                      19367                       # Number of instructions added to the IQ (excludes non-spec)
+system.cpu.iq.iqInstsIssued                     13833                       # Number of instructions issued
 system.cpu.iq.iqNonSpecInstsAdded                  24                       # Number of non-speculative instructions added to the IQ
-system.cpu.iq.iqSquashedInstsExamined           13381                       # Number of squashed instructions iterated over during squash; mainly for profiling
-system.cpu.iq.iqSquashedInstsIssued                72                       # Number of squashed instructions issued
+system.cpu.iq.iqSquashedInstsExamined           13339                       # Number of squashed instructions iterated over during squash; mainly for profiling
+system.cpu.iq.iqSquashedInstsIssued                73                       # Number of squashed instructions issued
 system.cpu.iq.iqSquashedNonSpecRemoved              7                       # Number of squashed non-spec instructions that were removed
-system.cpu.iq.iqSquashedOperandsExamined         9575                       # Number of squashed operands that are examined and possibly removed from graph
+system.cpu.iq.iqSquashedOperandsExamined         9527                       # Number of squashed operands that are examined and possibly removed from graph
 system.cpu.l2cache.ReadReq_accesses               480                       # number of ReadReq accesses(hits+misses)
-system.cpu.l2cache.ReadReq_avg_miss_latency  4520.691667                       # average ReadReq miss latency
+system.cpu.l2cache.ReadReq_avg_miss_latency  4520.693750                       # average ReadReq miss latency
 system.cpu.l2cache.ReadReq_avg_mshr_miss_latency  2303.372917                       # average ReadReq mshr miss latency
-system.cpu.l2cache.ReadReq_miss_latency       2169932                       # number of ReadReq miss cycles
+system.cpu.l2cache.ReadReq_miss_latency       2169933                       # number of ReadReq miss cycles
 system.cpu.l2cache.ReadReq_miss_rate                1                       # miss rate for ReadReq accesses
 system.cpu.l2cache.ReadReq_misses                 480                       # number of ReadReq misses
 system.cpu.l2cache.ReadReq_mshr_miss_latency      1105619                       # number of ReadReq MSHR miss cycles
@@ -343,10 +343,10 @@ system.cpu.l2cache.blocked_cycles_no_mshrs            0                       #
 system.cpu.l2cache.blocked_cycles_no_targets            0                       # number of cycles access was blocked
 system.cpu.l2cache.cache_copies                     0                       # number of cache copies performed
 system.cpu.l2cache.demand_accesses                480                       # number of demand (read+write) accesses
-system.cpu.l2cache.demand_avg_miss_latency  4520.691667                       # average overall miss latency
+system.cpu.l2cache.demand_avg_miss_latency  4520.693750                       # average overall miss latency
 system.cpu.l2cache.demand_avg_mshr_miss_latency  2303.372917                       # average overall mshr miss latency
 system.cpu.l2cache.demand_hits                      0                       # number of demand (read+write) hits
-system.cpu.l2cache.demand_miss_latency        2169932                       # number of demand (read+write) miss cycles
+system.cpu.l2cache.demand_miss_latency        2169933                       # number of demand (read+write) miss cycles
 system.cpu.l2cache.demand_miss_rate                 1                       # miss rate for demand accesses
 system.cpu.l2cache.demand_misses                  480                       # number of demand (read+write) misses
 system.cpu.l2cache.demand_mshr_hits                 0                       # number of demand (read+write) MSHR hits
@@ -357,11 +357,11 @@ system.cpu.l2cache.fast_writes                      0                       # nu
 system.cpu.l2cache.mshr_cap_events                  0                       # number of times MSHR cap was activated
 system.cpu.l2cache.no_allocate_misses               0                       # Number of misses that were no-allocate
 system.cpu.l2cache.overall_accesses               480                       # number of overall (read+write) accesses
-system.cpu.l2cache.overall_avg_miss_latency  4520.691667                       # average overall miss latency
+system.cpu.l2cache.overall_avg_miss_latency  4520.693750                       # average overall miss latency
 system.cpu.l2cache.overall_avg_mshr_miss_latency  2303.372917                       # average overall mshr miss latency
 system.cpu.l2cache.overall_avg_mshr_uncacheable_latency <err: div-0>                       # average overall mshr uncacheable latency
 system.cpu.l2cache.overall_hits                     0                       # number of overall hits
-system.cpu.l2cache.overall_miss_latency       2169932                       # number of overall miss cycles
+system.cpu.l2cache.overall_miss_latency       2169933                       # number of overall miss cycles
 system.cpu.l2cache.overall_miss_rate                1                       # miss rate for overall accesses
 system.cpu.l2cache.overall_misses                 480                       # number of overall misses
 system.cpu.l2cache.overall_mshr_hits                0                       # number of overall MSHR hits
@@ -382,27 +382,27 @@ system.cpu.l2cache.prefetcher.num_hwpf_squashed_from_miss            0
 system.cpu.l2cache.replacements                     0                       # number of replacements
 system.cpu.l2cache.sampled_refs                   480                       # Sample count of references to valid blocks.
 system.cpu.l2cache.soft_prefetch_mshr_full            0                       # number of mshr full events for SW prefetching instrutions
-system.cpu.l2cache.tagsinuse               248.469634                       # Cycle average of tags in use
+system.cpu.l2cache.tagsinuse               248.469469                       # Cycle average of tags in use
 system.cpu.l2cache.total_refs                       0                       # Total number of references to valid blocks.
 system.cpu.l2cache.warmup_cycle                     0                       # Cycle when the warmup percentage was hit.
 system.cpu.l2cache.writebacks                       0                       # number of writebacks
-system.cpu.numCycles                            53773                       # number of cpu cycles simulated
-system.cpu.rename.RENAME:BlockCycles             7860                       # Number of cycles rename is blocking
+system.cpu.numCycles                            54742                       # number of cpu cycles simulated
+system.cpu.rename.RENAME:BlockCycles             7851                       # Number of cycles rename is blocking
 system.cpu.rename.RENAME:CommittedMaps           4051                       # Number of HB maps that are committed
 system.cpu.rename.RENAME:IQFullEvents               2                       # Number of times rename has blocked due to IQ full
-system.cpu.rename.RENAME:IdleCycles             28280                       # Number of cycles rename is idle
-system.cpu.rename.RENAME:LSQFullEvents            453                       # Number of times rename has blocked due to LSQ full
+system.cpu.rename.RENAME:IdleCycles             29263                       # Number of cycles rename is idle
+system.cpu.rename.RENAME:LSQFullEvents            458                       # Number of times rename has blocked due to LSQ full
 system.cpu.rename.RENAME:ROBFullEvents              8                       # Number of times rename has blocked due to ROB full
-system.cpu.rename.RENAME:RenameLookups          36016                       # Number of register rename lookups that rename has made
-system.cpu.rename.RENAME:RenamedInsts           29203                       # Number of instructions processed by rename
-system.cpu.rename.RENAME:RenamedOperands        20142                       # Number of destination operands rename has renamed
-system.cpu.rename.RENAME:RunCycles               5460                       # Number of cycles rename is running
-system.cpu.rename.RENAME:SquashCycles            2529                       # Number of cycles rename is squashing
-system.cpu.rename.RENAME:UnblockCycles            483                       # Number of cycles rename is unblocking
-system.cpu.rename.RENAME:UndoneMaps             16091                       # Number of HB maps that are undone due to squashing
-system.cpu.rename.RENAME:serializeStallCycles         9161                       # count of cycles rename stalled for serializing inst
+system.cpu.rename.RENAME:RenameLookups          35953                       # Number of register rename lookups that rename has made
+system.cpu.rename.RENAME:RenamedInsts           29156                       # Number of instructions processed by rename
+system.cpu.rename.RENAME:RenamedOperands        20115                       # Number of destination operands rename has renamed
+system.cpu.rename.RENAME:RunCycles               5451                       # Number of cycles rename is running
+system.cpu.rename.RENAME:SquashCycles            2527                       # Number of cycles rename is squashing
+system.cpu.rename.RENAME:UnblockCycles            486                       # Number of cycles rename is unblocking
+system.cpu.rename.RENAME:UndoneMaps             16064                       # Number of HB maps that are undone due to squashing
+system.cpu.rename.RENAME:serializeStallCycles         9164                       # count of cycles rename stalled for serializing inst
 system.cpu.rename.RENAME:serializingInsts           27                       # count of serializing insts renamed
-system.cpu.rename.RENAME:skidInsts                828                       # count of insts added to the skid buffer
+system.cpu.rename.RENAME:skidInsts                831                       # count of insts added to the skid buffer
 system.cpu.rename.RENAME:tempSerializingInsts           21                       # count of temporary serializing insts renamed
 system.cpu.timesIdled                             369                       # Number of times that the entire CPU went into an idle state and unscheduled itself
 system.cpu.workload.PROG:num_syscalls              17                       # Number of system calls
diff --git a/tests/quick/00.hello/ref/alpha/linux/o3-timing/stderr b/tests/quick/00.hello/ref/alpha/linux/o3-timing/stderr
index eb1796ead..684350ff9 100644
--- a/tests/quick/00.hello/ref/alpha/linux/o3-timing/stderr
+++ b/tests/quick/00.hello/ref/alpha/linux/o3-timing/stderr
@@ -1,2 +1,3 @@
-0: system.remote_gdb.listener: listening for remote gdb on port 7000
+0: system.remote_gdb.listener: listening for remote gdb #0 on port 7000
 warn: Entering event queue @ 0.  Starting simulation...
+warn: Increasing stack size by one page.
diff --git a/tests/quick/00.hello/ref/alpha/linux/o3-timing/stdout b/tests/quick/00.hello/ref/alpha/linux/o3-timing/stdout
index 511bc594d..cbdc4ee25 100644
--- a/tests/quick/00.hello/ref/alpha/linux/o3-timing/stdout
+++ b/tests/quick/00.hello/ref/alpha/linux/o3-timing/stdout
@@ -6,8 +6,9 @@ The Regents of The University of Michigan
 All Rights Reserved
 
 
-M5 compiled Jan 22 2007 23:06:52
-M5 started Mon Jan 22 23:06:54 2007
-M5 executing on ewok
-command line: build/ALPHA_SE/m5.fast -d build/ALPHA_SE/tests/fast/quick/00.hello/alpha/linux/o3-timing tests/run.py quick/00.hello/alpha/linux/o3-timing
-Exiting @ tick 1400135 because target called exit()
+M5 compiled Mar 24 2007 13:51:02
+M5 started Sat Mar 24 13:51:12 2007
+M5 executing on zizzer.eecs.umich.edu
+command line: build/ALPHA_SE/m5.opt -d build/ALPHA_SE/tests/opt/quick/00.hello/alpha/linux/o3-timing tests/run.py quick/00.hello/alpha/linux/o3-timing
+Global frequency set at 1000000000000 ticks per second
+Exiting @ tick 1400134 because target called exit()
diff --git a/tests/quick/00.hello/ref/alpha/tru64/o3-timing/config.ini b/tests/quick/00.hello/ref/alpha/tru64/o3-timing/config.ini
index db88e7673..ea499f4f1 100644
--- a/tests/quick/00.hello/ref/alpha/tru64/o3-timing/config.ini
+++ b/tests/quick/00.hello/ref/alpha/tru64/o3-timing/config.ini
@@ -1,48 +1,7 @@
 [root]
 type=Root
 children=system
-checkpoint=
-clock=1000000000000
-max_tick=0
-output_file=cout
-progress_interval=0
-
-[exetrace]
-intel_format=false
-legion_lockstep=false
-pc_symbol=true
-print_cpseq=false
-print_cycle=true
-print_data=true
-print_effaddr=true
-print_fetchseq=false
-print_iregs=false
-print_opclass=true
-print_thread=true
-speculative=true
-trace_system=client
-
-[serialize]
-count=10
-cycle=0
-dir=cpt.%012d
-period=0
-
-[stats]
-descriptions=true
-dump_cycle=0
-dump_period=0
-dump_reset=false
-ignore_events=
-mysql_db=
-mysql_host=
-mysql_password=
-mysql_user=
-project_name=test
-simulation_name=test
-simulation_sample=0
-text_compat=true
-text_file=m5stats.txt
+dummy=0
 
 [system]
 type=System
@@ -70,6 +29,7 @@ commitToFetchDelay=1
 commitToIEWDelay=1
 commitToRenameDelay=1
 commitWidth=8
+cpu_id=0
 decodeToFetchDelay=1
 decodeToRenameDelay=1
 decodeWidth=8
@@ -417,12 +377,3 @@ range=0:134217727
 zero=false
 port=system.membus.port[0]
 
-[trace]
-bufsize=0
-cycle=0
-dump_on_exit=false
-file=cout
-flags=
-ignore=
-start=0
-
diff --git a/tests/quick/00.hello/ref/alpha/tru64/o3-timing/config.out b/tests/quick/00.hello/ref/alpha/tru64/o3-timing/config.out
index 9ee1931ca..6672039dd 100644
--- a/tests/quick/00.hello/ref/alpha/tru64/o3-timing/config.out
+++ b/tests/quick/00.hello/ref/alpha/tru64/o3-timing/config.out
@@ -1,9 +1,6 @@
 [root]
 type=Root
-clock=1000000000000
-max_tick=0
-progress_interval=0
-output_file=cout
+dummy=0
 
 [system.physmem]
 type=PhysicalMemory
@@ -173,6 +170,7 @@ type=DerivO3CPU
 clock=1
 phase=0
 numThreads=1
+cpu_id=0
 activity=0
 workload=system.cpu.workload
 checker=null
@@ -367,51 +365,3 @@ clock=1000
 width=64
 responder_set=false
 
-[trace]
-flags=
-start=0
-cycle=0
-bufsize=0
-file=cout
-dump_on_exit=false
-ignore=
-
-[stats]
-descriptions=true
-project_name=test
-simulation_name=test
-simulation_sample=0
-text_file=m5stats.txt
-text_compat=true
-mysql_db=
-mysql_user=
-mysql_password=
-mysql_host=
-events_start=-1
-dump_reset=false
-dump_cycle=0
-dump_period=0
-ignore_events=
-
-[random]
-seed=1
-
-[exetrace]
-speculative=true
-print_cycle=true
-print_opclass=true
-print_thread=true
-print_effaddr=true
-print_data=true
-print_iregs=false
-print_fetchseq=false
-print_cpseq=false
-print_reg_delta=false
-pc_symbol=true
-intel_format=false
-legion_lockstep=false
-trace_system=client
-
-[statsreset]
-reset_cycle=0
-
diff --git a/tests/quick/00.hello/ref/alpha/tru64/o3-timing/m5stats.txt b/tests/quick/00.hello/ref/alpha/tru64/o3-timing/m5stats.txt
index 3aae57d12..f855ff850 100644
--- a/tests/quick/00.hello/ref/alpha/tru64/o3-timing/m5stats.txt
+++ b/tests/quick/00.hello/ref/alpha/tru64/o3-timing/m5stats.txt
@@ -8,10 +8,10 @@ global.BPredUnit.condIncorrect                    218                       # Nu
 global.BPredUnit.condPredicted                    459                       # Number of conditional branches predicted
 global.BPredUnit.lookups                          898                       # Number of BP lookups
 global.BPredUnit.usedRAS                          171                       # Number of times the RAS was used to get a target.
-host_inst_rate                                  22132                       # Simulator instruction rate (inst/s)
-host_mem_usage                                 176684                       # Number of bytes of host memory used
-host_seconds                                     0.11                       # Real time elapsed on the host
-host_tick_rate                                6945216                       # Simulator tick rate (ticks/s)
+host_inst_rate                                  12517                       # Simulator instruction rate (inst/s)
+host_mem_usage                                 155528                       # Number of bytes of host memory used
+host_seconds                                     0.19                       # Real time elapsed on the host
+host_tick_rate                                3937113                       # Simulator tick rate (ticks/s)
 memdepunit.memDep.conflictingLoads                 10                       # Number of conflicting loads.
 memdepunit.memDep.conflictingStores                 8                       # Number of conflicting stores.
 memdepunit.memDep.insertedLoads                   783                       # Number of loads inserted to the mem dependence unit.
@@ -26,14 +26,14 @@ system.cpu.commit.COM:bw_limited                    0                       # nu
 system.cpu.commit.COM:committed_per_cycle.start_dist                     # Number of insts commited each cycle
 system.cpu.commit.COM:committed_per_cycle.samples        28200                      
 system.cpu.commit.COM:committed_per_cycle.min_value            0                      
-                               0        27270   9670.21%           
-                               1          239     84.75%           
-                               2          332    117.73%           
+                               0        27273   9671.28%           
+                               1          240     85.11%           
+                               2          328    116.31%           
                                3          127     45.04%           
-                               4           83     29.43%           
+                               4           80     28.37%           
                                5           54     19.15%           
-                               6           26      9.22%           
-                               7           18      6.38%           
+                               6           28      9.93%           
+                               7           19      6.74%           
                                8           51     18.09%           
 system.cpu.commit.COM:committed_per_cycle.max_value            8                      
 system.cpu.commit.COM:committed_per_cycle.end_dist
@@ -52,14 +52,14 @@ system.cpu.committedInsts_total                  2387                       # Nu
 system.cpu.cpi                             315.051529                       # CPI: Cycles Per Instruction
 system.cpu.cpi_total                       315.051529                       # CPI: Total CPI of All Threads
 system.cpu.dcache.ReadReq_accesses                560                       # number of ReadReq accesses(hits+misses)
-system.cpu.dcache.ReadReq_avg_miss_latency  7231.967391                       # average ReadReq miss latency
-system.cpu.dcache.ReadReq_avg_mshr_miss_latency  7288.377049                       # average ReadReq mshr miss latency
+system.cpu.dcache.ReadReq_avg_miss_latency  7232.163043                       # average ReadReq miss latency
+system.cpu.dcache.ReadReq_avg_mshr_miss_latency  7288.491803                       # average ReadReq mshr miss latency
 system.cpu.dcache.ReadReq_hits                    468                       # number of ReadReq hits
-system.cpu.dcache.ReadReq_miss_latency         665341                       # number of ReadReq miss cycles
+system.cpu.dcache.ReadReq_miss_latency         665359                       # number of ReadReq miss cycles
 system.cpu.dcache.ReadReq_miss_rate          0.164286                       # miss rate for ReadReq accesses
 system.cpu.dcache.ReadReq_misses                   92                       # number of ReadReq misses
 system.cpu.dcache.ReadReq_mshr_hits                31                       # number of ReadReq MSHR hits
-system.cpu.dcache.ReadReq_mshr_miss_latency       444591                       # number of ReadReq MSHR miss cycles
+system.cpu.dcache.ReadReq_mshr_miss_latency       444598                       # number of ReadReq MSHR miss cycles
 system.cpu.dcache.ReadReq_mshr_miss_rate     0.108929                       # mshr miss rate for ReadReq accesses
 system.cpu.dcache.ReadReq_mshr_misses              61                       # number of ReadReq MSHR misses
 system.cpu.dcache.WriteReq_accesses               294                       # number of WriteReq accesses(hits+misses)
@@ -74,37 +74,37 @@ system.cpu.dcache.WriteReq_mshr_miss_latency       157720
 system.cpu.dcache.WriteReq_mshr_miss_rate     0.081633                       # mshr miss rate for WriteReq accesses
 system.cpu.dcache.WriteReq_mshr_misses             24                       # number of WriteReq MSHR misses
 system.cpu.dcache.avg_blocked_cycles_no_mshrs <err: div-0>                       # average number of cycles each access was blocked
-system.cpu.dcache.avg_blocked_cycles_no_targets         2980                       # average number of cycles each access was blocked
+system.cpu.dcache.avg_blocked_cycles_no_targets  2980.375000                       # average number of cycles each access was blocked
 system.cpu.dcache.avg_refs                   8.141176                       # Average number of references to valid blocks.
 system.cpu.dcache.blocked_no_mshrs                  0                       # number of cycles access was blocked
 system.cpu.dcache.blocked_no_targets                8                       # number of cycles access was blocked
 system.cpu.dcache.blocked_cycles_no_mshrs            0                       # number of cycles access was blocked
-system.cpu.dcache.blocked_cycles_no_targets        23840                       # number of cycles access was blocked
+system.cpu.dcache.blocked_cycles_no_targets        23843                       # number of cycles access was blocked
 system.cpu.dcache.cache_copies                      0                       # number of cache copies performed
 system.cpu.dcache.demand_accesses                 854                       # number of demand (read+write) accesses
-system.cpu.dcache.demand_avg_miss_latency  6979.500000                       # average overall miss latency
-system.cpu.dcache.demand_avg_mshr_miss_latency  7086.011765                       # average overall mshr miss latency
+system.cpu.dcache.demand_avg_miss_latency  6979.611111                       # average overall miss latency
+system.cpu.dcache.demand_avg_mshr_miss_latency  7086.094118                       # average overall mshr miss latency
 system.cpu.dcache.demand_hits                     692                       # number of demand (read+write) hits
-system.cpu.dcache.demand_miss_latency         1130679                       # number of demand (read+write) miss cycles
+system.cpu.dcache.demand_miss_latency         1130697                       # number of demand (read+write) miss cycles
 system.cpu.dcache.demand_miss_rate           0.189696                       # miss rate for demand accesses
 system.cpu.dcache.demand_misses                   162                       # number of demand (read+write) misses
 system.cpu.dcache.demand_mshr_hits                 77                       # number of demand (read+write) MSHR hits
-system.cpu.dcache.demand_mshr_miss_latency       602311                       # number of demand (read+write) MSHR miss cycles
+system.cpu.dcache.demand_mshr_miss_latency       602318                       # number of demand (read+write) MSHR miss cycles
 system.cpu.dcache.demand_mshr_miss_rate      0.099532                       # mshr miss rate for demand accesses
 system.cpu.dcache.demand_mshr_misses               85                       # number of demand (read+write) MSHR misses
 system.cpu.dcache.fast_writes                       0                       # number of fast writes performed
 system.cpu.dcache.mshr_cap_events                   0                       # number of times MSHR cap was activated
 system.cpu.dcache.no_allocate_misses                0                       # Number of misses that were no-allocate
 system.cpu.dcache.overall_accesses                854                       # number of overall (read+write) accesses
-system.cpu.dcache.overall_avg_miss_latency  6979.500000                       # average overall miss latency
-system.cpu.dcache.overall_avg_mshr_miss_latency  7086.011765                       # average overall mshr miss latency
+system.cpu.dcache.overall_avg_miss_latency  6979.611111                       # average overall miss latency
+system.cpu.dcache.overall_avg_mshr_miss_latency  7086.094118                       # average overall mshr miss latency
 system.cpu.dcache.overall_avg_mshr_uncacheable_latency <err: div-0>                       # average overall mshr uncacheable latency
 system.cpu.dcache.overall_hits                    692                       # number of overall hits
-system.cpu.dcache.overall_miss_latency        1130679                       # number of overall miss cycles
+system.cpu.dcache.overall_miss_latency        1130697                       # number of overall miss cycles
 system.cpu.dcache.overall_miss_rate          0.189696                       # miss rate for overall accesses
 system.cpu.dcache.overall_misses                  162                       # number of overall misses
 system.cpu.dcache.overall_mshr_hits                77                       # number of overall MSHR hits
-system.cpu.dcache.overall_mshr_miss_latency       602311                       # number of overall MSHR miss cycles
+system.cpu.dcache.overall_mshr_miss_latency       602318                       # number of overall MSHR miss cycles
 system.cpu.dcache.overall_mshr_miss_rate     0.099532                       # mshr miss rate for overall accesses
 system.cpu.dcache.overall_mshr_misses              85                       # number of overall MSHR misses
 system.cpu.dcache.overall_mshr_uncacheable_latency            0                       # number of overall MSHR uncacheable cycles
@@ -125,18 +125,18 @@ system.cpu.dcache.tagsinuse                 46.684988                       # Cy
 system.cpu.dcache.total_refs                      692                       # Total number of references to valid blocks.
 system.cpu.dcache.warmup_cycle                      0                       # Cycle when the warmup percentage was hit.
 system.cpu.dcache.writebacks                        0                       # number of writebacks
-system.cpu.decode.DECODE:BlockedCycles          21865                       # Number of cycles decode is blocked
+system.cpu.decode.DECODE:BlockedCycles          21870                       # Number of cycles decode is blocked
 system.cpu.decode.DECODE:BranchMispred             79                       # Number of times decode detected a branch misprediction
 system.cpu.decode.DECODE:BranchResolved           150                       # Number of times decode resolved a branch
 system.cpu.decode.DECODE:DecodedInsts            4900                       # Number of instructions handled by decode
 system.cpu.decode.DECODE:IdleCycles              5406                       # Number of cycles decode is idle
-system.cpu.decode.DECODE:RunCycles                928                       # Number of cycles decode is running
+system.cpu.decode.DECODE:RunCycles                923                       # Number of cycles decode is running
 system.cpu.decode.DECODE:SquashCycles             336                       # Number of cycles decode is squashing
 system.cpu.decode.DECODE:SquashedInsts            286                       # Number of squashed instructions handled by decode
 system.cpu.decode.DECODE:UnblockCycles              2                       # Number of cycles decode is unblocking
 system.cpu.fetch.Branches                         898                       # Number of branches that fetch encountered
 system.cpu.fetch.CacheLines                       813                       # Number of cache lines fetched
-system.cpu.fetch.Cycles                          1774                       # Number of cycles fetch has run and was not squashing or blocked
+system.cpu.fetch.Cycles                          1769                       # Number of cycles fetch has run and was not squashing or blocked
 system.cpu.fetch.IcacheSquashes                   146                       # Number of outstanding Icache misses that were squashed
 system.cpu.fetch.Insts                           5593                       # Number of instructions fetch has processed
 system.cpu.fetch.SquashCycles                     258                       # Number of cycles fetch has spent squashing
@@ -147,27 +147,27 @@ system.cpu.fetch.rate                        0.195991                       # Nu
 system.cpu.fetch.rateDist.start_dist                           # Number of instructions fetched each cycle (Total)
 system.cpu.fetch.rateDist.samples               28537                      
 system.cpu.fetch.rateDist.min_value                 0                      
-                               0        27576   9663.24%           
+                               0        27581   9665.00%           
                                1           50     17.52%           
-                               2           92     32.24%           
-                               3           74     25.93%           
-                               4          117     41.00%           
-                               5           71     24.88%           
-                               6           43     15.07%           
+                               2           84     29.44%           
+                               3           78     27.33%           
+                               4          118     41.35%           
+                               5           67     23.48%           
+                               6           41     14.37%           
                                7           56     19.62%           
-                               8          458    160.49%           
+                               8          462    161.90%           
 system.cpu.fetch.rateDist.max_value                 8                      
 system.cpu.fetch.rateDist.end_dist
 
 system.cpu.icache.ReadReq_accesses                813                       # number of ReadReq accesses(hits+misses)
-system.cpu.icache.ReadReq_avg_miss_latency  4955.450199                       # average ReadReq miss latency
-system.cpu.icache.ReadReq_avg_mshr_miss_latency  4151.809783                       # average ReadReq mshr miss latency
+system.cpu.icache.ReadReq_avg_miss_latency  4955.454183                       # average ReadReq miss latency
+system.cpu.icache.ReadReq_avg_mshr_miss_latency  4151.815217                       # average ReadReq mshr miss latency
 system.cpu.icache.ReadReq_hits                    562                       # number of ReadReq hits
-system.cpu.icache.ReadReq_miss_latency        1243818                       # number of ReadReq miss cycles
+system.cpu.icache.ReadReq_miss_latency        1243819                       # number of ReadReq miss cycles
 system.cpu.icache.ReadReq_miss_rate          0.308733                       # miss rate for ReadReq accesses
 system.cpu.icache.ReadReq_misses                  251                       # number of ReadReq misses
 system.cpu.icache.ReadReq_mshr_hits                67                       # number of ReadReq MSHR hits
-system.cpu.icache.ReadReq_mshr_miss_latency       763933                       # number of ReadReq MSHR miss cycles
+system.cpu.icache.ReadReq_mshr_miss_latency       763934                       # number of ReadReq MSHR miss cycles
 system.cpu.icache.ReadReq_mshr_miss_rate     0.226322                       # mshr miss rate for ReadReq accesses
 system.cpu.icache.ReadReq_mshr_misses             184                       # number of ReadReq MSHR misses
 system.cpu.icache.avg_blocked_cycles_no_mshrs <err: div-0>                       # average number of cycles each access was blocked
@@ -179,29 +179,29 @@ system.cpu.icache.blocked_cycles_no_mshrs            0                       # n
 system.cpu.icache.blocked_cycles_no_targets        13780                       # number of cycles access was blocked
 system.cpu.icache.cache_copies                      0                       # number of cache copies performed
 system.cpu.icache.demand_accesses                 813                       # number of demand (read+write) accesses
-system.cpu.icache.demand_avg_miss_latency  4955.450199                       # average overall miss latency
-system.cpu.icache.demand_avg_mshr_miss_latency  4151.809783                       # average overall mshr miss latency
+system.cpu.icache.demand_avg_miss_latency  4955.454183                       # average overall miss latency
+system.cpu.icache.demand_avg_mshr_miss_latency  4151.815217                       # average overall mshr miss latency
 system.cpu.icache.demand_hits                     562                       # number of demand (read+write) hits
-system.cpu.icache.demand_miss_latency         1243818                       # number of demand (read+write) miss cycles
+system.cpu.icache.demand_miss_latency         1243819                       # number of demand (read+write) miss cycles
 system.cpu.icache.demand_miss_rate           0.308733                       # miss rate for demand accesses
 system.cpu.icache.demand_misses                   251                       # number of demand (read+write) misses
 system.cpu.icache.demand_mshr_hits                 67                       # number of demand (read+write) MSHR hits
-system.cpu.icache.demand_mshr_miss_latency       763933                       # number of demand (read+write) MSHR miss cycles
+system.cpu.icache.demand_mshr_miss_latency       763934                       # number of demand (read+write) MSHR miss cycles
 system.cpu.icache.demand_mshr_miss_rate      0.226322                       # mshr miss rate for demand accesses
 system.cpu.icache.demand_mshr_misses              184                       # number of demand (read+write) MSHR misses
 system.cpu.icache.fast_writes                       0                       # number of fast writes performed
 system.cpu.icache.mshr_cap_events                   0                       # number of times MSHR cap was activated
 system.cpu.icache.no_allocate_misses                0                       # Number of misses that were no-allocate
 system.cpu.icache.overall_accesses                813                       # number of overall (read+write) accesses
-system.cpu.icache.overall_avg_miss_latency  4955.450199                       # average overall miss latency
-system.cpu.icache.overall_avg_mshr_miss_latency  4151.809783                       # average overall mshr miss latency
+system.cpu.icache.overall_avg_miss_latency  4955.454183                       # average overall miss latency
+system.cpu.icache.overall_avg_mshr_miss_latency  4151.815217                       # average overall mshr miss latency
 system.cpu.icache.overall_avg_mshr_uncacheable_latency <err: div-0>                       # average overall mshr uncacheable latency
 system.cpu.icache.overall_hits                    562                       # number of overall hits
-system.cpu.icache.overall_miss_latency        1243818                       # number of overall miss cycles
+system.cpu.icache.overall_miss_latency        1243819                       # number of overall miss cycles
 system.cpu.icache.overall_miss_rate          0.308733                       # miss rate for overall accesses
 system.cpu.icache.overall_misses                  251                       # number of overall misses
 system.cpu.icache.overall_mshr_hits                67                       # number of overall MSHR hits
-system.cpu.icache.overall_mshr_miss_latency       763933                       # number of overall MSHR miss cycles
+system.cpu.icache.overall_mshr_miss_latency       763934                       # number of overall MSHR miss cycles
 system.cpu.icache.overall_mshr_miss_rate     0.226322                       # mshr miss rate for overall accesses
 system.cpu.icache.overall_mshr_misses             184                       # number of overall MSHR misses
 system.cpu.icache.overall_mshr_uncacheable_latency            0                       # number of overall MSHR uncacheable cycles
@@ -231,14 +231,14 @@ system.cpu.iew.EXEC:stores                        341                       # Nu
 system.cpu.iew.EXEC:swp                             0                       # number of swp insts executed
 system.cpu.iew.WB:consumers                      1860                       # num instructions consuming a value
 system.cpu.iew.WB:count                          3219                       # cumulative count of insts written-back
-system.cpu.iew.WB:fanout                     0.785484                       # average fanout of values written-back
+system.cpu.iew.WB:fanout                     0.786022                       # average fanout of values written-back
 system.cpu.iew.WB:penalized                         0                       # number of instrctions required to write to 'other' IQ
 system.cpu.iew.WB:penalized_rate                    0                       # fraction of instructions written-back that wrote to 'other' IQ
-system.cpu.iew.WB:producers                      1461                       # num instructions producing a value
+system.cpu.iew.WB:producers                      1462                       # num instructions producing a value
 system.cpu.iew.WB:rate                       0.112801                       # insts written-back per cycle
 system.cpu.iew.WB:sent                           3234                       # cumulative count of insts sent to commit
 system.cpu.iew.branchMispredicts                  152                       # Number of branch mispredicts detected at execute
-system.cpu.iew.iewBlockCycles                   14742                       # Number of cycles IEW is blocking
+system.cpu.iew.iewBlockCycles                   14743                       # Number of cycles IEW is blocking
 system.cpu.iew.iewDispLoadInsts                   783                       # Number of dispatched load instructions
 system.cpu.iew.iewDispNonSpecInsts                  6                       # Number of dispatched non-speculative instructions
 system.cpu.iew.iewDispSquashedInsts                79                       # Number of squashed instructions skipped by dispatch
@@ -258,11 +258,11 @@ system.cpu.iew.lsq.thread.0.forwLoads              29                       # Nu
 system.cpu.iew.lsq.thread.0.ignoredResponses            0                       # Number of memory responses ignored because the instruction is squashed
 system.cpu.iew.lsq.thread.0.invAddrLoads            0                       # Number of loads ignored due to an invalid address
 system.cpu.iew.lsq.thread.0.invAddrSwpfs            0                       # Number of software prefetches ignored due to an invalid address
-system.cpu.iew.lsq.thread.0.memOrderViolation           12                       # Number of memory ordering violations
+system.cpu.iew.lsq.thread.0.memOrderViolation           15                       # Number of memory ordering violations
 system.cpu.iew.lsq.thread.0.rescheduledLoads            0                       # Number of loads that were rescheduled
 system.cpu.iew.lsq.thread.0.squashedLoads          368                       # Number of loads squashed
 system.cpu.iew.lsq.thread.0.squashedStores           87                       # Number of stores squashed
-system.cpu.iew.memOrderViolationEvents             12                       # Number of memory order violations
+system.cpu.iew.memOrderViolationEvents             15                       # Number of memory order violations
 system.cpu.iew.predictedNotTakenIncorrect           95                       # Number of branches that were predicted not taken incorrectly
 system.cpu.iew.predictedTakenIncorrect             57                       # Number of branches that were predicted taken incorrectly
 system.cpu.ipc                               0.003174                       # IPC: Instructions Per Cycle
@@ -305,12 +305,12 @@ system.cpu.iq.ISSUE:fu_full.end_dist
 system.cpu.iq.ISSUE:issued_per_cycle.start_dist                     # Number of insts issued each cycle
 system.cpu.iq.ISSUE:issued_per_cycle.samples        28537                      
 system.cpu.iq.ISSUE:issued_per_cycle.min_value            0                      
-                               0        27012   9465.61%           
-                               1          616    215.86%           
-                               2          356    124.75%           
+                               0        27014   9466.31%           
+                               1          617    216.21%           
+                               2          351    123.00%           
                                3          247     86.55%           
-                               4          177     62.02%           
-                               5           81     28.38%           
+                               4          178     62.38%           
+                               5           82     28.73%           
                                6           32     11.21%           
                                7           11      3.85%           
                                8            5      1.75%           
@@ -326,12 +326,12 @@ system.cpu.iq.iqSquashedInstsIssued                25                       # Nu
 system.cpu.iq.iqSquashedNonSpecRemoved              2                       # Number of squashed non-spec instructions that were removed
 system.cpu.iq.iqSquashedOperandsExamined          801                       # Number of squashed operands that are examined and possibly removed from graph
 system.cpu.l2cache.ReadReq_accesses               269                       # number of ReadReq accesses(hits+misses)
-system.cpu.l2cache.ReadReq_avg_miss_latency  4621.724907                       # average ReadReq miss latency
-system.cpu.l2cache.ReadReq_avg_mshr_miss_latency  2296.401487                       # average ReadReq mshr miss latency
-system.cpu.l2cache.ReadReq_miss_latency       1243244                       # number of ReadReq miss cycles
+system.cpu.l2cache.ReadReq_avg_miss_latency  4621.754647                       # average ReadReq miss latency
+system.cpu.l2cache.ReadReq_avg_mshr_miss_latency  2296.408922                       # average ReadReq mshr miss latency
+system.cpu.l2cache.ReadReq_miss_latency       1243252                       # number of ReadReq miss cycles
 system.cpu.l2cache.ReadReq_miss_rate                1                       # miss rate for ReadReq accesses
 system.cpu.l2cache.ReadReq_misses                 269                       # number of ReadReq misses
-system.cpu.l2cache.ReadReq_mshr_miss_latency       617732                       # number of ReadReq MSHR miss cycles
+system.cpu.l2cache.ReadReq_mshr_miss_latency       617734                       # number of ReadReq MSHR miss cycles
 system.cpu.l2cache.ReadReq_mshr_miss_rate            1                       # mshr miss rate for ReadReq accesses
 system.cpu.l2cache.ReadReq_mshr_misses            269                       # number of ReadReq MSHR misses
 system.cpu.l2cache.avg_blocked_cycles_no_mshrs <err: div-0>                       # average number of cycles each access was blocked
@@ -343,29 +343,29 @@ system.cpu.l2cache.blocked_cycles_no_mshrs            0                       #
 system.cpu.l2cache.blocked_cycles_no_targets            0                       # number of cycles access was blocked
 system.cpu.l2cache.cache_copies                     0                       # number of cache copies performed
 system.cpu.l2cache.demand_accesses                269                       # number of demand (read+write) accesses
-system.cpu.l2cache.demand_avg_miss_latency  4621.724907                       # average overall miss latency
-system.cpu.l2cache.demand_avg_mshr_miss_latency  2296.401487                       # average overall mshr miss latency
+system.cpu.l2cache.demand_avg_miss_latency  4621.754647                       # average overall miss latency
+system.cpu.l2cache.demand_avg_mshr_miss_latency  2296.408922                       # average overall mshr miss latency
 system.cpu.l2cache.demand_hits                      0                       # number of demand (read+write) hits
-system.cpu.l2cache.demand_miss_latency        1243244                       # number of demand (read+write) miss cycles
+system.cpu.l2cache.demand_miss_latency        1243252                       # number of demand (read+write) miss cycles
 system.cpu.l2cache.demand_miss_rate                 1                       # miss rate for demand accesses
 system.cpu.l2cache.demand_misses                  269                       # number of demand (read+write) misses
 system.cpu.l2cache.demand_mshr_hits                 0                       # number of demand (read+write) MSHR hits
-system.cpu.l2cache.demand_mshr_miss_latency       617732                       # number of demand (read+write) MSHR miss cycles
+system.cpu.l2cache.demand_mshr_miss_latency       617734                       # number of demand (read+write) MSHR miss cycles
 system.cpu.l2cache.demand_mshr_miss_rate            1                       # mshr miss rate for demand accesses
 system.cpu.l2cache.demand_mshr_misses             269                       # number of demand (read+write) MSHR misses
 system.cpu.l2cache.fast_writes                      0                       # number of fast writes performed
 system.cpu.l2cache.mshr_cap_events                  0                       # number of times MSHR cap was activated
 system.cpu.l2cache.no_allocate_misses               0                       # Number of misses that were no-allocate
 system.cpu.l2cache.overall_accesses               269                       # number of overall (read+write) accesses
-system.cpu.l2cache.overall_avg_miss_latency  4621.724907                       # average overall miss latency
-system.cpu.l2cache.overall_avg_mshr_miss_latency  2296.401487                       # average overall mshr miss latency
+system.cpu.l2cache.overall_avg_miss_latency  4621.754647                       # average overall miss latency
+system.cpu.l2cache.overall_avg_mshr_miss_latency  2296.408922                       # average overall mshr miss latency
 system.cpu.l2cache.overall_avg_mshr_uncacheable_latency <err: div-0>                       # average overall mshr uncacheable latency
 system.cpu.l2cache.overall_hits                     0                       # number of overall hits
-system.cpu.l2cache.overall_miss_latency       1243244                       # number of overall miss cycles
+system.cpu.l2cache.overall_miss_latency       1243252                       # number of overall miss cycles
 system.cpu.l2cache.overall_miss_rate                1                       # miss rate for overall accesses
 system.cpu.l2cache.overall_misses                 269                       # number of overall misses
 system.cpu.l2cache.overall_mshr_hits                0                       # number of overall MSHR hits
-system.cpu.l2cache.overall_mshr_miss_latency       617732                       # number of overall MSHR miss cycles
+system.cpu.l2cache.overall_mshr_miss_latency       617734                       # number of overall MSHR miss cycles
 system.cpu.l2cache.overall_mshr_miss_rate            1                       # mshr miss rate for overall accesses
 system.cpu.l2cache.overall_mshr_misses            269                       # number of overall MSHR misses
 system.cpu.l2cache.overall_mshr_uncacheable_latency            0                       # number of overall MSHR uncacheable cycles
@@ -387,7 +387,7 @@ system.cpu.l2cache.total_refs                       0                       # To
 system.cpu.l2cache.warmup_cycle                     0                       # Cycle when the warmup percentage was hit.
 system.cpu.l2cache.writebacks                       0                       # number of writebacks
 system.cpu.numCycles                            28537                       # number of cpu cycles simulated
-system.cpu.rename.RENAME:BlockCycles            14783                       # Number of cycles rename is blocking
+system.cpu.rename.RENAME:BlockCycles            14784                       # Number of cycles rename is blocking
 system.cpu.rename.RENAME:CommittedMaps           1768                       # Number of HB maps that are committed
 system.cpu.rename.RENAME:IQFullEvents              18                       # Number of times rename has blocked due to IQ full
 system.cpu.rename.RENAME:IdleCycles              5489                       # Number of cycles rename is idle
@@ -396,11 +396,11 @@ system.cpu.rename.RENAME:ROBFullEvents              2                       # Nu
 system.cpu.rename.RENAME:RenameLookups           5285                       # Number of register rename lookups that rename has made
 system.cpu.rename.RENAME:RenamedInsts            4708                       # Number of instructions processed by rename
 system.cpu.rename.RENAME:RenamedOperands         3399                       # Number of destination operands rename has renamed
-system.cpu.rename.RENAME:RunCycles                852                       # Number of cycles rename is running
+system.cpu.rename.RENAME:RunCycles                847                       # Number of cycles rename is running
 system.cpu.rename.RENAME:SquashCycles             336                       # Number of cycles rename is squashing
 system.cpu.rename.RENAME:UnblockCycles             25                       # Number of cycles rename is unblocking
 system.cpu.rename.RENAME:UndoneMaps              1631                       # Number of HB maps that are undone due to squashing
-system.cpu.rename.RENAME:serializeStallCycles         7052                       # count of cycles rename stalled for serializing inst
+system.cpu.rename.RENAME:serializeStallCycles         7056                       # count of cycles rename stalled for serializing inst
 system.cpu.rename.RENAME:serializingInsts            8                       # count of serializing insts renamed
 system.cpu.rename.RENAME:skidInsts                 88                       # count of insts added to the skid buffer
 system.cpu.rename.RENAME:tempSerializingInsts            6                       # count of temporary serializing insts renamed
diff --git a/tests/quick/00.hello/ref/alpha/tru64/o3-timing/stderr b/tests/quick/00.hello/ref/alpha/tru64/o3-timing/stderr
index fb2137f1e..313de3c46 100644
--- a/tests/quick/00.hello/ref/alpha/tru64/o3-timing/stderr
+++ b/tests/quick/00.hello/ref/alpha/tru64/o3-timing/stderr
@@ -1,3 +1,4 @@
-0: system.remote_gdb.listener: listening for remote gdb on port 7000
+0: system.remote_gdb.listener: listening for remote gdb #0 on port 7000
 warn: Entering event queue @ 0.  Starting simulation...
+warn: Increasing stack size by one page.
 warn: ignoring syscall sigprocmask(1, 18446744073709547831, ...)
diff --git a/tests/quick/00.hello/ref/alpha/tru64/o3-timing/stdout b/tests/quick/00.hello/ref/alpha/tru64/o3-timing/stdout
index 6436baf8f..233834343 100644
--- a/tests/quick/00.hello/ref/alpha/tru64/o3-timing/stdout
+++ b/tests/quick/00.hello/ref/alpha/tru64/o3-timing/stdout
@@ -6,8 +6,9 @@ The Regents of The University of Michigan
 All Rights Reserved
 
 
-M5 compiled Jan 22 2007 23:06:52
-M5 started Mon Jan 22 23:07:09 2007
-M5 executing on ewok
-command line: build/ALPHA_SE/m5.fast -d build/ALPHA_SE/tests/fast/quick/00.hello/alpha/tru64/o3-timing tests/run.py quick/00.hello/alpha/tru64/o3-timing
+M5 compiled Mar 24 2007 13:51:02
+M5 started Sat Mar 24 13:51:14 2007
+M5 executing on zizzer.eecs.umich.edu
+command line: build/ALPHA_SE/m5.opt -d build/ALPHA_SE/tests/opt/quick/00.hello/alpha/tru64/o3-timing tests/run.py quick/00.hello/alpha/tru64/o3-timing
+Global frequency set at 1000000000000 ticks per second
 Exiting @ tick 752028 because target called exit()
diff --git a/tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/config.ini b/tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/config.ini
index 6eef745b4..e75a10c54 100644
--- a/tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/config.ini
+++ b/tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/config.ini
@@ -1,48 +1,7 @@
 [root]
 type=Root
 children=system
-checkpoint=
-clock=1000000000000
-max_tick=0
-output_file=cout
-progress_interval=0
-
-[exetrace]
-intel_format=false
-legion_lockstep=false
-pc_symbol=true
-print_cpseq=false
-print_cycle=true
-print_data=true
-print_effaddr=true
-print_fetchseq=false
-print_iregs=false
-print_opclass=true
-print_thread=true
-speculative=true
-trace_system=client
-
-[serialize]
-count=10
-cycle=0
-dir=cpt.%012d
-period=0
-
-[stats]
-descriptions=true
-dump_cycle=0
-dump_period=0
-dump_reset=false
-ignore_events=
-mysql_db=
-mysql_host=
-mysql_password=
-mysql_user=
-project_name=test
-simulation_name=test
-simulation_sample=0
-text_compat=true
-text_file=m5stats.txt
+dummy=0
 
 [system]
 type=System
@@ -70,6 +29,7 @@ commitToFetchDelay=1
 commitToIEWDelay=1
 commitToRenameDelay=1
 commitWidth=8
+cpu_id=0
 decodeToFetchDelay=1
 decodeToRenameDelay=1
 decodeWidth=8
@@ -433,12 +393,3 @@ range=0:134217727
 zero=false
 port=system.membus.port[0]
 
-[trace]
-bufsize=0
-cycle=0
-dump_on_exit=false
-file=cout
-flags=
-ignore=
-start=0
-
diff --git a/tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/config.out b/tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/config.out
index f36f666af..9489e27c0 100644
--- a/tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/config.out
+++ b/tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/config.out
@@ -1,9 +1,6 @@
 [root]
 type=Root
-clock=1000000000000
-max_tick=0
-progress_interval=0
-output_file=cout
+dummy=0
 
 [system.physmem]
 type=PhysicalMemory
@@ -189,6 +186,7 @@ type=DerivO3CPU
 clock=1
 phase=0
 numThreads=1
+cpu_id=0
 activity=0
 workload=system.cpu.workload0 system.cpu.workload1
 checker=null
@@ -383,51 +381,3 @@ clock=1000
 width=64
 responder_set=false
 
-[trace]
-flags=
-start=0
-cycle=0
-bufsize=0
-file=cout
-dump_on_exit=false
-ignore=
-
-[stats]
-descriptions=true
-project_name=test
-simulation_name=test
-simulation_sample=0
-text_file=m5stats.txt
-text_compat=true
-mysql_db=
-mysql_user=
-mysql_password=
-mysql_host=
-events_start=-1
-dump_reset=false
-dump_cycle=0
-dump_period=0
-ignore_events=
-
-[random]
-seed=1
-
-[exetrace]
-speculative=true
-print_cycle=true
-print_opclass=true
-print_thread=true
-print_effaddr=true
-print_data=true
-print_iregs=false
-print_fetchseq=false
-print_cpseq=false
-print_reg_delta=false
-pc_symbol=true
-intel_format=false
-legion_lockstep=false
-trace_system=client
-
-[statsreset]
-reset_cycle=0
-
diff --git a/tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/m5stats.txt b/tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/m5stats.txt
index bb9e9360c..74e8f8d83 100644
--- a/tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/m5stats.txt
+++ b/tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/m5stats.txt
@@ -1,48 +1,48 @@
 
 ---------- Begin Simulation Statistics ----------
 global.BPredUnit.BTBCorrect                         0                       # Number of correct BTB predictions (this stat may not work properly.
-global.BPredUnit.BTBHits                         1334                       # Number of BTB hits
-global.BPredUnit.BTBLookups                      6012                       # Number of BTB lookups
+global.BPredUnit.BTBHits                         1320                       # Number of BTB hits
+global.BPredUnit.BTBLookups                      6181                       # Number of BTB lookups
 global.BPredUnit.RASInCorrect                     173                       # Number of incorrect RAS predictions.
-global.BPredUnit.condIncorrect                   1201                       # Number of conditional branches incorrect
-global.BPredUnit.condPredicted                   4031                       # Number of conditional branches predicted
-global.BPredUnit.lookups                        12370                       # Number of BP lookups
-global.BPredUnit.usedRAS                         6337                       # Number of times the RAS was used to get a target.
-host_inst_rate                                  11366                       # Simulator instruction rate (inst/s)
-host_mem_usage                                 178064                       # Number of bytes of host memory used
-host_seconds                                     0.99                       # Real time elapsed on the host
-host_tick_rate                                2259917                       # Simulator tick rate (ticks/s)
-memdepunit.memDep.conflictingLoads                 27                       # Number of conflicting loads.
-memdepunit.memDep.conflictingLoads                 20                       # Number of conflicting loads.
-memdepunit.memDep.conflictingStores                97                       # Number of conflicting stores.
-memdepunit.memDep.conflictingStores                 3                       # Number of conflicting stores.
-memdepunit.memDep.insertedLoads                  5749                       # Number of loads inserted to the mem dependence unit.
-memdepunit.memDep.insertedLoads                  2822                       # Number of loads inserted to the mem dependence unit.
-memdepunit.memDep.insertedStores                 4490                       # Number of stores inserted to the mem dependence unit.
-memdepunit.memDep.insertedStores                 1747                       # Number of stores inserted to the mem dependence unit.
+global.BPredUnit.condIncorrect                   1181                       # Number of conditional branches incorrect
+global.BPredUnit.condPredicted                   4228                       # Number of conditional branches predicted
+global.BPredUnit.lookups                        12535                       # Number of BP lookups
+global.BPredUnit.usedRAS                         6333                       # Number of times the RAS was used to get a target.
+host_inst_rate                                   6990                       # Simulator instruction rate (inst/s)
+host_mem_usage                                 156628                       # Number of bytes of host memory used
+host_seconds                                     1.61                       # Real time elapsed on the host
+host_tick_rate                                1386962                       # Simulator tick rate (ticks/s)
+memdepunit.memDep.conflictingLoads                 26                       # Number of conflicting loads.
+memdepunit.memDep.conflictingLoads                 23                       # Number of conflicting loads.
+memdepunit.memDep.conflictingStores                 4                       # Number of conflicting stores.
+memdepunit.memDep.conflictingStores                 1                       # Number of conflicting stores.
+memdepunit.memDep.insertedLoads                  3657                       # Number of loads inserted to the mem dependence unit.
+memdepunit.memDep.insertedLoads                  5285                       # Number of loads inserted to the mem dependence unit.
+memdepunit.memDep.insertedStores                 1780                       # Number of stores inserted to the mem dependence unit.
+memdepunit.memDep.insertedStores                 4439                       # Number of stores inserted to the mem dependence unit.
 sim_freq                                 1000000000000                       # Frequency of simulated ticks
 sim_insts                                       11247                       # Number of instructions simulated
 sim_seconds                                  0.000002                       # Number of seconds simulated
-sim_ticks                                     2237162                       # Number of ticks simulated
+sim_ticks                                     2232164                       # Number of ticks simulated
 system.cpu.commit.COM:branches                   1724                       # Number of branches committed
 system.cpu.commit.COM:branches_0                  862                       # Number of branches committed
 system.cpu.commit.COM:branches_1                  862                       # Number of branches committed
-system.cpu.commit.COM:bw_lim_events               128                       # number cycles where commit BW limit reached
+system.cpu.commit.COM:bw_lim_events               123                       # number cycles where commit BW limit reached
 system.cpu.commit.COM:bw_limited                    0                       # number of insts not committed due to BW limits
 system.cpu.commit.COM:bw_limited_0                  0                       # number of insts not committed due to BW limits
 system.cpu.commit.COM:bw_limited_1                  0                       # number of insts not committed due to BW limits
 system.cpu.commit.COM:committed_per_cycle.start_dist                     # Number of insts commited each cycle
-system.cpu.commit.COM:committed_per_cycle.samples       188940                      
+system.cpu.commit.COM:committed_per_cycle.samples       189138                      
 system.cpu.commit.COM:committed_per_cycle.min_value            0                      
-                               0       183303   9701.65%           
-                               1         3121    165.18%           
-                               2         1239     65.58%           
-                               3          531     28.10%           
-                               4          275     14.55%           
-                               5          154      8.15%           
-                               6          128      6.77%           
+                               0       183476   9700.64%           
+                               1         3161    167.13%           
+                               2         1212     64.08%           
+                               3          544     28.76%           
+                               4          279     14.75%           
+                               5          155      8.20%           
+                               6          127      6.71%           
                                7           61      3.23%           
-                               8          128      6.77%           
+                               8          123      6.50%           
 system.cpu.commit.COM:committed_per_cycle.max_value            8                      
 system.cpu.commit.COM:committed_per_cycle.end_dist
 
@@ -61,97 +61,97 @@ system.cpu.commit.COM:refs_1                     1791                       # Nu
 system.cpu.commit.COM:swp_count                     0                       # Number of s/w prefetches committed
 system.cpu.commit.COM:swp_count_0                   0                       # Number of s/w prefetches committed
 system.cpu.commit.COM:swp_count_1                   0                       # Number of s/w prefetches committed
-system.cpu.commit.branchMispredicts               943                       # The number of times a branch was mispredicted
+system.cpu.commit.branchMispredicts               938                       # The number of times a branch was mispredicted
 system.cpu.commit.commitCommittedInsts          11281                       # The number of committed instructions
 system.cpu.commit.commitNonSpecStalls              34                       # The number of times commit has been forced to stall to communicate backwards
-system.cpu.commit.commitSquashedInsts           28509                       # The number of squashed insts skipped by commit
+system.cpu.commit.commitSquashedInsts           29588                       # The number of squashed insts skipped by commit
 system.cpu.committedInsts_0                      5624                       # Number of Instructions Simulated
 system.cpu.committedInsts_1                      5623                       # Number of Instructions Simulated
 system.cpu.committedInsts_total                 11247                       # Number of Instructions Simulated
-system.cpu.cpi_0                           397.788407                       # CPI: Cycles Per Instruction
-system.cpu.cpi_1                           397.859150                       # CPI: Cycles Per Instruction
-system.cpu.cpi_total                       198.911888                       # CPI: Total CPI of All Threads
-system.cpu.dcache.ReadReq_accesses               3186                       # number of ReadReq accesses(hits+misses)
-system.cpu.dcache.ReadReq_accesses_0             3186                       # number of ReadReq accesses(hits+misses)
-system.cpu.dcache.ReadReq_avg_miss_latency  9969.378125                       # average ReadReq miss latency
-system.cpu.dcache.ReadReq_avg_miss_latency_0  9969.378125                       # average ReadReq miss latency
-system.cpu.dcache.ReadReq_avg_mshr_miss_latency 10500.608040                       # average ReadReq mshr miss latency
-system.cpu.dcache.ReadReq_avg_mshr_miss_latency_0 10500.608040                       # average ReadReq mshr miss latency
-system.cpu.dcache.ReadReq_hits                   2866                       # number of ReadReq hits
-system.cpu.dcache.ReadReq_hits_0                 2866                       # number of ReadReq hits
-system.cpu.dcache.ReadReq_miss_latency        3190201                       # number of ReadReq miss cycles
-system.cpu.dcache.ReadReq_miss_latency_0      3190201                       # number of ReadReq miss cycles
-system.cpu.dcache.ReadReq_miss_rate          0.100439                       # miss rate for ReadReq accesses
-system.cpu.dcache.ReadReq_miss_rate_0        0.100439                       # miss rate for ReadReq accesses
-system.cpu.dcache.ReadReq_misses                  320                       # number of ReadReq misses
-system.cpu.dcache.ReadReq_misses_0                320                       # number of ReadReq misses
-system.cpu.dcache.ReadReq_mshr_hits               121                       # number of ReadReq MSHR hits
-system.cpu.dcache.ReadReq_mshr_hits_0             121                       # number of ReadReq MSHR hits
-system.cpu.dcache.ReadReq_mshr_miss_latency      2089621                       # number of ReadReq MSHR miss cycles
-system.cpu.dcache.ReadReq_mshr_miss_latency_0      2089621                       # number of ReadReq MSHR miss cycles
-system.cpu.dcache.ReadReq_mshr_miss_rate     0.062461                       # mshr miss rate for ReadReq accesses
-system.cpu.dcache.ReadReq_mshr_miss_rate_0     0.062461                       # mshr miss rate for ReadReq accesses
+system.cpu.cpi_0                           396.899716                       # CPI: Cycles Per Instruction
+system.cpu.cpi_1                           396.970301                       # CPI: Cycles Per Instruction
+system.cpu.cpi_total                       198.467502                       # CPI: Total CPI of All Threads
+system.cpu.dcache.ReadReq_accesses               3176                       # number of ReadReq accesses(hits+misses)
+system.cpu.dcache.ReadReq_accesses_0             3176                       # number of ReadReq accesses(hits+misses)
+system.cpu.dcache.ReadReq_avg_miss_latency  9976.257143                       # average ReadReq miss latency
+system.cpu.dcache.ReadReq_avg_miss_latency_0  9976.257143                       # average ReadReq miss latency
+system.cpu.dcache.ReadReq_avg_mshr_miss_latency 10425.356784                       # average ReadReq mshr miss latency
+system.cpu.dcache.ReadReq_avg_mshr_miss_latency_0 10425.356784                       # average ReadReq mshr miss latency
+system.cpu.dcache.ReadReq_hits                   2861                       # number of ReadReq hits
+system.cpu.dcache.ReadReq_hits_0                 2861                       # number of ReadReq hits
+system.cpu.dcache.ReadReq_miss_latency        3142521                       # number of ReadReq miss cycles
+system.cpu.dcache.ReadReq_miss_latency_0      3142521                       # number of ReadReq miss cycles
+system.cpu.dcache.ReadReq_miss_rate          0.099181                       # miss rate for ReadReq accesses
+system.cpu.dcache.ReadReq_miss_rate_0        0.099181                       # miss rate for ReadReq accesses
+system.cpu.dcache.ReadReq_misses                  315                       # number of ReadReq misses
+system.cpu.dcache.ReadReq_misses_0                315                       # number of ReadReq misses
+system.cpu.dcache.ReadReq_mshr_hits               116                       # number of ReadReq MSHR hits
+system.cpu.dcache.ReadReq_mshr_hits_0             116                       # number of ReadReq MSHR hits
+system.cpu.dcache.ReadReq_mshr_miss_latency      2074646                       # number of ReadReq MSHR miss cycles
+system.cpu.dcache.ReadReq_mshr_miss_latency_0      2074646                       # number of ReadReq MSHR miss cycles
+system.cpu.dcache.ReadReq_mshr_miss_rate     0.062657                       # mshr miss rate for ReadReq accesses
+system.cpu.dcache.ReadReq_mshr_miss_rate_0     0.062657                       # mshr miss rate for ReadReq accesses
 system.cpu.dcache.ReadReq_mshr_misses             199                       # number of ReadReq MSHR misses
 system.cpu.dcache.ReadReq_mshr_misses_0           199                       # number of ReadReq MSHR misses
 system.cpu.dcache.WriteReq_accesses              1624                       # number of WriteReq accesses(hits+misses)
 system.cpu.dcache.WriteReq_accesses_0            1624                       # number of WriteReq accesses(hits+misses)
-system.cpu.dcache.WriteReq_avg_miss_latency  6540.875740                       # average WriteReq miss latency
-system.cpu.dcache.WriteReq_avg_miss_latency_0  6540.875740                       # average WriteReq miss latency
-system.cpu.dcache.WriteReq_avg_mshr_miss_latency  7803.746575                       # average WriteReq mshr miss latency
-system.cpu.dcache.WriteReq_avg_mshr_miss_latency_0  7803.746575                       # average WriteReq mshr miss latency
+system.cpu.dcache.WriteReq_avg_miss_latency  6512.846154                       # average WriteReq miss latency
+system.cpu.dcache.WriteReq_avg_miss_latency_0  6512.846154                       # average WriteReq miss latency
+system.cpu.dcache.WriteReq_avg_mshr_miss_latency  7776.006849                       # average WriteReq mshr miss latency
+system.cpu.dcache.WriteReq_avg_mshr_miss_latency_0  7776.006849                       # average WriteReq mshr miss latency
 system.cpu.dcache.WriteReq_hits                  1117                       # number of WriteReq hits
 system.cpu.dcache.WriteReq_hits_0                1117                       # number of WriteReq hits
-system.cpu.dcache.WriteReq_miss_latency       3316224                       # number of WriteReq miss cycles
-system.cpu.dcache.WriteReq_miss_latency_0      3316224                       # number of WriteReq miss cycles
+system.cpu.dcache.WriteReq_miss_latency       3302013                       # number of WriteReq miss cycles
+system.cpu.dcache.WriteReq_miss_latency_0      3302013                       # number of WriteReq miss cycles
 system.cpu.dcache.WriteReq_miss_rate         0.312192                       # miss rate for WriteReq accesses
 system.cpu.dcache.WriteReq_miss_rate_0       0.312192                       # miss rate for WriteReq accesses
 system.cpu.dcache.WriteReq_misses                 507                       # number of WriteReq misses
 system.cpu.dcache.WriteReq_misses_0               507                       # number of WriteReq misses
 system.cpu.dcache.WriteReq_mshr_hits              361                       # number of WriteReq MSHR hits
 system.cpu.dcache.WriteReq_mshr_hits_0            361                       # number of WriteReq MSHR hits
-system.cpu.dcache.WriteReq_mshr_miss_latency      1139347                       # number of WriteReq MSHR miss cycles
-system.cpu.dcache.WriteReq_mshr_miss_latency_0      1139347                       # number of WriteReq MSHR miss cycles
+system.cpu.dcache.WriteReq_mshr_miss_latency      1135297                       # number of WriteReq MSHR miss cycles
+system.cpu.dcache.WriteReq_mshr_miss_latency_0      1135297                       # number of WriteReq MSHR miss cycles
 system.cpu.dcache.WriteReq_mshr_miss_rate     0.089901                       # mshr miss rate for WriteReq accesses
 system.cpu.dcache.WriteReq_mshr_miss_rate_0     0.089901                       # mshr miss rate for WriteReq accesses
 system.cpu.dcache.WriteReq_mshr_misses            146                       # number of WriteReq MSHR misses
 system.cpu.dcache.WriteReq_mshr_misses_0          146                       # number of WriteReq MSHR misses
 system.cpu.dcache.avg_blocked_cycles_no_mshrs         3973                       # average number of cycles each access was blocked
-system.cpu.dcache.avg_blocked_cycles_no_targets  3625.380952                       # average number of cycles each access was blocked
-system.cpu.dcache.avg_refs                  11.544928                       # Average number of references to valid blocks.
+system.cpu.dcache.avg_blocked_cycles_no_targets  3613.488095                       # average number of cycles each access was blocked
+system.cpu.dcache.avg_refs                  11.563953                       # Average number of references to valid blocks.
 system.cpu.dcache.blocked_no_mshrs                  1                       # number of cycles access was blocked
 system.cpu.dcache.blocked_no_targets               84                       # number of cycles access was blocked
 system.cpu.dcache.blocked_cycles_no_mshrs         3973                       # number of cycles access was blocked
-system.cpu.dcache.blocked_cycles_no_targets       304532                       # number of cycles access was blocked
+system.cpu.dcache.blocked_cycles_no_targets       303533                       # number of cycles access was blocked
 system.cpu.dcache.cache_copies                      0                       # number of cache copies performed
-system.cpu.dcache.demand_accesses                4810                       # number of demand (read+write) accesses
-system.cpu.dcache.demand_accesses_0              4810                       # number of demand (read+write) accesses
+system.cpu.dcache.demand_accesses                4800                       # number of demand (read+write) accesses
+system.cpu.dcache.demand_accesses_0              4800                       # number of demand (read+write) accesses
 system.cpu.dcache.demand_accesses_1                 0                       # number of demand (read+write) accesses
-system.cpu.dcache.demand_avg_miss_latency  7867.503023                       # average overall miss latency
-system.cpu.dcache.demand_avg_miss_latency_0  7867.503023                       # average overall miss latency
+system.cpu.dcache.demand_avg_miss_latency  7840.065693                       # average overall miss latency
+system.cpu.dcache.demand_avg_miss_latency_0  7840.065693                       # average overall miss latency
 system.cpu.dcache.demand_avg_miss_latency_1 <err: div-0>                       # average overall miss latency
-system.cpu.dcache.demand_avg_mshr_miss_latency  9359.327536                       # average overall mshr miss latency
-system.cpu.dcache.demand_avg_mshr_miss_latency_0  9359.327536                       # average overall mshr miss latency
+system.cpu.dcache.demand_avg_mshr_miss_latency  9304.182609                       # average overall mshr miss latency
+system.cpu.dcache.demand_avg_mshr_miss_latency_0  9304.182609                       # average overall mshr miss latency
 system.cpu.dcache.demand_avg_mshr_miss_latency_1 <err: div-0>                       # average overall mshr miss latency
-system.cpu.dcache.demand_hits                    3983                       # number of demand (read+write) hits
-system.cpu.dcache.demand_hits_0                  3983                       # number of demand (read+write) hits
+system.cpu.dcache.demand_hits                    3978                       # number of demand (read+write) hits
+system.cpu.dcache.demand_hits_0                  3978                       # number of demand (read+write) hits
 system.cpu.dcache.demand_hits_1                     0                       # number of demand (read+write) hits
-system.cpu.dcache.demand_miss_latency         6506425                       # number of demand (read+write) miss cycles
-system.cpu.dcache.demand_miss_latency_0       6506425                       # number of demand (read+write) miss cycles
+system.cpu.dcache.demand_miss_latency         6444534                       # number of demand (read+write) miss cycles
+system.cpu.dcache.demand_miss_latency_0       6444534                       # number of demand (read+write) miss cycles
 system.cpu.dcache.demand_miss_latency_1             0                       # number of demand (read+write) miss cycles
-system.cpu.dcache.demand_miss_rate           0.171933                       # miss rate for demand accesses
-system.cpu.dcache.demand_miss_rate_0         0.171933                       # miss rate for demand accesses
+system.cpu.dcache.demand_miss_rate           0.171250                       # miss rate for demand accesses
+system.cpu.dcache.demand_miss_rate_0         0.171250                       # miss rate for demand accesses
 system.cpu.dcache.demand_miss_rate_1     <err: div-0>                       # miss rate for demand accesses
-system.cpu.dcache.demand_misses                   827                       # number of demand (read+write) misses
-system.cpu.dcache.demand_misses_0                 827                       # number of demand (read+write) misses
+system.cpu.dcache.demand_misses                   822                       # number of demand (read+write) misses
+system.cpu.dcache.demand_misses_0                 822                       # number of demand (read+write) misses
 system.cpu.dcache.demand_misses_1                   0                       # number of demand (read+write) misses
-system.cpu.dcache.demand_mshr_hits                482                       # number of demand (read+write) MSHR hits
-system.cpu.dcache.demand_mshr_hits_0              482                       # number of demand (read+write) MSHR hits
+system.cpu.dcache.demand_mshr_hits                477                       # number of demand (read+write) MSHR hits
+system.cpu.dcache.demand_mshr_hits_0              477                       # number of demand (read+write) MSHR hits
 system.cpu.dcache.demand_mshr_hits_1                0                       # number of demand (read+write) MSHR hits
-system.cpu.dcache.demand_mshr_miss_latency      3228968                       # number of demand (read+write) MSHR miss cycles
-system.cpu.dcache.demand_mshr_miss_latency_0      3228968                       # number of demand (read+write) MSHR miss cycles
+system.cpu.dcache.demand_mshr_miss_latency      3209943                       # number of demand (read+write) MSHR miss cycles
+system.cpu.dcache.demand_mshr_miss_latency_0      3209943                       # number of demand (read+write) MSHR miss cycles
 system.cpu.dcache.demand_mshr_miss_latency_1            0                       # number of demand (read+write) MSHR miss cycles
-system.cpu.dcache.demand_mshr_miss_rate      0.071726                       # mshr miss rate for demand accesses
-system.cpu.dcache.demand_mshr_miss_rate_0     0.071726                       # mshr miss rate for demand accesses
+system.cpu.dcache.demand_mshr_miss_rate      0.071875                       # mshr miss rate for demand accesses
+system.cpu.dcache.demand_mshr_miss_rate_0     0.071875                       # mshr miss rate for demand accesses
 system.cpu.dcache.demand_mshr_miss_rate_1 <err: div-0>                       # mshr miss rate for demand accesses
 system.cpu.dcache.demand_mshr_misses              345                       # number of demand (read+write) MSHR misses
 system.cpu.dcache.demand_mshr_misses_0            345                       # number of demand (read+write) MSHR misses
@@ -161,38 +161,38 @@ system.cpu.dcache.mshr_cap_events                   0                       # nu
 system.cpu.dcache.mshr_cap_events_0                 0                       # number of times MSHR cap was activated
 system.cpu.dcache.mshr_cap_events_1                 0                       # number of times MSHR cap was activated
 system.cpu.dcache.no_allocate_misses                0                       # Number of misses that were no-allocate
-system.cpu.dcache.overall_accesses               4810                       # number of overall (read+write) accesses
-system.cpu.dcache.overall_accesses_0             4810                       # number of overall (read+write) accesses
+system.cpu.dcache.overall_accesses               4800                       # number of overall (read+write) accesses
+system.cpu.dcache.overall_accesses_0             4800                       # number of overall (read+write) accesses
 system.cpu.dcache.overall_accesses_1                0                       # number of overall (read+write) accesses
-system.cpu.dcache.overall_avg_miss_latency  7867.503023                       # average overall miss latency
-system.cpu.dcache.overall_avg_miss_latency_0  7867.503023                       # average overall miss latency
+system.cpu.dcache.overall_avg_miss_latency  7840.065693                       # average overall miss latency
+system.cpu.dcache.overall_avg_miss_latency_0  7840.065693                       # average overall miss latency
 system.cpu.dcache.overall_avg_miss_latency_1 <err: div-0>                       # average overall miss latency
-system.cpu.dcache.overall_avg_mshr_miss_latency  9359.327536                       # average overall mshr miss latency
-system.cpu.dcache.overall_avg_mshr_miss_latency_0  9359.327536                       # average overall mshr miss latency
+system.cpu.dcache.overall_avg_mshr_miss_latency  9304.182609                       # average overall mshr miss latency
+system.cpu.dcache.overall_avg_mshr_miss_latency_0  9304.182609                       # average overall mshr miss latency
 system.cpu.dcache.overall_avg_mshr_miss_latency_1 <err: div-0>                       # average overall mshr miss latency
 system.cpu.dcache.overall_avg_mshr_uncacheable_latency <err: div-0>                       # average overall mshr uncacheable latency
 system.cpu.dcache.overall_avg_mshr_uncacheable_latency_0 <err: div-0>                       # average overall mshr uncacheable latency
 system.cpu.dcache.overall_avg_mshr_uncacheable_latency_1 <err: div-0>                       # average overall mshr uncacheable latency
-system.cpu.dcache.overall_hits                   3983                       # number of overall hits
-system.cpu.dcache.overall_hits_0                 3983                       # number of overall hits
+system.cpu.dcache.overall_hits                   3978                       # number of overall hits
+system.cpu.dcache.overall_hits_0                 3978                       # number of overall hits
 system.cpu.dcache.overall_hits_1                    0                       # number of overall hits
-system.cpu.dcache.overall_miss_latency        6506425                       # number of overall miss cycles
-system.cpu.dcache.overall_miss_latency_0      6506425                       # number of overall miss cycles
+system.cpu.dcache.overall_miss_latency        6444534                       # number of overall miss cycles
+system.cpu.dcache.overall_miss_latency_0      6444534                       # number of overall miss cycles
 system.cpu.dcache.overall_miss_latency_1            0                       # number of overall miss cycles
-system.cpu.dcache.overall_miss_rate          0.171933                       # miss rate for overall accesses
-system.cpu.dcache.overall_miss_rate_0        0.171933                       # miss rate for overall accesses
+system.cpu.dcache.overall_miss_rate          0.171250                       # miss rate for overall accesses
+system.cpu.dcache.overall_miss_rate_0        0.171250                       # miss rate for overall accesses
 system.cpu.dcache.overall_miss_rate_1    <err: div-0>                       # miss rate for overall accesses
-system.cpu.dcache.overall_misses                  827                       # number of overall misses
-system.cpu.dcache.overall_misses_0                827                       # number of overall misses
+system.cpu.dcache.overall_misses                  822                       # number of overall misses
+system.cpu.dcache.overall_misses_0                822                       # number of overall misses
 system.cpu.dcache.overall_misses_1                  0                       # number of overall misses
-system.cpu.dcache.overall_mshr_hits               482                       # number of overall MSHR hits
-system.cpu.dcache.overall_mshr_hits_0             482                       # number of overall MSHR hits
+system.cpu.dcache.overall_mshr_hits               477                       # number of overall MSHR hits
+system.cpu.dcache.overall_mshr_hits_0             477                       # number of overall MSHR hits
 system.cpu.dcache.overall_mshr_hits_1               0                       # number of overall MSHR hits
-system.cpu.dcache.overall_mshr_miss_latency      3228968                       # number of overall MSHR miss cycles
-system.cpu.dcache.overall_mshr_miss_latency_0      3228968                       # number of overall MSHR miss cycles
+system.cpu.dcache.overall_mshr_miss_latency      3209943                       # number of overall MSHR miss cycles
+system.cpu.dcache.overall_mshr_miss_latency_0      3209943                       # number of overall MSHR miss cycles
 system.cpu.dcache.overall_mshr_miss_latency_1            0                       # number of overall MSHR miss cycles
-system.cpu.dcache.overall_mshr_miss_rate     0.071726                       # mshr miss rate for overall accesses
-system.cpu.dcache.overall_mshr_miss_rate_0     0.071726                       # mshr miss rate for overall accesses
+system.cpu.dcache.overall_mshr_miss_rate     0.071875                       # mshr miss rate for overall accesses
+system.cpu.dcache.overall_mshr_miss_rate_0     0.071875                       # mshr miss rate for overall accesses
 system.cpu.dcache.overall_mshr_miss_rate_1 <err: div-0>                       # mshr miss rate for overall accesses
 system.cpu.dcache.overall_mshr_misses             345                       # number of overall MSHR misses
 system.cpu.dcache.overall_mshr_misses_0           345                       # number of overall MSHR misses
@@ -215,153 +215,153 @@ system.cpu.dcache.prefetcher.num_hwpf_squashed_from_miss            0
 system.cpu.dcache.replacements                      0                       # number of replacements
 system.cpu.dcache.replacements_0                    0                       # number of replacements
 system.cpu.dcache.replacements_1                    0                       # number of replacements
-system.cpu.dcache.sampled_refs                    345                       # Sample count of references to valid blocks.
+system.cpu.dcache.sampled_refs                    344                       # Sample count of references to valid blocks.
 system.cpu.dcache.soft_prefetch_mshr_full            0                       # number of mshr full events for SW prefetching instrutions
 system.cpu.dcache.soft_prefetch_mshr_full_0            0                       # number of mshr full events for SW prefetching instrutions
 system.cpu.dcache.soft_prefetch_mshr_full_1            0                       # number of mshr full events for SW prefetching instrutions
-system.cpu.dcache.tagsinuse                198.670475                       # Cycle average of tags in use
-system.cpu.dcache.total_refs                     3983                       # Total number of references to valid blocks.
+system.cpu.dcache.tagsinuse                198.340517                       # Cycle average of tags in use
+system.cpu.dcache.total_refs                     3978                       # Total number of references to valid blocks.
 system.cpu.dcache.warmup_cycle                      0                       # Cycle when the warmup percentage was hit.
 system.cpu.dcache.writebacks                        0                       # number of writebacks
 system.cpu.dcache.writebacks_0                      0                       # number of writebacks
 system.cpu.dcache.writebacks_1                      0                       # number of writebacks
-system.cpu.decode.DECODE:BlockedCycles          97618                       # Number of cycles decode is blocked
-system.cpu.decode.DECODE:BranchMispred            267                       # Number of times decode detected a branch misprediction
-system.cpu.decode.DECODE:BranchResolved           390                       # Number of times decode resolved a branch
-system.cpu.decode.DECODE:DecodedInsts           67048                       # Number of instructions handled by decode
-system.cpu.decode.DECODE:IdleCycles            262280                       # Number of cycles decode is idle
-system.cpu.decode.DECODE:RunCycles              12122                       # Number of cycles decode is running
-system.cpu.decode.DECODE:SquashCycles            5552                       # Number of cycles decode is squashing
-system.cpu.decode.DECODE:SquashedInsts            680                       # Number of squashed instructions handled by decode
-system.cpu.decode.DECODE:UnblockCycles            155                       # Number of cycles decode is unblocking
-system.cpu.fetch.Branches                       12370                       # Number of branches that fetch encountered
-system.cpu.fetch.CacheLines                     13012                       # Number of cache lines fetched
-system.cpu.fetch.Cycles                         27804                       # Number of cycles fetch has run and was not squashing or blocked
-system.cpu.fetch.IcacheSquashes                   800                       # Number of outstanding Icache misses that were squashed
-system.cpu.fetch.Insts                          79582                       # Number of instructions fetch has processed
-system.cpu.fetch.SquashCycles                    4833                       # Number of cycles fetch has spent squashing
-system.cpu.fetch.branchRate                  0.065467                       # Number of branch fetches per cycle
-system.cpu.fetch.icacheStallCycles              52787                       # Number of cycles fetch is stalled on an Icache miss
-system.cpu.fetch.predictedBranches               7671                       # Number of branches that fetch has predicted taken
-system.cpu.fetch.rate                        0.421180                       # Number of inst fetches per cycle
+system.cpu.decode.DECODE:BlockedCycles          95932                       # Number of cycles decode is blocked
+system.cpu.decode.DECODE:BranchMispred            257                       # Number of times decode detected a branch misprediction
+system.cpu.decode.DECODE:BranchResolved           378                       # Number of times decode resolved a branch
+system.cpu.decode.DECODE:DecodedInsts           68233                       # Number of instructions handled by decode
+system.cpu.decode.DECODE:IdleCycles            264032                       # Number of cycles decode is idle
+system.cpu.decode.DECODE:RunCycles              12255                       # Number of cycles decode is running
+system.cpu.decode.DECODE:SquashCycles            5733                       # Number of cycles decode is squashing
+system.cpu.decode.DECODE:SquashedInsts            618                       # Number of squashed instructions handled by decode
+system.cpu.decode.DECODE:UnblockCycles            167                       # Number of cycles decode is unblocking
+system.cpu.fetch.Branches                       12535                       # Number of branches that fetch encountered
+system.cpu.fetch.CacheLines                     13184                       # Number of cache lines fetched
+system.cpu.fetch.Cycles                         28123                       # Number of cycles fetch has run and was not squashing or blocked
+system.cpu.fetch.IcacheSquashes                   886                       # Number of outstanding Icache misses that were squashed
+system.cpu.fetch.Insts                          80687                       # Number of instructions fetch has processed
+system.cpu.fetch.SquashCycles                    4911                       # Number of cycles fetch has spent squashing
+system.cpu.fetch.branchRate                  0.066271                       # Number of branch fetches per cycle
+system.cpu.fetch.icacheStallCycles              53960                       # Number of cycles fetch is stalled on an Icache miss
+system.cpu.fetch.predictedBranches               7653                       # Number of branches that fetch has predicted taken
+system.cpu.fetch.rate                        0.426584                       # Number of inst fetches per cycle
 system.cpu.fetch.rateDist.start_dist                           # Number of instructions fetched each cycle (Total)
-system.cpu.fetch.rateDist.samples              188950                      
+system.cpu.fetch.rateDist.samples              189147                      
 system.cpu.fetch.rateDist.min_value                 0                      
-                               0       174142   9216.30%           
-                               1          378     20.01%           
-                               2          298     15.77%           
-                               3         3656    193.49%           
-                               4         2200    116.43%           
-                               5         1017     53.82%           
-                               6          974     51.55%           
-                               7         2369    125.38%           
-                               8         3916    207.25%           
+                               0       174193   9209.40%           
+                               1          369     19.51%           
+                               2          281     14.86%           
+                               3         3638    192.34%           
+                               4         2283    120.70%           
+                               5         1005     53.13%           
+                               6          984     52.02%           
+                               7         2371    125.35%           
+                               8         4023    212.69%           
 system.cpu.fetch.rateDist.max_value                 8                      
 system.cpu.fetch.rateDist.end_dist
 
-system.cpu.icache.ReadReq_accesses              13010                       # number of ReadReq accesses(hits+misses)
-system.cpu.icache.ReadReq_accesses_0            13010                       # number of ReadReq accesses(hits+misses)
-system.cpu.icache.ReadReq_avg_miss_latency  7746.912281                       # average ReadReq miss latency
-system.cpu.icache.ReadReq_avg_miss_latency_0  7746.912281                       # average ReadReq miss latency
-system.cpu.icache.ReadReq_avg_mshr_miss_latency  7155.055556                       # average ReadReq mshr miss latency
-system.cpu.icache.ReadReq_avg_mshr_miss_latency_0  7155.055556                       # average ReadReq mshr miss latency
-system.cpu.icache.ReadReq_hits                  12098                       # number of ReadReq hits
-system.cpu.icache.ReadReq_hits_0                12098                       # number of ReadReq hits
-system.cpu.icache.ReadReq_miss_latency        7065184                       # number of ReadReq miss cycles
-system.cpu.icache.ReadReq_miss_latency_0      7065184                       # number of ReadReq miss cycles
-system.cpu.icache.ReadReq_miss_rate          0.070100                       # miss rate for ReadReq accesses
-system.cpu.icache.ReadReq_miss_rate_0        0.070100                       # miss rate for ReadReq accesses
+system.cpu.icache.ReadReq_accesses              13182                       # number of ReadReq accesses(hits+misses)
+system.cpu.icache.ReadReq_accesses_0            13182                       # number of ReadReq accesses(hits+misses)
+system.cpu.icache.ReadReq_avg_miss_latency  7732.322368                       # average ReadReq miss latency
+system.cpu.icache.ReadReq_avg_miss_latency_0  7732.322368                       # average ReadReq miss latency
+system.cpu.icache.ReadReq_avg_mshr_miss_latency  7128.205742                       # average ReadReq mshr miss latency
+system.cpu.icache.ReadReq_avg_mshr_miss_latency_0  7128.205742                       # average ReadReq mshr miss latency
+system.cpu.icache.ReadReq_hits                  12270                       # number of ReadReq hits
+system.cpu.icache.ReadReq_hits_0                12270                       # number of ReadReq hits
+system.cpu.icache.ReadReq_miss_latency        7051878                       # number of ReadReq miss cycles
+system.cpu.icache.ReadReq_miss_latency_0      7051878                       # number of ReadReq miss cycles
+system.cpu.icache.ReadReq_miss_rate          0.069185                       # miss rate for ReadReq accesses
+system.cpu.icache.ReadReq_miss_rate_0        0.069185                       # miss rate for ReadReq accesses
 system.cpu.icache.ReadReq_misses                  912                       # number of ReadReq misses
 system.cpu.icache.ReadReq_misses_0                912                       # number of ReadReq misses
-system.cpu.icache.ReadReq_mshr_hits               282                       # number of ReadReq MSHR hits
-system.cpu.icache.ReadReq_mshr_hits_0             282                       # number of ReadReq MSHR hits
-system.cpu.icache.ReadReq_mshr_miss_latency      4507685                       # number of ReadReq MSHR miss cycles
-system.cpu.icache.ReadReq_mshr_miss_latency_0      4507685                       # number of ReadReq MSHR miss cycles
-system.cpu.icache.ReadReq_mshr_miss_rate     0.048424                       # mshr miss rate for ReadReq accesses
-system.cpu.icache.ReadReq_mshr_miss_rate_0     0.048424                       # mshr miss rate for ReadReq accesses
-system.cpu.icache.ReadReq_mshr_misses             630                       # number of ReadReq MSHR misses
-system.cpu.icache.ReadReq_mshr_misses_0           630                       # number of ReadReq MSHR misses
+system.cpu.icache.ReadReq_mshr_hits               285                       # number of ReadReq MSHR hits
+system.cpu.icache.ReadReq_mshr_hits_0             285                       # number of ReadReq MSHR hits
+system.cpu.icache.ReadReq_mshr_miss_latency      4469385                       # number of ReadReq MSHR miss cycles
+system.cpu.icache.ReadReq_mshr_miss_latency_0      4469385                       # number of ReadReq MSHR miss cycles
+system.cpu.icache.ReadReq_mshr_miss_rate     0.047565                       # mshr miss rate for ReadReq accesses
+system.cpu.icache.ReadReq_mshr_miss_rate_0     0.047565                       # mshr miss rate for ReadReq accesses
+system.cpu.icache.ReadReq_mshr_misses             627                       # number of ReadReq MSHR misses
+system.cpu.icache.ReadReq_mshr_misses_0           627                       # number of ReadReq MSHR misses
 system.cpu.icache.avg_blocked_cycles_no_mshrs <err: div-0>                       # average number of cycles each access was blocked
-system.cpu.icache.avg_blocked_cycles_no_targets  5648.647059                       # average number of cycles each access was blocked
-system.cpu.icache.avg_refs                  19.203175                       # Average number of references to valid blocks.
+system.cpu.icache.avg_blocked_cycles_no_targets  5603.944444                       # average number of cycles each access was blocked
+system.cpu.icache.avg_refs                  19.569378                       # Average number of references to valid blocks.
 system.cpu.icache.blocked_no_mshrs                  0                       # number of cycles access was blocked
-system.cpu.icache.blocked_no_targets               17                       # number of cycles access was blocked
+system.cpu.icache.blocked_no_targets               18                       # number of cycles access was blocked
 system.cpu.icache.blocked_cycles_no_mshrs            0                       # number of cycles access was blocked
-system.cpu.icache.blocked_cycles_no_targets        96027                       # number of cycles access was blocked
+system.cpu.icache.blocked_cycles_no_targets       100871                       # number of cycles access was blocked
 system.cpu.icache.cache_copies                      0                       # number of cache copies performed
-system.cpu.icache.demand_accesses               13010                       # number of demand (read+write) accesses
-system.cpu.icache.demand_accesses_0             13010                       # number of demand (read+write) accesses
+system.cpu.icache.demand_accesses               13182                       # number of demand (read+write) accesses
+system.cpu.icache.demand_accesses_0             13182                       # number of demand (read+write) accesses
 system.cpu.icache.demand_accesses_1                 0                       # number of demand (read+write) accesses
-system.cpu.icache.demand_avg_miss_latency  7746.912281                       # average overall miss latency
-system.cpu.icache.demand_avg_miss_latency_0  7746.912281                       # average overall miss latency
+system.cpu.icache.demand_avg_miss_latency  7732.322368                       # average overall miss latency
+system.cpu.icache.demand_avg_miss_latency_0  7732.322368                       # average overall miss latency
 system.cpu.icache.demand_avg_miss_latency_1 <err: div-0>                       # average overall miss latency
-system.cpu.icache.demand_avg_mshr_miss_latency  7155.055556                       # average overall mshr miss latency
-system.cpu.icache.demand_avg_mshr_miss_latency_0  7155.055556                       # average overall mshr miss latency
+system.cpu.icache.demand_avg_mshr_miss_latency  7128.205742                       # average overall mshr miss latency
+system.cpu.icache.demand_avg_mshr_miss_latency_0  7128.205742                       # average overall mshr miss latency
 system.cpu.icache.demand_avg_mshr_miss_latency_1 <err: div-0>                       # average overall mshr miss latency
-system.cpu.icache.demand_hits                   12098                       # number of demand (read+write) hits
-system.cpu.icache.demand_hits_0                 12098                       # number of demand (read+write) hits
+system.cpu.icache.demand_hits                   12270                       # number of demand (read+write) hits
+system.cpu.icache.demand_hits_0                 12270                       # number of demand (read+write) hits
 system.cpu.icache.demand_hits_1                     0                       # number of demand (read+write) hits
-system.cpu.icache.demand_miss_latency         7065184                       # number of demand (read+write) miss cycles
-system.cpu.icache.demand_miss_latency_0       7065184                       # number of demand (read+write) miss cycles
+system.cpu.icache.demand_miss_latency         7051878                       # number of demand (read+write) miss cycles
+system.cpu.icache.demand_miss_latency_0       7051878                       # number of demand (read+write) miss cycles
 system.cpu.icache.demand_miss_latency_1             0                       # number of demand (read+write) miss cycles
-system.cpu.icache.demand_miss_rate           0.070100                       # miss rate for demand accesses
-system.cpu.icache.demand_miss_rate_0         0.070100                       # miss rate for demand accesses
+system.cpu.icache.demand_miss_rate           0.069185                       # miss rate for demand accesses
+system.cpu.icache.demand_miss_rate_0         0.069185                       # miss rate for demand accesses
 system.cpu.icache.demand_miss_rate_1     <err: div-0>                       # miss rate for demand accesses
 system.cpu.icache.demand_misses                   912                       # number of demand (read+write) misses
 system.cpu.icache.demand_misses_0                 912                       # number of demand (read+write) misses
 system.cpu.icache.demand_misses_1                   0                       # number of demand (read+write) misses
-system.cpu.icache.demand_mshr_hits                282                       # number of demand (read+write) MSHR hits
-system.cpu.icache.demand_mshr_hits_0              282                       # number of demand (read+write) MSHR hits
+system.cpu.icache.demand_mshr_hits                285                       # number of demand (read+write) MSHR hits
+system.cpu.icache.demand_mshr_hits_0              285                       # number of demand (read+write) MSHR hits
 system.cpu.icache.demand_mshr_hits_1                0                       # number of demand (read+write) MSHR hits
-system.cpu.icache.demand_mshr_miss_latency      4507685                       # number of demand (read+write) MSHR miss cycles
-system.cpu.icache.demand_mshr_miss_latency_0      4507685                       # number of demand (read+write) MSHR miss cycles
+system.cpu.icache.demand_mshr_miss_latency      4469385                       # number of demand (read+write) MSHR miss cycles
+system.cpu.icache.demand_mshr_miss_latency_0      4469385                       # number of demand (read+write) MSHR miss cycles
 system.cpu.icache.demand_mshr_miss_latency_1            0                       # number of demand (read+write) MSHR miss cycles
-system.cpu.icache.demand_mshr_miss_rate      0.048424                       # mshr miss rate for demand accesses
-system.cpu.icache.demand_mshr_miss_rate_0     0.048424                       # mshr miss rate for demand accesses
+system.cpu.icache.demand_mshr_miss_rate      0.047565                       # mshr miss rate for demand accesses
+system.cpu.icache.demand_mshr_miss_rate_0     0.047565                       # mshr miss rate for demand accesses
 system.cpu.icache.demand_mshr_miss_rate_1 <err: div-0>                       # mshr miss rate for demand accesses
-system.cpu.icache.demand_mshr_misses              630                       # number of demand (read+write) MSHR misses
-system.cpu.icache.demand_mshr_misses_0            630                       # number of demand (read+write) MSHR misses
+system.cpu.icache.demand_mshr_misses              627                       # number of demand (read+write) MSHR misses
+system.cpu.icache.demand_mshr_misses_0            627                       # number of demand (read+write) MSHR misses
 system.cpu.icache.demand_mshr_misses_1              0                       # number of demand (read+write) MSHR misses
 system.cpu.icache.fast_writes                       0                       # number of fast writes performed
 system.cpu.icache.mshr_cap_events                   0                       # number of times MSHR cap was activated
 system.cpu.icache.mshr_cap_events_0                 0                       # number of times MSHR cap was activated
 system.cpu.icache.mshr_cap_events_1                 0                       # number of times MSHR cap was activated
 system.cpu.icache.no_allocate_misses                0                       # Number of misses that were no-allocate
-system.cpu.icache.overall_accesses              13010                       # number of overall (read+write) accesses
-system.cpu.icache.overall_accesses_0            13010                       # number of overall (read+write) accesses
+system.cpu.icache.overall_accesses              13182                       # number of overall (read+write) accesses
+system.cpu.icache.overall_accesses_0            13182                       # number of overall (read+write) accesses
 system.cpu.icache.overall_accesses_1                0                       # number of overall (read+write) accesses
-system.cpu.icache.overall_avg_miss_latency  7746.912281                       # average overall miss latency
-system.cpu.icache.overall_avg_miss_latency_0  7746.912281                       # average overall miss latency
+system.cpu.icache.overall_avg_miss_latency  7732.322368                       # average overall miss latency
+system.cpu.icache.overall_avg_miss_latency_0  7732.322368                       # average overall miss latency
 system.cpu.icache.overall_avg_miss_latency_1 <err: div-0>                       # average overall miss latency
-system.cpu.icache.overall_avg_mshr_miss_latency  7155.055556                       # average overall mshr miss latency
-system.cpu.icache.overall_avg_mshr_miss_latency_0  7155.055556                       # average overall mshr miss latency
+system.cpu.icache.overall_avg_mshr_miss_latency  7128.205742                       # average overall mshr miss latency
+system.cpu.icache.overall_avg_mshr_miss_latency_0  7128.205742                       # average overall mshr miss latency
 system.cpu.icache.overall_avg_mshr_miss_latency_1 <err: div-0>                       # average overall mshr miss latency
 system.cpu.icache.overall_avg_mshr_uncacheable_latency <err: div-0>                       # average overall mshr uncacheable latency
 system.cpu.icache.overall_avg_mshr_uncacheable_latency_0 <err: div-0>                       # average overall mshr uncacheable latency
 system.cpu.icache.overall_avg_mshr_uncacheable_latency_1 <err: div-0>                       # average overall mshr uncacheable latency
-system.cpu.icache.overall_hits                  12098                       # number of overall hits
-system.cpu.icache.overall_hits_0                12098                       # number of overall hits
+system.cpu.icache.overall_hits                  12270                       # number of overall hits
+system.cpu.icache.overall_hits_0                12270                       # number of overall hits
 system.cpu.icache.overall_hits_1                    0                       # number of overall hits
-system.cpu.icache.overall_miss_latency        7065184                       # number of overall miss cycles
-system.cpu.icache.overall_miss_latency_0      7065184                       # number of overall miss cycles
+system.cpu.icache.overall_miss_latency        7051878                       # number of overall miss cycles
+system.cpu.icache.overall_miss_latency_0      7051878                       # number of overall miss cycles
 system.cpu.icache.overall_miss_latency_1            0                       # number of overall miss cycles
-system.cpu.icache.overall_miss_rate          0.070100                       # miss rate for overall accesses
-system.cpu.icache.overall_miss_rate_0        0.070100                       # miss rate for overall accesses
+system.cpu.icache.overall_miss_rate          0.069185                       # miss rate for overall accesses
+system.cpu.icache.overall_miss_rate_0        0.069185                       # miss rate for overall accesses
 system.cpu.icache.overall_miss_rate_1    <err: div-0>                       # miss rate for overall accesses
 system.cpu.icache.overall_misses                  912                       # number of overall misses
 system.cpu.icache.overall_misses_0                912                       # number of overall misses
 system.cpu.icache.overall_misses_1                  0                       # number of overall misses
-system.cpu.icache.overall_mshr_hits               282                       # number of overall MSHR hits
-system.cpu.icache.overall_mshr_hits_0             282                       # number of overall MSHR hits
+system.cpu.icache.overall_mshr_hits               285                       # number of overall MSHR hits
+system.cpu.icache.overall_mshr_hits_0             285                       # number of overall MSHR hits
 system.cpu.icache.overall_mshr_hits_1               0                       # number of overall MSHR hits
-system.cpu.icache.overall_mshr_miss_latency      4507685                       # number of overall MSHR miss cycles
-system.cpu.icache.overall_mshr_miss_latency_0      4507685                       # number of overall MSHR miss cycles
+system.cpu.icache.overall_mshr_miss_latency      4469385                       # number of overall MSHR miss cycles
+system.cpu.icache.overall_mshr_miss_latency_0      4469385                       # number of overall MSHR miss cycles
 system.cpu.icache.overall_mshr_miss_latency_1            0                       # number of overall MSHR miss cycles
-system.cpu.icache.overall_mshr_miss_rate     0.048424                       # mshr miss rate for overall accesses
-system.cpu.icache.overall_mshr_miss_rate_0     0.048424                       # mshr miss rate for overall accesses
+system.cpu.icache.overall_mshr_miss_rate     0.047565                       # mshr miss rate for overall accesses
+system.cpu.icache.overall_mshr_miss_rate_0     0.047565                       # mshr miss rate for overall accesses
 system.cpu.icache.overall_mshr_miss_rate_1 <err: div-0>                       # mshr miss rate for overall accesses
-system.cpu.icache.overall_mshr_misses             630                       # number of overall MSHR misses
-system.cpu.icache.overall_mshr_misses_0           630                       # number of overall MSHR misses
+system.cpu.icache.overall_mshr_misses             627                       # number of overall MSHR misses
+system.cpu.icache.overall_mshr_misses_0           627                       # number of overall MSHR misses
 system.cpu.icache.overall_mshr_misses_1             0                       # number of overall MSHR misses
 system.cpu.icache.overall_mshr_uncacheable_latency            0                       # number of overall MSHR uncacheable cycles
 system.cpu.icache.overall_mshr_uncacheable_latency_0            0                       # number of overall MSHR uncacheable cycles
@@ -381,138 +381,138 @@ system.cpu.icache.prefetcher.num_hwpf_squashed_from_miss            0
 system.cpu.icache.replacements                      6                       # number of replacements
 system.cpu.icache.replacements_0                    6                       # number of replacements
 system.cpu.icache.replacements_1                    0                       # number of replacements
-system.cpu.icache.sampled_refs                    630                       # Sample count of references to valid blocks.
+system.cpu.icache.sampled_refs                    627                       # Sample count of references to valid blocks.
 system.cpu.icache.soft_prefetch_mshr_full            0                       # number of mshr full events for SW prefetching instrutions
 system.cpu.icache.soft_prefetch_mshr_full_0            0                       # number of mshr full events for SW prefetching instrutions
 system.cpu.icache.soft_prefetch_mshr_full_1            0                       # number of mshr full events for SW prefetching instrutions
-system.cpu.icache.tagsinuse                289.377534                       # Cycle average of tags in use
-system.cpu.icache.total_refs                    12098                       # Total number of references to valid blocks.
+system.cpu.icache.tagsinuse                288.361956                       # Cycle average of tags in use
+system.cpu.icache.total_refs                    12270                       # Total number of references to valid blocks.
 system.cpu.icache.warmup_cycle                      0                       # Cycle when the warmup percentage was hit.
 system.cpu.icache.writebacks                        0                       # number of writebacks
 system.cpu.icache.writebacks_0                      0                       # number of writebacks
 system.cpu.icache.writebacks_1                      0                       # number of writebacks
-system.cpu.idleCycles                         2048213                       # Total number of cycles that the CPU has spent unscheduled due to idling
-system.cpu.iew.EXEC:branches                     4035                       # Number of branches executed
-system.cpu.iew.EXEC:branches_0                   2458                       # Number of branches executed
-system.cpu.iew.EXEC:branches_1                   1577                       # Number of branches executed
+system.cpu.idleCycles                         2043018                       # Total number of cycles that the CPU has spent unscheduled due to idling
+system.cpu.iew.EXEC:branches                     4024                       # Number of branches executed
+system.cpu.iew.EXEC:branches_0                   1569                       # Number of branches executed
+system.cpu.iew.EXEC:branches_1                   2455                       # Number of branches executed
 system.cpu.iew.EXEC:nop                            84                       # number of nop insts executed
 system.cpu.iew.EXEC:nop_0                          42                       # number of nop insts executed
 system.cpu.iew.EXEC:nop_1                          42                       # number of nop insts executed
-system.cpu.iew.EXEC:rate                     0.142196                       # Inst execution rate
-system.cpu.iew.EXEC:refs                        10960                       # number of memory reference insts executed
-system.cpu.iew.EXEC:refs_0                       7253                       # number of memory reference insts executed
-system.cpu.iew.EXEC:refs_1                       3707                       # number of memory reference insts executed
-system.cpu.iew.EXEC:stores                       3812                       # Number of stores executed
-system.cpu.iew.EXEC:stores_0                     2509                       # Number of stores executed
-system.cpu.iew.EXEC:stores_1                     1303                       # Number of stores executed
+system.cpu.iew.EXEC:rate                     0.144523                       # Inst execution rate
+system.cpu.iew.EXEC:refs                        11361                       # number of memory reference insts executed
+system.cpu.iew.EXEC:refs_0                       4575                       # number of memory reference insts executed
+system.cpu.iew.EXEC:refs_1                       6786                       # number of memory reference insts executed
+system.cpu.iew.EXEC:stores                       3833                       # Number of stores executed
+system.cpu.iew.EXEC:stores_0                     1337                       # Number of stores executed
+system.cpu.iew.EXEC:stores_1                     2496                       # Number of stores executed
 system.cpu.iew.EXEC:swp                             0                       # number of swp insts executed
 system.cpu.iew.EXEC:swp_0                           0                       # number of swp insts executed
 system.cpu.iew.EXEC:swp_1                           0                       # number of swp insts executed
-system.cpu.iew.WB:consumers                     12377                       # num instructions consuming a value
-system.cpu.iew.WB:consumers_0                    6652                       # num instructions consuming a value
-system.cpu.iew.WB:consumers_1                    5725                       # num instructions consuming a value
-system.cpu.iew.WB:count                         22520                       # cumulative count of insts written-back
-system.cpu.iew.WB:count_0                       12790                       # cumulative count of insts written-back
-system.cpu.iew.WB:count_1                        9730                       # cumulative count of insts written-back
-system.cpu.iew.WB:fanout                     0.808516                       # average fanout of values written-back
-system.cpu.iew.WB:fanout_0                   0.819753                       # average fanout of values written-back
-system.cpu.iew.WB:fanout_1                   0.795459                       # average fanout of values written-back
+system.cpu.iew.WB:consumers                     12385                       # num instructions consuming a value
+system.cpu.iew.WB:consumers_0                    5750                       # num instructions consuming a value
+system.cpu.iew.WB:consumers_1                    6635                       # num instructions consuming a value
+system.cpu.iew.WB:count                         22604                       # cumulative count of insts written-back
+system.cpu.iew.WB:count_0                       10240                       # cumulative count of insts written-back
+system.cpu.iew.WB:count_1                       12364                       # cumulative count of insts written-back
+system.cpu.iew.WB:fanout                     0.811385                       # average fanout of values written-back
+system.cpu.iew.WB:fanout_0                   0.800522                       # average fanout of values written-back
+system.cpu.iew.WB:fanout_1                   0.820799                       # average fanout of values written-back
 system.cpu.iew.WB:penalized                         0                       # number of instrctions required to write to 'other' IQ
 system.cpu.iew.WB:penalized_0                       0                       # number of instrctions required to write to 'other' IQ
 system.cpu.iew.WB:penalized_1                       0                       # number of instrctions required to write to 'other' IQ
 system.cpu.iew.WB:penalized_rate                    0                       # fraction of instructions written-back that wrote to 'other' IQ
 system.cpu.iew.WB:penalized_rate_0                  0                       # fraction of instructions written-back that wrote to 'other' IQ
 system.cpu.iew.WB:penalized_rate_1                  0                       # fraction of instructions written-back that wrote to 'other' IQ
-system.cpu.iew.WB:producers                     10007                       # num instructions producing a value
-system.cpu.iew.WB:producers_0                    5453                       # num instructions producing a value
-system.cpu.iew.WB:producers_1                    4554                       # num instructions producing a value
-system.cpu.iew.WB:rate                       0.119185                       # insts written-back per cycle
-system.cpu.iew.WB:rate_0                     0.067690                       # insts written-back per cycle
-system.cpu.iew.WB:rate_1                     0.051495                       # insts written-back per cycle
-system.cpu.iew.WB:sent                          22674                       # cumulative count of insts sent to commit
-system.cpu.iew.WB:sent_0                        12874                       # cumulative count of insts sent to commit
-system.cpu.iew.WB:sent_1                         9800                       # cumulative count of insts sent to commit
-system.cpu.iew.branchMispredicts                 1030                       # Number of branch mispredicts detected at execute
-system.cpu.iew.iewBlockCycles                   62040                       # Number of cycles IEW is blocking
-system.cpu.iew.iewDispLoadInsts                  8571                       # Number of dispatched load instructions
-system.cpu.iew.iewDispNonSpecInsts                 42                       # Number of dispatched non-speculative instructions
-system.cpu.iew.iewDispSquashedInsts              5358                       # Number of squashed instructions skipped by dispatch
-system.cpu.iew.iewDispStoreInsts                 6237                       # Number of dispatched store instructions
-system.cpu.iew.iewDispatchedInsts               39780                       # Number of instructions dispatched to IQ
-system.cpu.iew.iewExecLoadInsts                  7148                       # Number of load instructions executed
-system.cpu.iew.iewExecLoadInsts_0                4744                       # Number of load instructions executed
-system.cpu.iew.iewExecLoadInsts_1                2404                       # Number of load instructions executed
-system.cpu.iew.iewExecSquashedInsts               903                       # Number of squashed instructions skipped in execute
-system.cpu.iew.iewExecutedInsts                 26868                       # Number of executed instructions
-system.cpu.iew.iewIQFullEvents                     44                       # Number of times the IQ has become full, causing a stall
+system.cpu.iew.WB:producers                     10049                       # num instructions producing a value
+system.cpu.iew.WB:producers_0                    4603                       # num instructions producing a value
+system.cpu.iew.WB:producers_1                    5446                       # num instructions producing a value
+system.cpu.iew.WB:rate                       0.119505                       # insts written-back per cycle
+system.cpu.iew.WB:rate_0                     0.054138                       # insts written-back per cycle
+system.cpu.iew.WB:rate_1                     0.065367                       # insts written-back per cycle
+system.cpu.iew.WB:sent                          22763                       # cumulative count of insts sent to commit
+system.cpu.iew.WB:sent_0                        10322                       # cumulative count of insts sent to commit
+system.cpu.iew.WB:sent_1                        12441                       # cumulative count of insts sent to commit
+system.cpu.iew.branchMispredicts                 1027                       # Number of branch mispredicts detected at execute
+system.cpu.iew.iewBlockCycles                   60103                       # Number of cycles IEW is blocking
+system.cpu.iew.iewDispLoadInsts                  8942                       # Number of dispatched load instructions
+system.cpu.iew.iewDispNonSpecInsts                 41                       # Number of dispatched non-speculative instructions
+system.cpu.iew.iewDispSquashedInsts              5344                       # Number of squashed instructions skipped by dispatch
+system.cpu.iew.iewDispStoreInsts                 6219                       # Number of dispatched store instructions
+system.cpu.iew.iewDispatchedInsts               40858                       # Number of instructions dispatched to IQ
+system.cpu.iew.iewExecLoadInsts                  7528                       # Number of load instructions executed
+system.cpu.iew.iewExecLoadInsts_0                3238                       # Number of load instructions executed
+system.cpu.iew.iewExecLoadInsts_1                4290                       # Number of load instructions executed
+system.cpu.iew.iewExecSquashedInsts               872                       # Number of squashed instructions skipped in execute
+system.cpu.iew.iewExecutedInsts                 27336                       # Number of executed instructions
+system.cpu.iew.iewIQFullEvents                     45                       # Number of times the IQ has become full, causing a stall
 system.cpu.iew.iewIdleCycles                        0                       # Number of cycles IEW is idle
-system.cpu.iew.iewLSQFullEvents                     2                       # Number of times the LSQ has become full, causing a stall
-system.cpu.iew.iewSquashCycles                   5552                       # Number of cycles IEW is squashing
-system.cpu.iew.iewUnblockCycles                   117                       # Number of cycles IEW is unblocking
-system.cpu.iew.lsq.thread.0.blockedLoads            1                       # Number of blocked loads due to partial load-store forwarding
-system.cpu.iew.lsq.thread.0.cacheBlocked         3088                       # Number of times an access to memory failed due to the cache being blocked
-system.cpu.iew.lsq.thread.0.forwLoads              64                       # Number of loads that had data forwarded from stores
-system.cpu.iew.lsq.thread.0.ignoredResponses            6                       # Number of memory responses ignored because the instruction is squashed
+system.cpu.iew.iewLSQFullEvents                     4                       # Number of times the LSQ has become full, causing a stall
+system.cpu.iew.iewSquashCycles                   5733                       # Number of cycles IEW is squashing
+system.cpu.iew.iewUnblockCycles                   122                       # Number of cycles IEW is unblocking
+system.cpu.iew.lsq.thread.0.blockedLoads            0                       # Number of blocked loads due to partial load-store forwarding
+system.cpu.iew.lsq.thread.0.cacheBlocked         1584                       # Number of times an access to memory failed due to the cache being blocked
+system.cpu.iew.lsq.thread.0.forwLoads              65                       # Number of loads that had data forwarded from stores
+system.cpu.iew.lsq.thread.0.ignoredResponses           10                       # Number of memory responses ignored because the instruction is squashed
 system.cpu.iew.lsq.thread.0.invAddrLoads            0                       # Number of loads ignored due to an invalid address
 system.cpu.iew.lsq.thread.0.invAddrSwpfs            0                       # Number of software prefetches ignored due to an invalid address
-system.cpu.iew.lsq.thread.0.memOrderViolation           34                       # Number of memory ordering violations
+system.cpu.iew.lsq.thread.0.memOrderViolation           56                       # Number of memory ordering violations
 system.cpu.iew.lsq.thread.0.rescheduledLoads            1                       # Number of loads that were rescheduled
-system.cpu.iew.lsq.thread.0.squashedLoads         4770                       # Number of loads squashed
-system.cpu.iew.lsq.thread.0.squashedStores         3678                       # Number of stores squashed
-system.cpu.iew.lsq.thread.1.blockedLoads            1                       # Number of blocked loads due to partial load-store forwarding
-system.cpu.iew.lsq.thread.1.cacheBlocked          756                       # Number of times an access to memory failed due to the cache being blocked
-system.cpu.iew.lsq.thread.1.forwLoads              64                       # Number of loads that had data forwarded from stores
-system.cpu.iew.lsq.thread.1.ignoredResponses           10                       # Number of memory responses ignored because the instruction is squashed
+system.cpu.iew.lsq.thread.0.squashedLoads         2678                       # Number of loads squashed
+system.cpu.iew.lsq.thread.0.squashedStores          968                       # Number of stores squashed
+system.cpu.iew.lsq.thread.1.blockedLoads            0                       # Number of blocked loads due to partial load-store forwarding
+system.cpu.iew.lsq.thread.1.cacheBlocked         2643                       # Number of times an access to memory failed due to the cache being blocked
+system.cpu.iew.lsq.thread.1.forwLoads              67                       # Number of loads that had data forwarded from stores
+system.cpu.iew.lsq.thread.1.ignoredResponses            7                       # Number of memory responses ignored because the instruction is squashed
 system.cpu.iew.lsq.thread.1.invAddrLoads            0                       # Number of loads ignored due to an invalid address
 system.cpu.iew.lsq.thread.1.invAddrSwpfs            0                       # Number of software prefetches ignored due to an invalid address
-system.cpu.iew.lsq.thread.1.memOrderViolation           29                       # Number of memory ordering violations
+system.cpu.iew.lsq.thread.1.memOrderViolation           54                       # Number of memory ordering violations
 system.cpu.iew.lsq.thread.1.rescheduledLoads            1                       # Number of loads that were rescheduled
-system.cpu.iew.lsq.thread.1.squashedLoads         1843                       # Number of loads squashed
-system.cpu.iew.lsq.thread.1.squashedStores          935                       # Number of stores squashed
-system.cpu.iew.memOrderViolationEvents             63                       # Number of memory order violations
-system.cpu.iew.predictedNotTakenIncorrect          798                       # Number of branches that were predicted not taken incorrectly
-system.cpu.iew.predictedTakenIncorrect            232                       # Number of branches that were predicted taken incorrectly
-system.cpu.ipc_0                             0.002514                       # IPC: Instructions Per Cycle
-system.cpu.ipc_1                             0.002513                       # IPC: Instructions Per Cycle
-system.cpu.ipc_total                         0.005027                       # IPC: Total IPC of All Threads
-system.cpu.iq.ISSUE:FU_type_0                   16536                       # Type of FU issued
+system.cpu.iew.lsq.thread.1.squashedLoads         4306                       # Number of loads squashed
+system.cpu.iew.lsq.thread.1.squashedStores         3627                       # Number of stores squashed
+system.cpu.iew.memOrderViolationEvents            110                       # Number of memory order violations
+system.cpu.iew.predictedNotTakenIncorrect          796                       # Number of branches that were predicted not taken incorrectly
+system.cpu.iew.predictedTakenIncorrect            231                       # Number of branches that were predicted taken incorrectly
+system.cpu.ipc_0                             0.002520                       # IPC: Instructions Per Cycle
+system.cpu.ipc_1                             0.002519                       # IPC: Instructions Per Cycle
+system.cpu.ipc_total                         0.005039                       # IPC: Total IPC of All Threads
+system.cpu.iq.ISSUE:FU_type_0                   12578                       # Type of FU issued
 system.cpu.iq.ISSUE:FU_type_0.start_dist
-                          (null)            2      0.01%            # Type of FU issued
-                          IntAlu         9136     55.25%            # Type of FU issued
+                          (null)            2      0.02%            # Type of FU issued
+                          IntAlu         7865     62.53%            # Type of FU issued
                          IntMult            1      0.01%            # Type of FU issued
                           IntDiv            0      0.00%            # Type of FU issued
-                        FloatAdd            2      0.01%            # Type of FU issued
+                        FloatAdd            2      0.02%            # Type of FU issued
                         FloatCmp            0      0.00%            # Type of FU issued
                         FloatCvt            0      0.00%            # Type of FU issued
                        FloatMult            0      0.00%            # Type of FU issued
                         FloatDiv            0      0.00%            # Type of FU issued
                        FloatSqrt            0      0.00%            # Type of FU issued
-                         MemRead         4850     29.33%            # Type of FU issued
-                        MemWrite         2545     15.39%            # Type of FU issued
+                         MemRead         3344     26.59%            # Type of FU issued
+                        MemWrite         1364     10.84%            # Type of FU issued
                        IprAccess            0      0.00%            # Type of FU issued
                     InstPrefetch            0      0.00%            # Type of FU issued
 system.cpu.iq.ISSUE:FU_type_0.end_dist
-system.cpu.iq.ISSUE:FU_type_1                   11235                       # Type of FU issued
+system.cpu.iq.ISSUE:FU_type_1                   15630                       # Type of FU issued
 system.cpu.iq.ISSUE:FU_type_1.start_dist
-                          (null)            2      0.02%            # Type of FU issued
-                          IntAlu         7383     65.71%            # Type of FU issued
+                          (null)            2      0.01%            # Type of FU issued
+                          IntAlu         8707     55.71%            # Type of FU issued
                          IntMult            1      0.01%            # Type of FU issued
                           IntDiv            0      0.00%            # Type of FU issued
-                        FloatAdd            2      0.02%            # Type of FU issued
+                        FloatAdd            2      0.01%            # Type of FU issued
                         FloatCmp            0      0.00%            # Type of FU issued
                         FloatCvt            0      0.00%            # Type of FU issued
                        FloatMult            0      0.00%            # Type of FU issued
                         FloatDiv            0      0.00%            # Type of FU issued
                        FloatSqrt            0      0.00%            # Type of FU issued
-                         MemRead         2518     22.41%            # Type of FU issued
-                        MemWrite         1329     11.83%            # Type of FU issued
+                         MemRead         4394     28.11%            # Type of FU issued
+                        MemWrite         2524     16.15%            # Type of FU issued
                        IprAccess            0      0.00%            # Type of FU issued
                     InstPrefetch            0      0.00%            # Type of FU issued
 system.cpu.iq.ISSUE:FU_type_1.end_dist
-system.cpu.iq.ISSUE:FU_type                     27771                       # Type of FU issued
+system.cpu.iq.ISSUE:FU_type                     28208                       # Type of FU issued
 system.cpu.iq.ISSUE:FU_type.start_dist
                           (null)            4      0.01%            # Type of FU issued
-                          IntAlu        16519     59.48%            # Type of FU issued
+                          IntAlu        16572     58.75%            # Type of FU issued
                          IntMult            2      0.01%            # Type of FU issued
                           IntDiv            0      0.00%            # Type of FU issued
                         FloatAdd            4      0.01%            # Type of FU issued
@@ -521,20 +521,20 @@ system.cpu.iq.ISSUE:FU_type.start_dist
                        FloatMult            0      0.00%            # Type of FU issued
                         FloatDiv            0      0.00%            # Type of FU issued
                        FloatSqrt            0      0.00%            # Type of FU issued
-                         MemRead         7368     26.53%            # Type of FU issued
-                        MemWrite         3874     13.95%            # Type of FU issued
+                         MemRead         7738     27.43%            # Type of FU issued
+                        MemWrite         3888     13.78%            # Type of FU issued
                        IprAccess            0      0.00%            # Type of FU issued
                     InstPrefetch            0      0.00%            # Type of FU issued
 system.cpu.iq.ISSUE:FU_type.end_dist
-system.cpu.iq.ISSUE:fu_busy_cnt                   146                       # FU busy when requested
-system.cpu.iq.ISSUE:fu_busy_cnt_0                  73                       # FU busy when requested
-system.cpu.iq.ISSUE:fu_busy_cnt_1                  73                       # FU busy when requested
-system.cpu.iq.ISSUE:fu_busy_rate             0.005257                       # FU busy rate (busy events/executed inst)
-system.cpu.iq.ISSUE:fu_busy_rate_0           0.002629                       # FU busy rate (busy events/executed inst)
-system.cpu.iq.ISSUE:fu_busy_rate_1           0.002629                       # FU busy rate (busy events/executed inst)
+system.cpu.iq.ISSUE:fu_busy_cnt                   149                       # FU busy when requested
+system.cpu.iq.ISSUE:fu_busy_cnt_0                  72                       # FU busy when requested
+system.cpu.iq.ISSUE:fu_busy_cnt_1                  77                       # FU busy when requested
+system.cpu.iq.ISSUE:fu_busy_rate             0.005282                       # FU busy rate (busy events/executed inst)
+system.cpu.iq.ISSUE:fu_busy_rate_0           0.002552                       # FU busy rate (busy events/executed inst)
+system.cpu.iq.ISSUE:fu_busy_rate_1           0.002730                       # FU busy rate (busy events/executed inst)
 system.cpu.iq.ISSUE:fu_full.start_dist
                           (null)            0      0.00%            # attempts to use FU when none available
-                          IntAlu            0      0.00%            # attempts to use FU when none available
+                          IntAlu            1      0.67%            # attempts to use FU when none available
                          IntMult            0      0.00%            # attempts to use FU when none available
                           IntDiv            0      0.00%            # attempts to use FU when none available
                         FloatAdd            0      0.00%            # attempts to use FU when none available
@@ -543,52 +543,52 @@ system.cpu.iq.ISSUE:fu_full.start_dist
                        FloatMult            0      0.00%            # attempts to use FU when none available
                         FloatDiv            0      0.00%            # attempts to use FU when none available
                        FloatSqrt            0      0.00%            # attempts to use FU when none available
-                         MemRead           83     56.85%            # attempts to use FU when none available
-                        MemWrite           63     43.15%            # attempts to use FU when none available
+                         MemRead           83     55.70%            # attempts to use FU when none available
+                        MemWrite           65     43.62%            # attempts to use FU when none available
                        IprAccess            0      0.00%            # attempts to use FU when none available
                     InstPrefetch            0      0.00%            # attempts to use FU when none available
 system.cpu.iq.ISSUE:fu_full.end_dist
 system.cpu.iq.ISSUE:issued_per_cycle.start_dist                     # Number of insts issued each cycle
-system.cpu.iq.ISSUE:issued_per_cycle.samples       188950                      
+system.cpu.iq.ISSUE:issued_per_cycle.samples       189147                      
 system.cpu.iq.ISSUE:issued_per_cycle.min_value            0                      
-                               0       174613   9241.23%           
-                               1         6958    368.25%           
-                               2         3428    181.42%           
-                               3         2696    142.68%           
-                               4          636     33.66%           
-                               5          439     23.23%           
-                               6          143      7.57%           
-                               7           24      1.27%           
-                               8           13      0.69%           
+                               0       174626   9232.29%           
+                               1         7072    373.89%           
+                               2         3403    179.91%           
+                               3         2709    143.22%           
+                               4          713     37.70%           
+                               5          443     23.42%           
+                               6          143      7.56%           
+                               7           26      1.37%           
+                               8           12      0.63%           
 system.cpu.iq.ISSUE:issued_per_cycle.max_value            8                      
 system.cpu.iq.ISSUE:issued_per_cycle.end_dist
 
-system.cpu.iq.ISSUE:rate                     0.146975                       # Inst issue rate
-system.cpu.iq.iqInstsAdded                      39654                       # Number of instructions added to the IQ (excludes non-spec)
-system.cpu.iq.iqInstsIssued                     27771                       # Number of instructions issued
-system.cpu.iq.iqNonSpecInstsAdded                  42                       # Number of non-speculative instructions added to the IQ
-system.cpu.iq.iqSquashedInstsExamined           27426                       # Number of squashed instructions iterated over during squash; mainly for profiling
-system.cpu.iq.iqSquashedInstsIssued               185                       # Number of squashed instructions issued
-system.cpu.iq.iqSquashedNonSpecRemoved              8                       # Number of squashed non-spec instructions that were removed
-system.cpu.iq.iqSquashedOperandsExamined        20011                       # Number of squashed operands that are examined and possibly removed from graph
-system.cpu.l2cache.ReadReq_accesses               973                       # number of ReadReq accesses(hits+misses)
-system.cpu.l2cache.ReadReq_accesses_0             973                       # number of ReadReq accesses(hits+misses)
-system.cpu.l2cache.ReadReq_avg_miss_latency  6750.932169                       # average ReadReq miss latency
-system.cpu.l2cache.ReadReq_avg_miss_latency_0  6750.932169                       # average ReadReq miss latency
-system.cpu.l2cache.ReadReq_avg_mshr_miss_latency  3603.773895                       # average ReadReq mshr miss latency
-system.cpu.l2cache.ReadReq_avg_mshr_miss_latency_0  3603.773895                       # average ReadReq mshr miss latency
-system.cpu.l2cache.ReadReq_miss_latency       6568657                       # number of ReadReq miss cycles
-system.cpu.l2cache.ReadReq_miss_latency_0      6568657                       # number of ReadReq miss cycles
+system.cpu.iq.ISSUE:rate                     0.149133                       # Inst issue rate
+system.cpu.iq.iqInstsAdded                      40733                       # Number of instructions added to the IQ (excludes non-spec)
+system.cpu.iq.iqInstsIssued                     28208                       # Number of instructions issued
+system.cpu.iq.iqNonSpecInstsAdded                  41                       # Number of non-speculative instructions added to the IQ
+system.cpu.iq.iqSquashedInstsExamined           28495                       # Number of squashed instructions iterated over during squash; mainly for profiling
+system.cpu.iq.iqSquashedInstsIssued               192                       # Number of squashed instructions issued
+system.cpu.iq.iqSquashedNonSpecRemoved              7                       # Number of squashed non-spec instructions that were removed
+system.cpu.iq.iqSquashedOperandsExamined        21369                       # Number of squashed operands that are examined and possibly removed from graph
+system.cpu.l2cache.ReadReq_accesses               970                       # number of ReadReq accesses(hits+misses)
+system.cpu.l2cache.ReadReq_accesses_0             970                       # number of ReadReq accesses(hits+misses)
+system.cpu.l2cache.ReadReq_avg_miss_latency  6748.795876                       # average ReadReq miss latency
+system.cpu.l2cache.ReadReq_avg_miss_latency_0  6748.795876                       # average ReadReq miss latency
+system.cpu.l2cache.ReadReq_avg_mshr_miss_latency  3604.818557                       # average ReadReq mshr miss latency
+system.cpu.l2cache.ReadReq_avg_mshr_miss_latency_0  3604.818557                       # average ReadReq mshr miss latency
+system.cpu.l2cache.ReadReq_miss_latency       6546332                       # number of ReadReq miss cycles
+system.cpu.l2cache.ReadReq_miss_latency_0      6546332                       # number of ReadReq miss cycles
 system.cpu.l2cache.ReadReq_miss_rate                1                       # miss rate for ReadReq accesses
 system.cpu.l2cache.ReadReq_miss_rate_0              1                       # miss rate for ReadReq accesses
-system.cpu.l2cache.ReadReq_misses                 973                       # number of ReadReq misses
-system.cpu.l2cache.ReadReq_misses_0               973                       # number of ReadReq misses
-system.cpu.l2cache.ReadReq_mshr_miss_latency      3506472                       # number of ReadReq MSHR miss cycles
-system.cpu.l2cache.ReadReq_mshr_miss_latency_0      3506472                       # number of ReadReq MSHR miss cycles
+system.cpu.l2cache.ReadReq_misses                 970                       # number of ReadReq misses
+system.cpu.l2cache.ReadReq_misses_0               970                       # number of ReadReq misses
+system.cpu.l2cache.ReadReq_mshr_miss_latency      3496674                       # number of ReadReq MSHR miss cycles
+system.cpu.l2cache.ReadReq_mshr_miss_latency_0      3496674                       # number of ReadReq MSHR miss cycles
 system.cpu.l2cache.ReadReq_mshr_miss_rate            1                       # mshr miss rate for ReadReq accesses
 system.cpu.l2cache.ReadReq_mshr_miss_rate_0            1                       # mshr miss rate for ReadReq accesses
-system.cpu.l2cache.ReadReq_mshr_misses            973                       # number of ReadReq MSHR misses
-system.cpu.l2cache.ReadReq_mshr_misses_0          973                       # number of ReadReq MSHR misses
+system.cpu.l2cache.ReadReq_mshr_misses            970                       # number of ReadReq MSHR misses
+system.cpu.l2cache.ReadReq_mshr_misses_0          970                       # number of ReadReq MSHR misses
 system.cpu.l2cache.avg_blocked_cycles_no_mshrs <err: div-0>                       # average number of cycles each access was blocked
 system.cpu.l2cache.avg_blocked_cycles_no_targets <err: div-0>                       # average number of cycles each access was blocked
 system.cpu.l2cache.avg_refs                         0                       # Average number of references to valid blocks.
@@ -597,52 +597,52 @@ system.cpu.l2cache.blocked_no_targets               0                       # nu
 system.cpu.l2cache.blocked_cycles_no_mshrs            0                       # number of cycles access was blocked
 system.cpu.l2cache.blocked_cycles_no_targets            0                       # number of cycles access was blocked
 system.cpu.l2cache.cache_copies                     0                       # number of cache copies performed
-system.cpu.l2cache.demand_accesses                973                       # number of demand (read+write) accesses
-system.cpu.l2cache.demand_accesses_0              973                       # number of demand (read+write) accesses
+system.cpu.l2cache.demand_accesses                970                       # number of demand (read+write) accesses
+system.cpu.l2cache.demand_accesses_0              970                       # number of demand (read+write) accesses
 system.cpu.l2cache.demand_accesses_1                0                       # number of demand (read+write) accesses
-system.cpu.l2cache.demand_avg_miss_latency  6750.932169                       # average overall miss latency
-system.cpu.l2cache.demand_avg_miss_latency_0  6750.932169                       # average overall miss latency
+system.cpu.l2cache.demand_avg_miss_latency  6748.795876                       # average overall miss latency
+system.cpu.l2cache.demand_avg_miss_latency_0  6748.795876                       # average overall miss latency
 system.cpu.l2cache.demand_avg_miss_latency_1 <err: div-0>                       # average overall miss latency
-system.cpu.l2cache.demand_avg_mshr_miss_latency  3603.773895                       # average overall mshr miss latency
-system.cpu.l2cache.demand_avg_mshr_miss_latency_0  3603.773895                       # average overall mshr miss latency
+system.cpu.l2cache.demand_avg_mshr_miss_latency  3604.818557                       # average overall mshr miss latency
+system.cpu.l2cache.demand_avg_mshr_miss_latency_0  3604.818557                       # average overall mshr miss latency
 system.cpu.l2cache.demand_avg_mshr_miss_latency_1 <err: div-0>                       # average overall mshr miss latency
 system.cpu.l2cache.demand_hits                      0                       # number of demand (read+write) hits
 system.cpu.l2cache.demand_hits_0                    0                       # number of demand (read+write) hits
 system.cpu.l2cache.demand_hits_1                    0                       # number of demand (read+write) hits
-system.cpu.l2cache.demand_miss_latency        6568657                       # number of demand (read+write) miss cycles
-system.cpu.l2cache.demand_miss_latency_0      6568657                       # number of demand (read+write) miss cycles
+system.cpu.l2cache.demand_miss_latency        6546332                       # number of demand (read+write) miss cycles
+system.cpu.l2cache.demand_miss_latency_0      6546332                       # number of demand (read+write) miss cycles
 system.cpu.l2cache.demand_miss_latency_1            0                       # number of demand (read+write) miss cycles
 system.cpu.l2cache.demand_miss_rate                 1                       # miss rate for demand accesses
 system.cpu.l2cache.demand_miss_rate_0               1                       # miss rate for demand accesses
 system.cpu.l2cache.demand_miss_rate_1    <err: div-0>                       # miss rate for demand accesses
-system.cpu.l2cache.demand_misses                  973                       # number of demand (read+write) misses
-system.cpu.l2cache.demand_misses_0                973                       # number of demand (read+write) misses
+system.cpu.l2cache.demand_misses                  970                       # number of demand (read+write) misses
+system.cpu.l2cache.demand_misses_0                970                       # number of demand (read+write) misses
 system.cpu.l2cache.demand_misses_1                  0                       # number of demand (read+write) misses
 system.cpu.l2cache.demand_mshr_hits                 0                       # number of demand (read+write) MSHR hits
 system.cpu.l2cache.demand_mshr_hits_0               0                       # number of demand (read+write) MSHR hits
 system.cpu.l2cache.demand_mshr_hits_1               0                       # number of demand (read+write) MSHR hits
-system.cpu.l2cache.demand_mshr_miss_latency      3506472                       # number of demand (read+write) MSHR miss cycles
-system.cpu.l2cache.demand_mshr_miss_latency_0      3506472                       # number of demand (read+write) MSHR miss cycles
+system.cpu.l2cache.demand_mshr_miss_latency      3496674                       # number of demand (read+write) MSHR miss cycles
+system.cpu.l2cache.demand_mshr_miss_latency_0      3496674                       # number of demand (read+write) MSHR miss cycles
 system.cpu.l2cache.demand_mshr_miss_latency_1            0                       # number of demand (read+write) MSHR miss cycles
 system.cpu.l2cache.demand_mshr_miss_rate            1                       # mshr miss rate for demand accesses
 system.cpu.l2cache.demand_mshr_miss_rate_0            1                       # mshr miss rate for demand accesses
 system.cpu.l2cache.demand_mshr_miss_rate_1 <err: div-0>                       # mshr miss rate for demand accesses
-system.cpu.l2cache.demand_mshr_misses             973                       # number of demand (read+write) MSHR misses
-system.cpu.l2cache.demand_mshr_misses_0           973                       # number of demand (read+write) MSHR misses
+system.cpu.l2cache.demand_mshr_misses             970                       # number of demand (read+write) MSHR misses
+system.cpu.l2cache.demand_mshr_misses_0           970                       # number of demand (read+write) MSHR misses
 system.cpu.l2cache.demand_mshr_misses_1             0                       # number of demand (read+write) MSHR misses
 system.cpu.l2cache.fast_writes                      0                       # number of fast writes performed
 system.cpu.l2cache.mshr_cap_events                  0                       # number of times MSHR cap was activated
 system.cpu.l2cache.mshr_cap_events_0                0                       # number of times MSHR cap was activated
 system.cpu.l2cache.mshr_cap_events_1                0                       # number of times MSHR cap was activated
 system.cpu.l2cache.no_allocate_misses               0                       # Number of misses that were no-allocate
-system.cpu.l2cache.overall_accesses               973                       # number of overall (read+write) accesses
-system.cpu.l2cache.overall_accesses_0             973                       # number of overall (read+write) accesses
+system.cpu.l2cache.overall_accesses               970                       # number of overall (read+write) accesses
+system.cpu.l2cache.overall_accesses_0             970                       # number of overall (read+write) accesses
 system.cpu.l2cache.overall_accesses_1               0                       # number of overall (read+write) accesses
-system.cpu.l2cache.overall_avg_miss_latency  6750.932169                       # average overall miss latency
-system.cpu.l2cache.overall_avg_miss_latency_0  6750.932169                       # average overall miss latency
+system.cpu.l2cache.overall_avg_miss_latency  6748.795876                       # average overall miss latency
+system.cpu.l2cache.overall_avg_miss_latency_0  6748.795876                       # average overall miss latency
 system.cpu.l2cache.overall_avg_miss_latency_1 <err: div-0>                       # average overall miss latency
-system.cpu.l2cache.overall_avg_mshr_miss_latency  3603.773895                       # average overall mshr miss latency
-system.cpu.l2cache.overall_avg_mshr_miss_latency_0  3603.773895                       # average overall mshr miss latency
+system.cpu.l2cache.overall_avg_mshr_miss_latency  3604.818557                       # average overall mshr miss latency
+system.cpu.l2cache.overall_avg_mshr_miss_latency_0  3604.818557                       # average overall mshr miss latency
 system.cpu.l2cache.overall_avg_mshr_miss_latency_1 <err: div-0>                       # average overall mshr miss latency
 system.cpu.l2cache.overall_avg_mshr_uncacheable_latency <err: div-0>                       # average overall mshr uncacheable latency
 system.cpu.l2cache.overall_avg_mshr_uncacheable_latency_0 <err: div-0>                       # average overall mshr uncacheable latency
@@ -650,26 +650,26 @@ system.cpu.l2cache.overall_avg_mshr_uncacheable_latency_1 <err: div-0>
 system.cpu.l2cache.overall_hits                     0                       # number of overall hits
 system.cpu.l2cache.overall_hits_0                   0                       # number of overall hits
 system.cpu.l2cache.overall_hits_1                   0                       # number of overall hits
-system.cpu.l2cache.overall_miss_latency       6568657                       # number of overall miss cycles
-system.cpu.l2cache.overall_miss_latency_0      6568657                       # number of overall miss cycles
+system.cpu.l2cache.overall_miss_latency       6546332                       # number of overall miss cycles
+system.cpu.l2cache.overall_miss_latency_0      6546332                       # number of overall miss cycles
 system.cpu.l2cache.overall_miss_latency_1            0                       # number of overall miss cycles
 system.cpu.l2cache.overall_miss_rate                1                       # miss rate for overall accesses
 system.cpu.l2cache.overall_miss_rate_0              1                       # miss rate for overall accesses
 system.cpu.l2cache.overall_miss_rate_1   <err: div-0>                       # miss rate for overall accesses
-system.cpu.l2cache.overall_misses                 973                       # number of overall misses
-system.cpu.l2cache.overall_misses_0               973                       # number of overall misses
+system.cpu.l2cache.overall_misses                 970                       # number of overall misses
+system.cpu.l2cache.overall_misses_0               970                       # number of overall misses
 system.cpu.l2cache.overall_misses_1                 0                       # number of overall misses
 system.cpu.l2cache.overall_mshr_hits                0                       # number of overall MSHR hits
 system.cpu.l2cache.overall_mshr_hits_0              0                       # number of overall MSHR hits
 system.cpu.l2cache.overall_mshr_hits_1              0                       # number of overall MSHR hits
-system.cpu.l2cache.overall_mshr_miss_latency      3506472                       # number of overall MSHR miss cycles
-system.cpu.l2cache.overall_mshr_miss_latency_0      3506472                       # number of overall MSHR miss cycles
+system.cpu.l2cache.overall_mshr_miss_latency      3496674                       # number of overall MSHR miss cycles
+system.cpu.l2cache.overall_mshr_miss_latency_0      3496674                       # number of overall MSHR miss cycles
 system.cpu.l2cache.overall_mshr_miss_latency_1            0                       # number of overall MSHR miss cycles
 system.cpu.l2cache.overall_mshr_miss_rate            1                       # mshr miss rate for overall accesses
 system.cpu.l2cache.overall_mshr_miss_rate_0            1                       # mshr miss rate for overall accesses
 system.cpu.l2cache.overall_mshr_miss_rate_1 <err: div-0>                       # mshr miss rate for overall accesses
-system.cpu.l2cache.overall_mshr_misses            973                       # number of overall MSHR misses
-system.cpu.l2cache.overall_mshr_misses_0          973                       # number of overall MSHR misses
+system.cpu.l2cache.overall_mshr_misses            970                       # number of overall MSHR misses
+system.cpu.l2cache.overall_mshr_misses_0          970                       # number of overall MSHR misses
 system.cpu.l2cache.overall_mshr_misses_1            0                       # number of overall MSHR misses
 system.cpu.l2cache.overall_mshr_uncacheable_latency            0                       # number of overall MSHR uncacheable cycles
 system.cpu.l2cache.overall_mshr_uncacheable_latency_0            0                       # number of overall MSHR uncacheable cycles
@@ -689,35 +689,35 @@ system.cpu.l2cache.prefetcher.num_hwpf_squashed_from_miss            0
 system.cpu.l2cache.replacements                     0                       # number of replacements
 system.cpu.l2cache.replacements_0                   0                       # number of replacements
 system.cpu.l2cache.replacements_1                   0                       # number of replacements
-system.cpu.l2cache.sampled_refs                   973                       # Sample count of references to valid blocks.
+system.cpu.l2cache.sampled_refs                   969                       # Sample count of references to valid blocks.
 system.cpu.l2cache.soft_prefetch_mshr_full            0                       # number of mshr full events for SW prefetching instrutions
 system.cpu.l2cache.soft_prefetch_mshr_full_0            0                       # number of mshr full events for SW prefetching instrutions
 system.cpu.l2cache.soft_prefetch_mshr_full_1            0                       # number of mshr full events for SW prefetching instrutions
-system.cpu.l2cache.tagsinuse               489.113488                       # Cycle average of tags in use
+system.cpu.l2cache.tagsinuse               487.752870                       # Cycle average of tags in use
 system.cpu.l2cache.total_refs                       0                       # Total number of references to valid blocks.
 system.cpu.l2cache.warmup_cycle                     0                       # Cycle when the warmup percentage was hit.
 system.cpu.l2cache.writebacks                       0                       # number of writebacks
 system.cpu.l2cache.writebacks_0                     0                       # number of writebacks
 system.cpu.l2cache.writebacks_1                     0                       # number of writebacks
-system.cpu.numCycles                           188950                       # number of cpu cycles simulated
-system.cpu.rename.RENAME:BlockCycles            74870                       # Number of cycles rename is blocking
+system.cpu.numCycles                           189147                       # number of cpu cycles simulated
+system.cpu.rename.RENAME:BlockCycles            73147                       # Number of cycles rename is blocking
 system.cpu.rename.RENAME:CommittedMaps           8102                       # Number of HB maps that are committed
-system.cpu.rename.RENAME:IQFullEvents              21                       # Number of times rename has blocked due to IQ full
-system.cpu.rename.RENAME:IdleCycles            263382                       # Number of cycles rename is idle
-system.cpu.rename.RENAME:LSQFullEvents           2455                       # Number of times rename has blocked due to LSQ full
+system.cpu.rename.RENAME:IQFullEvents              24                       # Number of times rename has blocked due to IQ full
+system.cpu.rename.RENAME:IdleCycles            265134                       # Number of cycles rename is idle
+system.cpu.rename.RENAME:LSQFullEvents           2520                       # Number of times rename has blocked due to LSQ full
 system.cpu.rename.RENAME:ROBFullEvents             31                       # Number of times rename has blocked due to ROB full
-system.cpu.rename.RENAME:RenameLookups          72755                       # Number of register rename lookups that rename has made
-system.cpu.rename.RENAME:RenamedInsts           60875                       # Number of instructions processed by rename
-system.cpu.rename.RENAME:RenamedOperands        44048                       # Number of destination operands rename has renamed
-system.cpu.rename.RENAME:RunCycles              11047                       # Number of cycles rename is running
-system.cpu.rename.RENAME:SquashCycles            5552                       # Number of cycles rename is squashing
-system.cpu.rename.RENAME:UnblockCycles           2536                       # Number of cycles rename is unblocking
-system.cpu.rename.RENAME:UndoneMaps             35946                       # Number of HB maps that are undone due to squashing
-system.cpu.rename.RENAME:serializeStallCycles        20340                       # count of cycles rename stalled for serializing inst
-system.cpu.rename.RENAME:serializingInsts           51                       # count of serializing insts renamed
-system.cpu.rename.RENAME:skidInsts               4990                       # count of insts added to the skid buffer
-system.cpu.rename.RENAME:tempSerializingInsts           38                       # count of temporary serializing insts renamed
-system.cpu.timesIdled                             690                       # Number of times that the entire CPU went into an idle state and unscheduled itself
+system.cpu.rename.RENAME:RenameLookups          74254                       # Number of register rename lookups that rename has made
+system.cpu.rename.RENAME:RenamedInsts           61970                       # Number of instructions processed by rename
+system.cpu.rename.RENAME:RenamedOperands        45003                       # Number of destination operands rename has renamed
+system.cpu.rename.RENAME:RunCycles              11202                       # Number of cycles rename is running
+system.cpu.rename.RENAME:SquashCycles            5733                       # Number of cycles rename is squashing
+system.cpu.rename.RENAME:UnblockCycles           2584                       # Number of cycles rename is unblocking
+system.cpu.rename.RENAME:UndoneMaps             36901                       # Number of HB maps that are undone due to squashing
+system.cpu.rename.RENAME:serializeStallCycles        20319                       # count of cycles rename stalled for serializing inst
+system.cpu.rename.RENAME:serializingInsts           49                       # count of serializing insts renamed
+system.cpu.rename.RENAME:skidInsts               5114                       # count of insts added to the skid buffer
+system.cpu.rename.RENAME:tempSerializingInsts           37                       # count of temporary serializing insts renamed
+system.cpu.timesIdled                             691                       # Number of times that the entire CPU went into an idle state and unscheduled itself
 system.cpu.workload0.PROG:num_syscalls             17                       # Number of system calls
 system.cpu.workload1.PROG:num_syscalls             17                       # Number of system calls
 
diff --git a/tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/stderr b/tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/stderr
index c36de0b79..d8ccd6207 100644
--- a/tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/stderr
+++ b/tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/stderr
@@ -1,3 +1,5 @@
-0: system.remote_gdb.listener: listening for remote gdb on port 7000
-0: system.remote_gdb.listener: listening for remote gdb on port 7001
+0: system.remote_gdb.listener: listening for remote gdb #0 on port 7000
+0: system.remote_gdb.listener: listening for remote gdb #1 on port 7001
 warn: Entering event queue @ 0.  Starting simulation...
+warn: Increasing stack size by one page.
+warn: Increasing stack size by one page.
diff --git a/tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/stdout b/tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/stdout
index f07a960f8..30a45522d 100644
--- a/tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/stdout
+++ b/tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/stdout
@@ -7,8 +7,9 @@ The Regents of The University of Michigan
 All Rights Reserved
 
 
-M5 compiled Jan 22 2007 23:06:52
-M5 started Mon Jan 22 23:07:23 2007
-M5 executing on ewok
-command line: build/ALPHA_SE/m5.fast -d build/ALPHA_SE/tests/fast/quick/01.hello-2T-smt/alpha/linux/o3-timing tests/run.py quick/01.hello-2T-smt/alpha/linux/o3-timing
-Exiting @ tick 2237162 because target called exit()
+M5 compiled Mar 24 2007 13:51:02
+M5 started Sat Mar 24 13:51:16 2007
+M5 executing on zizzer.eecs.umich.edu
+command line: build/ALPHA_SE/m5.opt -d build/ALPHA_SE/tests/opt/quick/01.hello-2T-smt/alpha/linux/o3-timing tests/run.py quick/01.hello-2T-smt/alpha/linux/o3-timing
+Global frequency set at 1000000000000 ticks per second
+Exiting @ tick 2232164 because target called exit()